Skip to content

Commit

Permalink
Parametrization
Browse files Browse the repository at this point in the history
Add parametrization of the text search language dictionary and of the Reciprocal Rank Fusion (RRF) constant "k" used to score the results of Hybrid Search
  • Loading branch information
SignalRT committed Jan 29, 2025
1 parent e8d0398 commit 8abde3c
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 10 deletions.
20 changes: 12 additions & 8 deletions extensions/Postgres/Postgres/Internals/PostgresDbClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ public PostgresDbClient(PostgresConfig config, ILoggerFactory? loggerFactory = n
this._dbNamePresent = config.ConnectionString.Contains("Database=", StringComparison.OrdinalIgnoreCase);
this._schema = config.Schema;
this._tableNamePrefix = config.TableNamePrefix;
this._textSearchLanguage = config.TextSearchLanguage;
this._rrf_K = config.RRF_K;

this._colId = config.Columns[PostgresConfig.ColumnId];
this._colEmbedding = config.Columns[PostgresConfig.ColumnEmbedding];
Expand Down Expand Up @@ -168,7 +170,7 @@ public async Task CreateTableAsync(
{
cmd.CommandText = this._createTableSql
.Replace(PostgresConfig.SqlPlaceholdersTableName, tableName, StringComparison.Ordinal)
.Replace(PostgresConfig.SqlPlaceholdersVectorSize, $"{vectorSize}", StringComparison.Ordinal)
.Replace(PostgresConfig.SqlPlaceholdersVectorSize, $"{vectorSize}", StringComparison.Ordinal)

Check warning on line 173 in extensions/Postgres/Postgres/Internals/PostgresDbClient.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, ubuntu-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 173 in extensions/Postgres/Postgres/Internals/PostgresDbClient.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Debug)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check warning on line 173 in extensions/Postgres/Postgres/Internals/PostgresDbClient.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, windows-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 173 in extensions/Postgres/Postgres/Internals/PostgresDbClient.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Release)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)
.Replace(PostgresConfig.SqlPlaceholdersLockId, $"{lockId}", StringComparison.Ordinal);

this._log.LogTrace("Creating table with custom SQL: {0}", cmd.CommandText);
Expand All @@ -186,11 +188,11 @@ public async Task CreateTableAsync(
{this._colPayload} JSONB DEFAULT '{{}}'::JSONB NOT NULL
);
CREATE INDEX IF NOT EXISTS ""{indexTags}"" ON {tableName} USING GIN({this._colTags});
CREATE INDEX IF NOT EXISTS ""{indexContent}"" ON {tableName} USING GIN(to_tsvector('english',{this._colContent}));
CREATE INDEX IF NOT EXISTS ""{indexContent}"" ON {tableName} USING GIN(to_tsvector('{this._textSearchLanguage}',{this._colContent}));
COMMIT;
";
#pragma warning restore CA2100

Check warning on line 195 in extensions/Postgres/Postgres/Internals/PostgresDbClient.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, ubuntu-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 195 in extensions/Postgres/Postgres/Internals/PostgresDbClient.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Debug)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check warning on line 195 in extensions/Postgres/Postgres/Internals/PostgresDbClient.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, windows-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 195 in extensions/Postgres/Postgres/Internals/PostgresDbClient.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Release)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)
this._log.LogTrace("Creating table with default SQL: {0}", cmd.CommandText);
}

Expand Down Expand Up @@ -428,7 +430,7 @@ DO UPDATE SET

// Column names
string columns = withEmbeddings ? this._columnsListWithEmbeddings : this._columnsListNoEmbeddings;
string columnsHibrid = this._columnsListHybrid;
string columnsHybrid = this._columnsListHybrid;
string columnsListHybridCoalesce = this._columnsListHybridCoalesce;

// Filtering logic, including filter by similarity
Expand Down Expand Up @@ -466,23 +468,23 @@ DO UPDATE SET
// the similarity (1 - distance) later. Furthermore, colDistance can't be used in the WHERE clause.
cmd.CommandText = @$"
WITH semantic_search AS (
SELECT {columnsHibrid}, RANK () OVER (ORDER BY {this._colEmbedding} <=> @embedding) AS rank
SELECT {columnsHybrid}, RANK () OVER (ORDER BY {this._colEmbedding} <=> @embedding) AS rank
FROM {tableName}
WHERE {filterSql}
ORDER BY {this._colEmbedding} <=> @embedding
LIMIT @limit
),
keyword_search AS (
SELECT {columnsHibrid}, RANK () OVER (ORDER BY ts_rank_cd(to_tsvector('english', {this._colContent}), query) DESC)
SELECT {columnsHybrid}, RANK () OVER (ORDER BY ts_rank_cd(to_tsvector('english', {this._colContent}), query) DESC)
FROM {tableName}, plainto_tsquery('english', @query) query
WHERE {filterSqlHybridText} AND to_tsvector('english', {this._colContent}) @@ query
ORDER BY ts_rank_cd(to_tsvector('english', {this._colContent}), query) DESC
LIMIT @limit
)
SELECT
{columnsListHybridCoalesce}
COALESCE(1.0 / (60 + semantic_search.rank), 0.0) +
COALESCE(1.0 / (60 + keyword_search.rank), 0.0) AS {colDistance}
COALESCE(1.0 / ({this._rrf_K} + semantic_search.rank), 0.0) +
COALESCE(1.0 / ({this._rrf_K} + keyword_search.rank), 0.0) AS {colDistance}
FROM semantic_search
FULL OUTER JOIN keyword_search ON semantic_search.{this._colId} = keyword_search.{this._colId}
ORDER BY {colDistance} DESC
Expand Down Expand Up @@ -750,6 +752,8 @@ public async ValueTask DisposeAsync()
private readonly string _columnsListHybrid;
private readonly string _columnsListHybridCoalesce;
private readonly bool _dbNamePresent;
private readonly string _textSearchLanguage;
private readonly int _rrf_K;

/// <summary>
/// Try to connect to PG, handling exceptions in case the DB doesn't exist
Expand Down
16 changes: 14 additions & 2 deletions extensions/Postgres/Postgres/PostgresConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,22 @@ public class PostgresConfig
/// </summary>
public bool UseHybridSearch { get; set; } = false;


/// <summary>
/// Create a new instance of the configuration
/// Defines the language (PostgreSQL text search configuration, e.g. "english") used for the textual part of hybrid search.
/// See: https://www.postgresql.org/docs/current/textsearch.html
/// </summary>
public PostgresConfig()
public string TextSearchLanguage { get; set; } = "english";

/// <summary>

Check warning on line 123 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, ubuntu-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 123 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Debug)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check warning on line 123 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, windows-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 123 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Release)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)
/// Reciprocal Rank Fusion (RRF) constant "k" used to score the results of Hybrid Search: each ranked result contributes 1 / (k + rank)

Check warning on line 124 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, ubuntu-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 124 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Debug)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check warning on line 124 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, windows-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 124 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Release)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)
/// </summary>
public int RRF_K { get; set; } = 50;

Check warning on line 126 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, ubuntu-latest)

Remove the underscores from member name Microsoft.KernelMemory.PostgresConfig.RRF_K (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca1707)

Check warning on line 126 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, ubuntu-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 126 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Debug)

Remove the underscores from member name Microsoft.KernelMemory.PostgresConfig.RRF_K (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca1707)

Check failure on line 126 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Debug)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check warning on line 126 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, windows-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 126 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Release)

Remove the underscores from member name Microsoft.KernelMemory.PostgresConfig.RRF_K (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca1707)

Check failure on line 126 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Release)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

/// <summary>

Check warning on line 128 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, ubuntu-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 128 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Debug)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check warning on line 128 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, windows-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 128 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Release)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)
/// Create a new instance of the configuration

Check warning on line 129 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, ubuntu-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 129 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Debug)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check warning on line 129 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, windows-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)

Check failure on line 129 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Build (9.0.x, ubuntu-latest, Release)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)
/// </summary>
public PostgresConfig()

Check warning on line 131 in extensions/Postgres/Postgres/PostgresConfig.cs

View workflow job for this annotation

GitHub Actions / Unit Tests (9.0.x, windows-latest)

Fix formatting (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/style-rules/ide0055)
{
this.Columns = new Dictionary<string, string>
{
Expand Down

0 comments on commit 8abde3c

Please sign in to comment.