From 88bcd9f30e4afbdf56ecc5b1dfbf977242c0e429 Mon Sep 17 00:00:00 2001 From: Ahmed Yasin Koculu Date: Sat, 7 Sep 2024 23:23:49 +0200 Subject: [PATCH 1/4] Add background indexing option on playground app and refactor. --- .../SearchEngineApp.cs | 7 +++- .../Index/IndexOfTokenRecordPreviousToken.cs | 32 +++++++++---------- .../Model/CompositeKeyOfRecordToken.cs | 4 ++- .../CompositeKeyOfTokenRecordPrevious.cs | 4 ++- .../SearchEngines/HashedSearchEngine.cs | 2 +- 5 files changed, 29 insertions(+), 20 deletions(-) diff --git a/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs b/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs index 887b636..0f5197d 100644 --- a/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs +++ b/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs @@ -20,6 +20,8 @@ public sealed class SearchEngineApp : IDisposable readonly bool UseDiacriticNormalizer = false; + readonly bool IndexInBackground = false; + readonly HashedSearchEngine SearchEngine; readonly RecordTable RecordTable; @@ -66,7 +68,10 @@ void MainMenu() { case "1": var o = ConfigureIndex(); - CreateIndex(o.indexPath, o.pattern, true); + if (IndexInBackground) + Task.Run(() => CreateIndex(o.indexPath, o.pattern, false)); + else + CreateIndex(o.indexPath, o.pattern, true); break; case "2": Search(); diff --git a/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs index 0f1080b..fb633c2 100644 --- a/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs @@ -20,6 +20,18 @@ public sealed class IndexOfTokenRecordPreviousToken where TRecord : unmanaged where TToken : unmanaged { + readonly bool useSecondaryIndex; + + bool isDropped; + + bool isDisposed; + + readonly SearchOnIndexOfTokenRecordPreviousToken + searchAlgorithm; + + readonly AdvancedSearchOnIndexOfTokenRecordPreviousToken + 
advancedSearchAlgorithm; + /// /// Gets the primary zone tree used to store and retrieve records by token and previous token. /// @@ -70,15 +82,10 @@ public bool IsReadOnly } } - readonly bool useSecondaryIndex; - - bool isDropped = false; - - readonly SearchOnIndexOfTokenRecordPreviousToken - searchAlgorithm; - - readonly AdvancedSearchOnIndexOfTokenRecordPreviousToken - advancedSearchAlgorithm; + /// + /// Returns true if the index is dropped, otherwise false. + /// + public bool IsIndexDropped { get => isDropped; } /// /// Initializes a new instance of the class, @@ -165,11 +172,6 @@ public void ThrowIfIndexIsDropped() IndexOfTokenRecordPreviousToken)} is dropped."); } - /// - /// Returns true if the index is dropped, otherwise false. - /// - public bool IsIndexDropped { get => isDropped; } - /// /// Evicts data from memory to disk in both primary and secondary zone trees. /// @@ -401,8 +403,6 @@ public TRecord[] Search( return advancedSearchAlgorithm.Search(query, cancellationToken); } - bool isDisposed = false; - /// /// Disposes the resources used by the index. /// diff --git a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordToken.cs b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordToken.cs index e0da32b..a038bfa 100644 --- a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordToken.cs +++ b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordToken.cs @@ -10,7 +10,9 @@ namespace ZoneTree.FullTextSearch.Model; /// The type of the record component of the key. Must be an unmanaged type. /// The type of the token component of the key. Must be an unmanaged type. 
[StructLayout(LayoutKind.Sequential)] -public struct CompositeKeyOfRecordToken : IEquatable> where TRecord : unmanaged +public struct CompositeKeyOfRecordToken + : IEquatable> + where TRecord : unmanaged where TToken : unmanaged { /// diff --git a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPrevious.cs b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPrevious.cs index ec6e281..756d213 100644 --- a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPrevious.cs +++ b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPrevious.cs @@ -10,7 +10,9 @@ namespace ZoneTree.FullTextSearch; /// The type of the record component of the key. Must be an unmanaged type. /// The type of the token components of the key. Must be an unmanaged type. [StructLayout(LayoutKind.Sequential)] -public struct CompositeKeyOfTokenRecordPrevious : IEquatable> where TRecord : unmanaged +public struct CompositeKeyOfTokenRecordPrevious + : IEquatable> + where TRecord : unmanaged where TToken : unmanaged { /// diff --git a/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs b/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs index d85f8fc..376349c 100644 --- a/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs +++ b/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs @@ -413,7 +413,7 @@ public void Drop() Index.Drop(); } - bool isDisposed = false; + bool isDisposed; /// /// Disposes the resources used by the search engine. From 3e4a9820120077d1828eac08b3e3b8a23904a71b Mon Sep 17 00:00:00 2001 From: Ahmed Yasin Koculu Date: Sat, 7 Sep 2024 23:33:44 +0200 Subject: [PATCH 2/4] Avoid redundant seeks by tracking unmatched records during the search process. 
--- src/ZoneTree.FullTextSearch/Directory.Build.props | 4 ++-- .../AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs | 6 ++++++ .../Search/SearchOnIndexOfTokenRecordPreviousToken.cs | 10 +++++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/ZoneTree.FullTextSearch/Directory.Build.props b/src/ZoneTree.FullTextSearch/Directory.Build.props index fdc54ac..61a34cb 100644 --- a/src/ZoneTree.FullTextSearch/Directory.Build.props +++ b/src/ZoneTree.FullTextSearch/Directory.Build.props @@ -5,8 +5,8 @@ Ahmed Yasin Koculu ZoneTree.FullTextSearch ZoneTree.FullTextSearch - 1.0.3.0 - 1.0.3.0 + 1.0.4.0 + 1.0.4.0 Ahmed Yasin Koculu ZoneTree.FullTextSearch ZoneTree.FullTextSearch is an open-source library that extends ZoneTree to provide efficient full-text search capabilities. It offers a fast, embedded search engine suitable for applications that require high performance and do not rely on external databases. diff --git a/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs index eab7631..703332a 100644 --- a/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs @@ -284,7 +284,10 @@ HashSet ProcessAllTokens( if (records.Contains(record)) continue; if (!DoesRecordMatchesTheQuery(query.QueryNode, record)) + { + skipRecords.Add(record); continue; + } if (off >= skip) { @@ -325,7 +328,10 @@ HashSet ProcessEntireIndex( if (records.Contains(record)) continue; if (!DoesRecordMatchesTheQuery(query.QueryNode, record)) + { + skipRecords.Add(record); continue; + } if (off >= skip) { diff --git a/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs index 0c62b67..e9c1131 100644 --- 
a/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs @@ -174,6 +174,7 @@ HashSet FindRecordsMatchingAllTokens( int skip, int limit) { + var skipRecords = new HashSet(); var records = new HashSet(); iterator1.Seek(new CompositeKeyOfTokenRecordPrevious() { @@ -198,11 +199,14 @@ HashSet FindRecordsMatchingAllTokens( // different previous token. if (records.Contains(record)) continue; - if (!DoesRecordContainAllTokens(tokens, record)) - continue; + if (skipRecords.Contains(record)) continue; - if (!DoesRecordContainAnyOfTheFacets(facets, record)) + if (!DoesRecordContainAllTokens(tokens, record) || + !DoesRecordContainAnyOfTheFacets(facets, record)) + { + skipRecords.Add(record); continue; + } if (off >= skip) { From 1a9b1764b5d10efe8f4210ca5f63ff81d3afaf5a Mon Sep 17 00:00:00 2001 From: Ahmed Yasin Koculu Date: Sat, 7 Sep 2024 23:40:18 +0200 Subject: [PATCH 3/4] Update ZoneTree version. 
--- .../Index/IndexOfTokenRecordPreviousToken.cs | 8 +++++--- .../ZoneTree.FullTextSearch.csproj | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs index fb633c2..1dfe6ad 100644 --- a/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs @@ -119,7 +119,8 @@ public IndexOfTokenRecordPreviousToken( tokenComparer = ComponentsForKnownTypes.GetComparer(); var factory1 = new ZoneTreeFactory, byte>() .SetDataDirectory($"{dataPath}/index1") - .SetIsValueDeletedDelegate((in byte x) => x == 1) + .SetIsDeletedDelegate( + (in CompositeKeyOfTokenRecordPrevious key, in byte value) => value == 1) .SetMarkValueDeletedDelegate((ref byte x) => x = 1) .SetKeySerializer(new StructSerializer>()) .SetComparer( @@ -142,7 +143,8 @@ public IndexOfTokenRecordPreviousToken( { var factory2 = new ZoneTreeFactory, byte>() .SetDataDirectory($"{dataPath}/index2") - .SetIsValueDeletedDelegate((in byte x) => x == 1) + .SetIsDeletedDelegate( + (in CompositeKeyOfRecordToken key, in byte value) => value == 1) .SetMarkValueDeletedDelegate((ref byte x) => x = 1) .SetKeySerializer(new StructSerializer>()) .SetComparer( @@ -239,7 +241,7 @@ public void UpsertRecord(TToken token, TRecord record, TToken previousToken) Record = record, Token = token, }; - ZoneTree2.TryAdd(key, new()); + ZoneTree2.TryAdd(key, new(), out _); } /// diff --git a/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj b/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj index e40f0f6..e7b1216 100644 --- a/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj +++ b/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj @@ -38,7 +38,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + From 0734bf41bdf8fd3877b0fede4d4b2037dc611af6 Mon Sep 17 
00:00:00 2001 From: Ahmed Yasin Koculu Date: Sat, 7 Sep 2024 23:46:49 +0200 Subject: [PATCH 4/4] Revert skipRecords collection as records are ordered in simple search. --- .../Search/SearchOnIndexOfTokenRecordPreviousToken.cs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs index e9c1131..4da5f0e 100644 --- a/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs @@ -174,7 +174,6 @@ HashSet FindRecordsMatchingAllTokens( int skip, int limit) { - var skipRecords = new HashSet(); var records = new HashSet(); iterator1.Seek(new CompositeKeyOfTokenRecordPrevious() { @@ -199,14 +198,9 @@ HashSet FindRecordsMatchingAllTokens( // different previous token. if (records.Contains(record)) continue; - if (skipRecords.Contains(record)) continue; - if (!DoesRecordContainAllTokens(tokens, record) || !DoesRecordContainAnyOfTheFacets(facets, record)) - { - skipRecords.Add(record); continue; - } if (off >= skip) {