-
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
v0.4.0: String queries including fuzzy search based on tries (#5)
# v0.4.0 🚀 - Prefix-Trie & Suffix-Trie implementations - Prefix index based on the Prefix-Trie - StartsWith-Queries for fast prefix matching - Fast fuzzy matching is supported as well - FullText index based on the Suffix-Trie - Contains-Queries for fast infix matching - Fast fuzzy matching is supported as well to support (complex) search scenarios - Also supports StartsWith queries - Updated Readme and Unit-Tests
- Loading branch information
Showing
28 changed files
with
1,266 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
131 changes: 131 additions & 0 deletions
131
Akade.IndexedSet.Tests/DataStructures/SuffixTrieTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
using Akade.IndexedSet.DataStructures; | ||
using Microsoft.VisualStudio.TestTools.UnitTesting; | ||
|
||
namespace Akade.IndexedSet.Tests.DataStructures; | ||
|
||
/// <summary>
/// Tests for <c>SuffixTrie</c>: prefix, infix and suffix lookups through <c>GetAll</c>,
/// add / contains / remove round trips, and fuzzy search.
/// </summary>
[TestClass]
public class SuffixTrieTests
{
    // Shared fixture; rebuilt by TestInitializer before every test.
    private SuffixTrie<string> _trie = default!;

    /// <summary>
    /// Fills the shared trie with a fixed set of animal names, each stored under itself as key.
    /// </summary>
    [TestInitialize]
    public void TestInitializer()
    {
        _trie = new();

        foreach (string animal in new[] { "Tiger", "Tarantula", "Penguin", "Panther", "Pangolin", "Parrot", "Chihuahua" })
        {
            _ = AddToStringTrie(_trie, animal);
        }
    }

    [TestMethod]
    public void querying_common_prefixes_return_correct_elements()
    {
        AssertGetAll("T", "Tiger", "Tarantula");
        AssertGetAll("P", "Penguin", "Panther", "Pangolin", "Parrot");
        AssertGetAll("Pa", "Panther", "Pangolin", "Parrot");
        AssertGetAll("Pan", "Panther", "Pangolin");
        AssertGetAll("Pant", "Panther");
    }

    [TestMethod]
    public void querying_common_infixes_return_correct_elements()
    {
        AssertGetAll("an", "Panther", "Pangolin", "Tarantula");
        AssertGetAll("hua", "Chihuahua");
    }

    [TestMethod]
    public void querying_common_suffixes_return_correct_elements()
    {
        AssertGetAll("er", "Panther", "Tiger");
        AssertGetAll("hua", "Chihuahua");
        AssertGetAll("in", "Penguin", "Pangolin");
    }

    [TestMethod]
    public void adding_the_same_element_return_false()
    {
        var emptyTrie = new SuffixTrie<string>();

        // The first insertion succeeds, the identical second one is rejected.
        Assert.IsTrue(AddToStringTrie(emptyTrie, "Cat"));
        Assert.IsFalse(AddToStringTrie(emptyTrie, "Cat"));
    }

    [TestMethod]
    public void contains_returns_correct_value_when_adding_elements()
    {
        var emptyTrie = new SuffixTrie<string>();

        Assert.IsFalse(ContainsInStringTrie(emptyTrie, "Cat"));
        Assert.IsTrue(AddToStringTrie(emptyTrie, "Cat"));
        Assert.IsTrue(ContainsInStringTrie(emptyTrie, "Cat"));
    }

    [TestMethod]
    public void contains_returns_correct_value_when_removing_elements()
    {
        var singleEntryTrie = new SuffixTrie<string>();
        _ = AddToStringTrie(singleEntryTrie, "Cat");

        Assert.IsTrue(ContainsInStringTrie(singleEntryTrie, "Cat"));
        Assert.IsTrue(RemoveFromStringTrie(singleEntryTrie, "Cat"));
        Assert.IsFalse(ContainsInStringTrie(singleEntryTrie, "Cat"));
    }

    [TestMethod]
    public void removing_returns_false_if_the_element_is_not_present()
    {
        var emptyTrie = new SuffixTrie<string>();

        Assert.IsFalse(RemoveFromStringTrie(emptyTrie, "Cat"));
    }

    // Reference kept from the original file; presumably background for the trie-based
    // fuzzy (Levenshtein) search exercised below - TODO confirm:
    // https://murilo.wordpress.com/2011/02/01/fast-and-easy-levenshtein-distance-using-a-trie-in-c/
    [TestMethod]
    public void exact_fuzzy_search_with_single_result()
    {
        IEnumerable<string> matches = _trie.FuzzySearch("rantul", 1, true);

        Assert.AreEqual("Tarantula", matches.Single());
    }

    [TestMethod]
    public void exact_fuzzy_search_without_results()
    {
        IEnumerable<string> matches = _trie.FuzzySearch("Panner", 1, true);

        Assert.IsFalse(matches.Any());
    }

    [TestMethod]
    public void inexact_fuzzy_search_and_multiple_result()
    {
        string[] expected = { "Penguin", "Panther", "Pangolin", "Parrot", "Tarantula", "Chihuahua" };

        IEnumerable<string> matches = _trie.FuzzySearch("Pan", 2, false);

        CollectionAssert.AreEquivalent(expected, matches.ToArray());
    }

    [TestMethod]
    public void inexact_fuzzy_search_without_result()
    {
        IEnumerable<string> matches = _trie.FuzzySearch("Non", 1, false);

        Assert.IsFalse(matches.Any());
    }

    /// <summary>
    /// Asserts that <c>GetAll</c> on the shared trie yields exactly <paramref name="expected"/>, in any order.
    /// </summary>
    private void AssertGetAll(string query, params string[] expected)
    {
        string[] actual = _trie.GetAll(query).ToArray();

        CollectionAssert.AreEquivalent(expected, actual);
    }

    /// <summary>Adds <paramref name="value"/> to the trie, using the value itself as key.</summary>
    private static bool AddToStringTrie(SuffixTrie<string> stringTrie, string value)
        => stringTrie.Add(value, value);

    /// <summary>Removes <paramref name="value"/> from the trie, using the value itself as key.</summary>
    private static bool RemoveFromStringTrie(SuffixTrie<string> stringTrie, string value)
        => stringTrie.Remove(value, value);

    /// <summary>Checks whether <paramref name="value"/> is stored in the trie under itself as key.</summary>
    private static bool ContainsInStringTrie(SuffixTrie<string> stringTrie, string value)
        => stringTrie.Contains(value, value);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
using Akade.IndexedSet.DataStructures; | ||
using Microsoft.VisualStudio.TestTools.UnitTesting; | ||
|
||
namespace Akade.IndexedSet.Tests.DataStructures; | ||
|
||
/// <summary>
/// Tests for <c>Trie</c>: prefix lookups through <c>GetAll</c>, add / contains / remove
/// round trips, and fuzzy search.
/// </summary>
[TestClass]
public class TrieTests
{
    /// <summary>
    /// Builds a fresh trie holding a fixed set of animal names, each stored under itself as key.
    /// </summary>
    private static Trie<string> GetAnimalTrie()
    {
        var animals = new Trie<string>();

        foreach (string animal in new[] { "Tiger", "Tarantula", "Penguin", "Panther", "Pangolin", "Parrot" })
        {
            _ = AddToStringTrie(animals, animal);
        }

        return animals;
    }

    [TestMethod]
    public void querying_common_prefixes_return_correct_elements()
    {
        Trie<string> animals = GetAnimalTrie();

        AssertGetAll(animals, "T", "Tiger", "Tarantula");
        AssertGetAll(animals, "P", "Penguin", "Panther", "Pangolin", "Parrot");
        AssertGetAll(animals, "Pa", "Panther", "Pangolin", "Parrot");
        AssertGetAll(animals, "Pan", "Panther", "Pangolin");
        AssertGetAll(animals, "Pant", "Panther");
    }

    [TestMethod]
    public void adding_the_same_element_returns_false()
    {
        var emptyTrie = new Trie<string>();

        // The first insertion succeeds, the identical second one is rejected.
        Assert.IsTrue(AddToStringTrie(emptyTrie, "Cat"));
        Assert.IsFalse(AddToStringTrie(emptyTrie, "Cat"));
    }

    [TestMethod]
    public void contains_returns_correct_value_when_adding_elements()
    {
        var emptyTrie = new Trie<string>();

        Assert.IsFalse(ContainsInStringTrie(emptyTrie, "Cat"));
        Assert.IsTrue(AddToStringTrie(emptyTrie, "Cat"));
        Assert.IsTrue(ContainsInStringTrie(emptyTrie, "Cat"));
    }

    [TestMethod]
    public void contains_returns_correct_value_when_removing_elements()
    {
        var singleEntryTrie = new Trie<string>();
        _ = AddToStringTrie(singleEntryTrie, "Cat");

        Assert.IsTrue(ContainsInStringTrie(singleEntryTrie, "Cat"));
        Assert.IsTrue(RemoveFromStringTrie(singleEntryTrie, "Cat"));
        Assert.IsFalse(ContainsInStringTrie(singleEntryTrie, "Cat"));
    }

    [TestMethod]
    public void removing_returns_false_if_the_element_is_not_present()
    {
        var emptyTrie = new Trie<string>();

        Assert.IsFalse(RemoveFromStringTrie(emptyTrie, "Cat"));
    }

    [TestMethod]
    public void exact_fuzzy_search_with_single_result()
    {
        Trie<string> animals = GetAnimalTrie();

        IEnumerable<string> matches = animals.FuzzySearch("Panter", 1, true);

        Assert.AreEqual("Panther", matches.Single());
    }

    [TestMethod]
    public void exact_fuzzy_search_without_results()
    {
        Trie<string> animals = GetAnimalTrie();

        IEnumerable<string> matches = animals.FuzzySearch("Panner", 1, true);

        Assert.IsFalse(matches.Any());
    }

    [TestMethod]
    public void inexact_fuzzy_search_and_multiple_result()
    {
        Trie<string> animals = GetAnimalTrie();
        string[] expected = { "Penguin", "Panther", "Pangolin", "Parrot" };

        IEnumerable<string> matches = animals.FuzzySearch("Pan", 2, false);

        CollectionAssert.AreEquivalent(expected, matches.ToArray());
    }

    [TestMethod]
    public void inexact_fuzzy_search_without_result()
    {
        Trie<string> animals = GetAnimalTrie();

        IEnumerable<string> matches = animals.FuzzySearch("Non", 1, false);

        Assert.IsFalse(matches.Any());
    }

    /// <summary>
    /// Asserts that <c>GetAll</c> on <paramref name="trie"/> yields exactly <paramref name="expected"/>, in any order.
    /// </summary>
    private static void AssertGetAll(Trie<string> trie, string query, params string[] expected)
    {
        string[] actual = trie.GetAll(query).ToArray();

        CollectionAssert.AreEquivalent(expected, actual);
    }

    /// <summary>Adds <paramref name="value"/> to the trie, using the value itself as key.</summary>
    private static bool AddToStringTrie(Trie<string> stringTrie, string value)
        => stringTrie.Add(value, value);

    /// <summary>Removes <paramref name="value"/> from the trie, using the value itself as key.</summary>
    private static bool RemoveFromStringTrie(Trie<string> stringTrie, string value)
        => stringTrie.Remove(value, value);

    /// <summary>Checks whether <paramref name="value"/> is stored in the trie under itself as key.</summary>
    private static bool ContainsInStringTrie(Trie<string> stringTrie, string value)
        => stringTrie.Contains(value, value);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
using Akade.IndexedSet.Tests.TestUtilities; | ||
using Microsoft.VisualStudio.TestTools.UnitTesting; | ||
|
||
namespace Akade.IndexedSet.Tests; | ||
|
||
/// <summary>
/// Tests for full-text indices on an <c>IndexedSet</c>: exact retrieval by indexed key
/// as well as StartsWith, FuzzyStartsWith, Contains and FuzzyContains queries.
/// </summary>
[TestClass]
public class FullTextIndices
{
    private record class Animal(string Name, string Category);

    // Rebuilt by Init before every test.
    private IndexedSet<Animal> _indexedSet = null!;
    private readonly Animal _bonobo = new("Bonobo", "Mammal");
    private readonly Animal _booby = new("Booby", "Bird");
    private readonly Animal _boomslang = new("Boomslang", "Reptile");
    private readonly Animal _borador = new("Borador", "Mammal");
    private readonly Animal _tiger = new("Tiger", "Mammal");
    private readonly Animal _tarantula = new("Tarantula", "Spider");
    private readonly Animal _tapir = new("Tapir", "Mammal");
    private readonly Animal _penguin = new("Penguin", "Bird");
    private readonly Animal _panther = new("Panther", "Mammal");
    private readonly Animal _pangolin = new("Pangolin", "Mammal");
    private readonly Animal _parrot = new("Parrot", "Bird");

    /// <summary>
    /// Builds the set under test with two full-text indices: one on Category, one on Name.
    /// </summary>
    [TestInitialize]
    public void Init()
    {
        var data = new Animal[] {
            _bonobo,
            _booby,
            _boomslang,
            _borador,
            _tiger,
            _tarantula,
            _tapir,
            _penguin,
            _panther,
            _pangolin,
            _parrot,
        };
        _indexedSet = data.ToIndexedSet()
                          .WithFullTextIndex(x => x.Category.AsMemory())
                          .WithFullTextIndex(x => x.Name.AsMemory())
                          .Build();
    }

    [TestMethod]
    public void single_item_retrieval_works()
    {
        // "Reptile" and "Spider" each occur exactly once in the fixture.
        // NOTE(review): AssertSingleItem lives in TestUtilities (not visible here); presumably it
        // looks up the element's own key and expects exactly that element back - confirm.
        _indexedSet.AssertSingleItem(x => x.Category.AsMemory(), _boomslang);
        _indexedSet.AssertSingleItem(x => x.Category.AsMemory(), _tarantula);
    }

    [TestMethod]
    public void single_item_retrieval_throws_exception_if_there_is_more_than_one_result()
    {
        // "Mammal" is shared by several animals, so a single-item lookup must fail.
        // Assert.ThrowsException pins the failure to this exact call; [ExpectedException] would
        // also pass if any other statement in the test threw the same exception type.
        _ = Assert.ThrowsException<InvalidOperationException>(
            () => _indexedSet.AssertSingleItem(x => x.Category.AsMemory(), _bonobo));
    }

    [TestMethod]
    public void multi_item_retrieval_works()
    {
        // Expected elements are all fixture animals of category "Mammal" and "Bird" respectively.
        _indexedSet.AssertMultipleItems(x => x.Category.AsMemory(), expectedElements: new[] { _bonobo, _borador, _tiger, _tapir, _panther, _pangolin });
        _indexedSet.AssertMultipleItems(x => x.Category.AsMemory(), expectedElements: new[] { _booby, _penguin, _parrot });
    }

    [TestMethod]
    public void search_via_starts_with()
    {
        CollectionAssert.AreEquivalent(new[] { _booby, _boomslang }, _indexedSet.StartsWith(x => x.Name.AsMemory(), "Boo".AsMemory()).ToArray());
        CollectionAssert.AreEquivalent(new[] { _panther, _pangolin }, _indexedSet.StartsWith(x => x.Name.AsMemory(), "Pan".AsMemory()).ToArray());
    }

    [TestMethod]
    public void search_via_fuzzy_starts_with()
    {
        // Third argument 1: presumably the maximum edit distance tolerated - confirm against the API.
        CollectionAssert.AreEquivalent(new[] { _bonobo, _booby, _boomslang, _borador }, _indexedSet.FuzzyStartsWith(x => x.Name.AsMemory(), "Boo".AsMemory(), 1).ToArray());
        CollectionAssert.AreEquivalent(new[] { _penguin, _parrot, _panther, _pangolin }, _indexedSet.FuzzyStartsWith(x => x.Name.AsMemory(), "Pan".AsMemory(), 1).ToArray());
    }

    [TestMethod]
    public void search_via_contains()
    {
        CollectionAssert.AreEquivalent(new[] { _boomslang, _tarantula, _panther, _pangolin }, _indexedSet.Contains(x => x.Name.AsMemory(), "an".AsMemory()).ToArray());
    }

    [TestMethod]
    public void search_via_fuzzy_contains()
    {
        Animal[] actual = _indexedSet.FuzzyContains(x => x.Name.AsMemory(), "Pan".AsMemory(), 1).ToArray();
        CollectionAssert.AreEquivalent(new[] { _boomslang, _tarantula, _penguin, _parrot, _panther, _pangolin }, actual);
    }
}
Oops, something went wrong.