| | 1 | | using MoreStructures.SuffixStructures.Builders; |
| | 2 | |
|
| | 3 | | namespace MoreStructures.SuffixTries.Builders; |
| | 4 | |
|
| | 5 | | /// <summary> |
| | 6 | | /// Builds objects, such as edges and nodes, for <see cref="SuffixTrieNode"/> structures. |
| | 7 | | /// </summary> |
| | 8 | | /// <remarks> |
| | 9 | | /// <para id="algo"> |
| | 10 | | /// ALGORITHM |
| | 11 | | /// <br/> |
| | 12 | | /// Implemented iteratively, with one level of recursion per char of each suffix of the input |
| | 13 | | /// <see cref="TextWithTerminator"/> (where the longest suffix is the text itself). |
| | 14 | | /// </para> |
| | 15 | | /// <para id="advantages"> |
| | 16 | | /// ADVANTAGES AND DISADVANTAGES |
| | 17 | | /// <br/> |
| | 18 | | /// Limited by call stack depth and usable with input text of a "reasonable" length (i.e. string having a length |
| | 19 | | /// < ~1K chars). |
| | 20 | | /// </para> |
| | 21 | | /// <para id="complexity"> |
| | 22 | | /// COMPLEXITY |
| | 23 | | /// <br/> |
| | 24 | | /// - Time Complexity = O(t^2 * as) and Space Complexity = O(t^2) where t = length of the text to match and |
| | 25 | | /// as = size of the alphabet of the text. |
| | 26 | | /// <br/> |
| | 27 | | /// - If the alphabet is of constant size, complexity is quadratic. |
| | 28 | | /// </para> |
| | 29 | | /// </remarks> |
| | 30 | | public class NaivePartiallyRecursiveSuffixTrieBuilder |
| | 31 | | : IBuilder<SuffixTrieEdge, SuffixTrieNode> |
| | 32 | | { |
| | 33 | | /// <inheritdoc path="//*[not(self::summary or self::remarks)]"/> |
| | 34 | | /// <summary> |
| | 35 | | /// <inheritdoc/> |
| | 36 | | /// </summary> |
| | 37 | | /// <remarks> |
| | 38 | | /// <inheritdoc cref="NaivePartiallyRecursiveSuffixTrieBuilder"/> |
| | 39 | | /// </remarks> |
| | 40 | | public SuffixTrieNode BuildTree(params TextWithTerminator[] texts) |
| 50 | 41 | | { |
| 50 | 42 | | var (fullText, terminators) = texts.GenerateFullText(); |
| | 43 | |
|
| 50 | 44 | | SuffixTrieNode root = new SuffixTrieNode.Leaf(0); |
| | 45 | |
|
| 730 | 46 | | for (var suffixIndex = 0; suffixIndex < fullText.Length; suffixIndex++) |
| 315 | 47 | | root = IncludeSuffixIntoTrie(fullText, terminators, suffixIndex, suffixIndex, root); |
| | 48 | |
|
| 50 | 49 | | return root; |
| 50 | 50 | | } |
| | 51 | |
|
| | 52 | | private static SuffixTrieNode.Intermediate IncludeSuffixIntoTrie( |
| | 53 | | TextWithTerminator fullText, ISet<char> separators, int suffixCurrentIndex, int suffixIndex, |
| | 54 | | SuffixTrieNode node) |
| 556 | 55 | | { |
| 556 | 56 | | var nodeChildren = new Dictionary<SuffixTrieEdge, SuffixTrieNode>(node.Children); |
| 556 | 57 | | var currentChar = fullText[suffixCurrentIndex]; |
| 1527 | 58 | | var childEdgeAndNode = nodeChildren.SingleOrDefault(c => fullText[c.Key.Index] == currentChar); |
| | 59 | |
|
| 556 | 60 | | if (childEdgeAndNode is (SuffixTrieEdge edge, SuffixTrieNode child)) |
| 241 | 61 | | { |
| | 62 | | // Recurse on the child, removing the first char from the current suffix |
| 241 | 63 | | nodeChildren[edge] = IncludeSuffixIntoTrie( |
| 241 | 64 | | fullText, |
| 241 | 65 | | separators, |
| 241 | 66 | | suffixCurrentIndex: suffixCurrentIndex + 1, |
| 241 | 67 | | suffixIndex: suffixIndex, |
| 241 | 68 | | node: child); |
| 241 | 69 | | } |
| | 70 | | else |
| 315 | 71 | | { |
| | 72 | | // Build the entire path down to the first non-terminator char |
| 315 | 73 | | child = new SuffixTrieNode.Leaf(suffixIndex); |
| | 74 | |
|
| 315 | 75 | | var indexOfFirstTerminator = Enumerable |
| 315 | 76 | | .Range(suffixCurrentIndex, fullText.Length - suffixCurrentIndex) |
| 1110 | 77 | | .First(i => separators.Contains(fullText[i])); |
| | 78 | |
|
| 1590 | 79 | | for (int i = indexOfFirstTerminator; i >= suffixCurrentIndex + 1; i--) |
| 480 | 80 | | { |
| 480 | 81 | | child = new SuffixTrieNode.Intermediate( |
| 480 | 82 | | new Dictionary<SuffixTrieEdge, SuffixTrieNode> { [new(i)] = child }); |
| 480 | 83 | | } |
| 315 | 84 | | nodeChildren[new(suffixCurrentIndex)] = child; |
| 315 | 85 | | } |
| | 86 | |
|
| 556 | 87 | | return new SuffixTrieNode.Intermediate(nodeChildren); |
| 556 | 88 | | } |
| | 89 | | } |