| | 1 | | using MoreStructures.RecImmTrees; |
| | 2 | | using MoreStructures.RecImmTrees.Visitor; |
| | 3 | | using MoreStructures.SuffixTrees; |
| | 4 | | using MoreStructures.SuffixTries; |
| | 5 | |
|
| | 6 | | namespace MoreStructures.SuffixStructures.Matching; |
| | 7 | |
|
| | 8 | | /// <summary> |
| | 9 | | /// Base class for all <see cref="ISnssFinder"/> concretions which implement |
| | 10 | | /// <see cref="ISnssFinder.Find(IEnumerable{char}, IEnumerable{char})"/> using a suffix structure |
| | 11 | | /// (a concretions of <see cref="ISuffixStructureNode{TEdge, TNode}"/>, implementing |
| | 12 | | /// <see cref="IRecImmDictIndexedTreeNode{TEdge, TNode}"/>), such as <see cref="SuffixTreeNode"/> or |
| | 13 | | /// <see cref="SuffixTrieNode"/>). |
| | 14 | | /// </summary> |
| | 15 | | public abstract class SuffixStructureBasedSnssFinder : ISnssFinder |
| | 16 | | { |
| | 17 | | /// <summary> |
| | 18 | | /// A special char to be used as end delimiter for the first text, used for building the suffix structure. |
| | 19 | | /// Should not occur in first text, nor in the second, since a single suffix structure is built, embedding both |
| | 20 | | /// texts (a.k.a. generalized suffix structure). |
| | 21 | | /// </summary> |
| 142 | 22 | | public char Terminator1 { get; } |
| | 23 | |
|
| | 24 | | /// <summary> |
| | 25 | | /// A special char to be used as end delimiter for the second text, used for building the suffix structure. |
| | 26 | | /// Should not occur in first text, nor in the second, since a single suffix structure is built, embedding both |
| | 27 | | /// texts (a.k.a. generalized suffix structure). |
| | 28 | | /// </summary> |
| 132 | 29 | | public char Terminator2 { get; } |
| | 30 | |
|
| | 31 | | /// <summary> |
| | 32 | | /// Whether the two sequences of chars (first and second text) should be evaluated, in order to |
| | 33 | | /// make sure that are valid, i.e. they don't contain <see cref="Terminator1"/> nor <see cref="Terminator2"/>. |
| | 34 | | /// By default set to <see langword="true"/>. |
| | 35 | | /// </summary> |
| 72 | 36 | | public bool ValidateInput { get; init; } |
| | 37 | |
|
| | 38 | | /// <summary> |
| | 39 | | /// Builds an instance with the provided terminators, validating the input by default. All text passed |
| | 40 | | /// to <see cref="Find(IEnumerable{char}, IEnumerable{char})"/> must conform with these two terminators. |
| | 41 | | /// </summary> |
| | 42 | | /// <param name="terminator1"><inheritdoc cref="Terminator1" path="/summary"/></param> |
| | 43 | | /// <param name="terminator2"><inheritdoc cref="Terminator2" path="/summary"/></param> |
| 36 | 44 | | protected SuffixStructureBasedSnssFinder(char terminator1, char terminator2) |
| 36 | 45 | | { |
| 36 | 46 | | if (terminator1 == terminator2) |
| 4 | 47 | | throw new ArgumentException($"{nameof(terminator1)} and {nameof(terminator2)} must be different"); |
| | 48 | |
|
| 32 | 49 | | Terminator1 = terminator1; |
| 32 | 50 | | Terminator2 = terminator2; |
| 32 | 51 | | ValidateInput = true; |
| 32 | 52 | | } |
| | 53 | |
|
| | 54 | | /// <inheritdoc/> |
| | 55 | | public abstract IEnumerable<string> Find(IEnumerable<char> text1, IEnumerable<char> text2); |
| | 56 | |
|
| | 57 | | /// <summary> |
| | 58 | | /// Validates the provided texts against this finder, checking that they are compatible with |
| | 59 | | /// <see cref="Terminator1"/> and <see cref="Terminator2"/>. |
| | 60 | | /// </summary> |
| | 61 | | /// <param name="text1">The first text to validate.</param> |
| | 62 | | /// <param name="text2">The second text to validate.</param> |
| | 63 | | protected void ValidateTexts(IEnumerable<char> text1, IEnumerable<char> text2) |
| 40 | 64 | | { |
| 40 | 65 | | if (text1.Contains(Terminator1) || text1.Contains(Terminator2)) |
| 6 | 66 | | throw new ArgumentException( |
| 6 | 67 | | $"Should not contain {nameof(Terminator1)} nor {nameof(Terminator2)}: {Terminator1} {Terminator2}", |
| 6 | 68 | | nameof(text1)); |
| 34 | 69 | | if (text2.Contains(Terminator1) || text2.Contains(Terminator2)) |
| 6 | 70 | | throw new ArgumentException( |
| 6 | 71 | | $"Should not contain {nameof(Terminator1)} nor {nameof(Terminator2)}: {Terminator1} {Terminator2}", |
| 6 | 72 | | nameof(text2)); |
| 28 | 73 | | } |
| | 74 | |
|
| | 75 | | /// <summary> |
| | 76 | | /// Rebuilds the root-to-node prefix, from <paramref name="initialNode"/> up to the root of the Suffix Tree (node |
| | 77 | | /// with no parent), using the provided cache of visited nodes, <paramref name="cachedVisits"/>, to navigate the |
| | 78 | | /// Suffix Tree upwards. |
| | 79 | | /// </summary> |
| | 80 | | /// <param name="text">The text used to generate the Suffix Tree, and to be used to rebuild the prefix.</param> |
| | 81 | | /// <param name="initialNode">The node, to start navigating from.</param> |
| | 82 | | /// <param name="cachedVisits">A dictionary of visits by node, to jump from a node to its parent.</param> |
| | 83 | | /// <returns> |
| | 84 | | /// A lazily generated sequence of strings, corresponding to the edges from <paramref name="initialNode"/> up to |
| | 85 | | /// the root of the Suffix Tree. Empty if <paramref name="initialNode"/> is the root of the tree. |
| | 86 | | /// </returns> |
| | 87 | | protected static IEnumerable<string> CollectPrefixChars<TEdge, TNode>( |
| | 88 | | TextWithTerminator text, |
| | 89 | | TNode initialNode, |
| | 90 | | IDictionary<TNode, TreeTraversalVisit<TEdge, TNode>> cachedVisits) |
| | 91 | | where TEdge : IRecImmDictIndexedTreeEdge<TEdge, TNode>, TextWithTerminator.ISelector |
| | 92 | | where TNode : IRecImmDictIndexedTreeNode<TEdge, TNode> |
| 173 | 93 | | { |
| 173 | 94 | | var node = initialNode; |
| 438 | 95 | | while (true) |
| 438 | 96 | | { |
| 438 | 97 | | var (_, parentNode, incomingEdge, _) = cachedVisits[node]; |
| | 98 | |
|
| 438 | 99 | | if (parentNode == null) |
| 113 | 100 | | yield break; |
| | 101 | |
|
| 325 | 102 | | yield return text[incomingEdge!]; // if parentNode != null, then incomingEdge is also != null |
| 265 | 103 | | node = parentNode; |
| 265 | 104 | | } |
| | 105 | | } |
| | 106 | | } |