| | 1 | | using MoreStructures.RecImmTrees; |
| | 2 | | using MoreStructures.Strings.Matching; |
| | 3 | | using static MoreStructures.Utilities.StringUtilities; |
| | 4 | |
|
| | 5 | | namespace MoreStructures.SuffixStructures.Matching; |
| | 6 | |
|
| | 7 | | /// <summary> |
| | 8 | | /// Exposes utility methods to match a <see cref="TextWithTerminator"/> against a |
| | 9 | | /// <see cref="ISuffixStructureNode{TEdge, TNode}"/> concretion. |
| | 10 | | /// </summary> |
| | 11 | | public static class Matcher |
| | 12 | | { |
| | 13 | | /// <summary> |
| | 14 | | /// Tries to match a pattern against a <see cref="ISuffixStructureNode{TEdge, TNode}"/> built on a text. |
| | 15 | | /// </summary> |
| | 16 | | /// <param name="node">The root of the Suffix Tree, to match the suffix of text against.</param> |
| | 17 | | /// <param name="text">The text whose Suffix Tree has to be matched against the pattern.</param> |
| | 18 | | /// <param name="pattern">The pattern to match. Unlike text, is a string without terminator.</param> |
| | 19 | | /// <typeparam name="TEdge"> |
| | 20 | | /// <inheritdoc cref="IRecImmDictIndexedTreeNode{TEdge, TNode}" path="/typeparam[@name='TEdge']"/> |
| | 21 | | /// </typeparam> |
| | 22 | | /// <typeparam name="TNode"> |
| | 23 | | /// <inheritdoc cref="IRecImmDictIndexedTreeNode{TEdge, TNode}" path="/typeparam[@name='TNode']"/> |
| | 24 | | /// </typeparam> |
| | 25 | | /// <returns>A successful or non-successful match.</returns> |
| | 26 | | /// <remarks> |
| | 27 | | /// <para id="complexity"> |
| | 28 | | /// COMPLEXITY |
| | 29 | | /// <br/> |
| | 30 | | /// Time Complexity = O(t * as) and Space Complexity = O(t * as) where t = length of the text to match and |
| | 31 | | /// as = size of the alphabet of the text. If the alphabet is of constant size, complexity is linear. |
| | 32 | | /// </para> |
| | 33 | | /// </remarks> |
| | 34 | | public static Match<TreePath<TEdge, TNode>> Match<TEdge, TNode>( |
| | 35 | | this ISuffixStructureNode<TEdge, TNode> node, |
| | 36 | | TextWithTerminator text, |
| | 37 | | string pattern) |
| | 38 | | where TEdge : ISuffixStructureEdge<TEdge, TNode> |
| | 39 | | where TNode : ISuffixStructureNode<TEdge, TNode> => |
| 28 | 40 | | Match(node, text, pattern, 0); |
| | 41 | |
|
| | 42 | | private static Match<TreePath<TEdge, TNode>> Match<TEdge, TNode>( |
| | 43 | | this ISuffixStructureNode<TEdge, TNode> node, |
| | 44 | | TextWithTerminator text, |
| | 45 | | string pattern, |
| | 46 | | int textStart) |
| | 47 | | where TEdge : ISuffixStructureEdge<TEdge, TNode> |
| | 48 | | where TNode : ISuffixStructureNode<TEdge, TNode> |
| 59 | 49 | | { |
| 59 | 50 | | if (string.IsNullOrEmpty(pattern)) |
| 1 | 51 | | throw new ArgumentException("Must be non-null and non-emtpy.", nameof(pattern)); |
| | 52 | |
|
| 58 | 53 | | var longestMatch = ( |
| 58 | 54 | | from edge in node.Children.Keys |
| 206 | 55 | | let length = LongestCommonPrefix(edge.Of(text), pattern[textStart..]) |
| 206 | 56 | | select new { Length = length, Edge = edge }) |
| 264 | 57 | | .MaxBy(r => r.Length); |
| | 58 | |
|
| 58 | 59 | | if (longestMatch == null) |
| 5 | 60 | | return new Match<TreePath<TEdge, TNode>>( |
| 5 | 61 | | false, textStart, 0, new()); |
| | 62 | |
|
| 53 | 63 | | if (longestMatch.Length == pattern.Length - textStart) |
| 22 | 64 | | return new Match<TreePath<TEdge, TNode>>( |
| 22 | 65 | | true, textStart + longestMatch.Edge.Start, longestMatch.Length, new()); |
| | 66 | |
|
| | 67 | | // The edge has been fully matched but pattern is longer => chars left to match |
| 31 | 68 | | var childNode = node.Children[longestMatch.Edge]; |
| 31 | 69 | | var childMatch = Match(childNode, text, pattern, textStart + longestMatch.Length); |
| 31 | 70 | | var pathToChild = new TreePath<TEdge, TNode>(longestMatch.Edge, childNode); |
| 31 | 71 | | return new Match<TreePath<TEdge, TNode>>( |
| 31 | 72 | | childMatch.Success, |
| 31 | 73 | | textStart + longestMatch.Edge.Start, |
| 31 | 74 | | longestMatch.Length + childMatch.MatchedChars, |
| 31 | 75 | | pathToChild.Concat(childMatch.Path)); |
| 58 | 76 | | } |
| | 77 | | } |