| | 1 | | using MoreStructures.RecImmTrees; |
| | 2 | | using MoreStructures.RecImmTrees.Visitor; |
| | 3 | | using MoreStructures.SuffixStructures; |
| | 4 | |
|
| | 5 | | namespace MoreStructures.SuffixArrays.Builders; |
| | 6 | |
|
| | 7 | | /// <summary> |
| | 8 | | /// An algorithm for building the <see cref="SuffixArray"/> from an already built |
| | 9 | | /// <see cref="ISuffixStructureNode{TEdge, TNode}"/> structure for the provided <see cref="TextWithTerminator"/>. |
| | 10 | | /// </summary> |
| | 11 | | /// <typeparam name="TEdge">The type of edges of the specific structure.</typeparam> |
| | 12 | | /// <typeparam name="TNode">The type of nodes of the specific structure.</typeparam> |
| | 13 | | /// <remarks> |
| | 14 | | /// <para id="algo"> |
| | 15 | | /// The Suffix Tree for the text "mississippi$" is: |
| | 16 | | /// <code> |
| | 17 | | /// R |
| | 18 | | /// - $ (11) |
| | 19 | | /// - i |
| | 20 | | /// - $ (10) |
| | 21 | | /// - ssi |
| | 22 | | /// - p..$ (7) |
| | 23 | | /// - s..$ (4) |
| | 24 | | /// - m..$ |
| | 25 | | /// - ... |
| | 26 | | /// </code> |
| | 27 | | /// A DFS visit of the leaves of the tree, navigating edges in lexicographic order, gives { 11, 10, 7, 4, ... }, |
| | 28 | | /// which is the <see cref="SuffixArray"/> of "mississippi$". |
| | 29 | | /// </para> |
| | 30 | | /// <para id="complexity"> |
| | 31 | | /// - The Suffix Tree of the text is provided as input, and the complexity of building or keeping it in memoery |
| | 32 | | /// is not considered here. |
| | 33 | | /// <br/> |
| | 34 | | /// - The DFS of the Tree is linear in m, where m is the number of nodes in the Suffix Tree. m is O(n) for Suffix |
| | 35 | | /// Trees and O(n^2) for Suffix Tries, where n is the length of the text. |
| | 36 | | /// <br/> |
| | 37 | | /// - While DFS itself is linear in m, edges have to be sorted in lexicographic order, which is done via the LINQ |
| | 38 | | /// <see cref="Enumerable.OrderBy{TSource, TKey}(IEnumerable{TSource}, Func{TSource, TKey})"/> method. |
| | 39 | | /// <br/> |
| | 40 | | /// - There are O(m) edges in the tree, so sorting can be O(m * log(m)) if the alphabet of the text is O(n), using |
| | 41 | | /// QuickSort or similar, and O(m) if the alphabet is constant w.r.t. to n, using Counting Sort or similar. |
| | 42 | | /// <br/> |
| | 43 | | /// - For each visited node, constant work is done: checking whether the node is a leaf and potentially returning |
| | 44 | | /// it. |
| | 45 | | /// <br/> |
| | 46 | | /// - Therefore Time Complexity is O(n^2 * log(n)) for a Suffix Trie with a non-constant alphabet, O(n^2) for a |
| | 47 | | /// Suffix Trie with a constant alphabet, O(n * log(n)) for a Suffix Tree with a non-constant alphabet and |
| | 48 | | /// O(n) for a Suffix Tree with a constant alphabet. |
| | 49 | | /// <br/> |
| | 50 | | /// - Space Complexity is dominated by the space required to sort edges in lexicographic order, before visiting |
| | 51 | | /// them in DFS: O(n^2) for a Suffix Trie and O(n) for a Suffix Tree. |
| | 52 | | /// </para> |
| | 53 | | /// </remarks> |
| | 54 | | public class SuffixStructureBasedSuffixArrayBuilder<TEdge, TNode> : ISuffixArrayBuilder |
| | 55 | | where TEdge : ISuffixStructureEdge<TEdge, TNode> |
| | 56 | | where TNode : ISuffixStructureNode<TEdge, TNode> |
| | 57 | | { |
| | 58 | | private readonly IVisitStrategy<TNode, TreeTraversalVisit<TEdge, TNode>> DepthFirstTraversal; |
| | 59 | |
|
| | 60 | | /// <summary> |
| | 61 | | /// The <see cref="TextWithTerminator"/>, to build the <see cref="SuffixArray"/> of. |
| | 62 | | /// </summary> |
| 472 | 63 | | public TextWithTerminator Text { get; } |
| | 64 | |
|
| | 65 | | /// <summary> |
| | 66 | | /// The root node of the <see cref="ISuffixStructureNode{TEdge, TNode}"/> structure, to build the |
| | 67 | | /// <see cref="SuffixArray"/> of. |
| | 68 | | /// </summary> |
| 14 | 69 | | public TNode Node { get; } |
| | 70 | |
|
| | 71 | | /// <summary> |
| | 72 | | /// <inheritdoc cref="SuffixStructureBasedSuffixArrayBuilder{TEdge, TNode}" path="/summary"/> |
| | 73 | | /// </summary> |
| | 74 | | /// <param name="text"><inheritdoc cref="Text" path="/summary"/></param> |
| | 75 | | /// <param name="node"><inheritdoc cref="Node" path="/summary"/></param> |
| 14 | 76 | | public SuffixStructureBasedSuffixArrayBuilder(TextWithTerminator text, TNode node) |
| 14 | 77 | | { |
| 14 | 78 | | Text = text; |
| 14 | 79 | | Node = node; |
| 14 | 80 | | DepthFirstTraversal = new FullyRecursiveDepthFirstTraversal<TEdge, TNode>() |
| 14 | 81 | | { |
| 972 | 82 | | ChildrenSorter = visit => visit.Node.Children.OrderBy(kvp => Text[kvp.Key]), |
| 14 | 83 | | TraversalOrder = TreeTraversalOrder.ChildrenFirst |
| 14 | 84 | | }; |
| 14 | 85 | | } |
| | 86 | |
|
| | 87 | | /// <inheritdoc path="//*[self::summary or self::remarks]"/> |
| | 88 | | /// <summary> |
| | 89 | | /// Builds the <see cref="SuffixArray"/> for <see name="Node"/>. |
| | 90 | | /// </summary> |
| 14 | 91 | | public SuffixArray Build() => new( |
| 14 | 92 | | from visit in DepthFirstTraversal.Visit(Node) |
| 500 | 93 | | let node = visit.Node |
| 500 | 94 | | where node.IsLeaf() |
| 186 | 95 | | select node.Start!.Value); |
| | 96 | | } |