| | 1 | | using MoreStructures.SuffixTrees; |
| | 2 | |
|
| | 3 | | namespace MoreStructures.MutableTrees.Conversions; |
| | 4 | |
|
| | 5 | | /// <summary> |
| | 6 | | /// An implementation of <see cref="IConversion"/> using a <see cref="Stack{T}"/> to perform conversions. |
| | 7 | | /// </summary> |
| | 8 | | /// <remarks> |
| | 9 | | /// <para id="advantages"> |
| | 10 | | /// ADVANTAGES AND DISADVANTAGES |
| | 11 | | /// <br/> |
| | 12 | | /// <inheritdoc cref="DocFragments" path="/remarks/para[@id='fully-iterative-advantages']"/> |
| | 13 | | /// </para> |
| | 14 | | /// <para id="algorithm"> |
| | 15 | | /// ALGORITHM |
| | 16 | | /// <br/> |
| | 17 | | /// - The algorithm first visits the provided <see cref="MutableTree.Node"/> structure top-down, stacking up each |
| | 18 | | /// mutable node (root, intermediate and leaves) in custom "stack frame objects" onto a <see cref="Stack{T}"/> |
| | 19 | | /// data structure. |
| | 20 | | /// <br/> |
| | 21 | | /// - Then, it builds the output <see cref="SuffixTreeNode"/> structure node by node, bottom-up, trimming at the |
| | 22 | | /// same time the edges at the first encountered terminator. It does so by adding each |
| | 23 | | /// <see cref="MutableTree.Node"/> instance encountered in the top-down visit, which is not a leaf, to the same |
| | 24 | | /// <see cref="Stack{T}"/> instance used for the top-down visit. |
| | 25 | | /// <br/> |
| | 26 | | /// - Together with the mutable node being processed, the stack frame object also stores other data, required to |
| | 27 | | /// build the output structure. |
| | 28 | | /// <br/> |
| | 29 | | /// - 1. The incoming <see cref="SuffixTreeEdge"/>. It's non-nullable. |
| | 30 | | /// <br/> |
| | 31 | | /// - 2. The <see cref="IDictionary{TKey, TValue}"/> of (<see cref="SuffixTreeEdge"/>, |
| | 32 | | /// <see cref="SuffixTreeNode"/>) pairs for the children of the <see cref="SuffixTreeNode"/> parent of this |
| | 33 | | /// mutable node. Once fully populated, the dictionary will be used to build the immutable parent node. It's |
| | 34 | | /// non-nullable. |
| | 35 | | /// <br/> |
| | 36 | | /// - 3. The one for the children of the <see cref="SuffixTreeNode"/> corresponding to this mutable node. |
| | 37 | | /// Once fully populated, the dictionary will be used to build the immutable node. It's nullable. |
| | 38 | | /// <br/> |
| | 39 | | /// - A special treatment is reserved for the root mutable node, which doesn't have an incoming edge, nor a parent |
| | 40 | | /// node and the stack frame of which initializes the stack before top-down traversal: dummy objects are created. |
| | 41 | | /// <br/> |
| | 42 | | /// - Then top-down traversal of the tree is done, by processing the frame on top of the stack while the stack is |
| | 43 | | /// not empty. |
| | 44 | | /// <br/> |
| | 45 | | /// - During tree descent, mutable nodes of children (intermediate and leaves) are added to the stack without |
| | 46 | | /// dictionary of children. |
| | 47 | | /// <br/> |
| | 48 | | /// - Such dictionary is instantiated when visiting each node during descent and passed to the stack frames of each |
| | 49 | | /// the children, so that each children can add the <see cref="SuffixTreeNode"/> to the collection of children of |
| | 50 | | /// its parent during the bottom-up traversal phase. |
| | 51 | | /// <br/> |
| | 52 | | /// - After instantiating the dictionary of children, the node being visited is re-added to the top of the stack. |
| | 53 | | /// <br/> |
| | 54 | | /// - Then, children of the node (and their incoming edges) are added to the top of the stack, in reverse order, |
| | 55 | | /// so that children are visited in straight order and before the parent. |
| | 56 | | /// <br/> |
| | 57 | | /// - While stacking up children, edge labels are trimmed to the 1st encountered terminator, if any. |
| | 58 | | /// </para> |
| | 59 | | /// <para id="complexity"> |
| | 60 | | /// COMPLEXITY |
| | 61 | | /// <br/> |
| | 62 | | /// - Each node is added to the stack at most twice: once during the top-down visit (when the dictionary of |
| | 63 | | /// children is not set) and once during the bottom-up visit (when the dictionary of children is set) - the |
| | 64 | | /// second scenario only happening for non-leaves. |
| | 65 | | /// <br/> |
| | 66 | | /// - Operations per stack frame processing are all done in constant time: stack frame deconstruction, stack depth |
| | 67 | | /// check, children count, children dictionary instantiation, <see cref="SuffixTreeNode.Leaf"/> and |
| | 68 | | /// <see cref="SuffixTreeNode.Intermediate"/> creation. |
| | 69 | | /// <br/> |
| | 70 | | /// - There is one exception though: edge trimming takes time O(n * m), where n is the length of the text and m |
| | 71 | | /// is the number of terminators, hence the number of concatenated <see cref="TextWithTerminator"/>. |
| | 72 | | /// <br/> |
| | 73 | | /// - In conclusion, Time Complexity is O(n^2 * m) and Space Complexity is O(n). |
| | 74 | | /// </para> |
| | 75 | | /// </remarks> |
| | 76 | | internal class FullyIterativeConversion : IConversion |
| | 77 | | { |
| | 78 | | /// <inheritdoc path="//*[not(self::remarks)]"/> |
| | 79 | | /// <remarks> |
| | 80 | | /// <inheritdoc cref="FullyIterativeConversion"/> |
| | 81 | | /// </remarks> |
| | 82 | | public SuffixTreeNode ConvertToSuffixTree( |
| | 83 | | MutableTree.Node mutableNode, TextWithTerminator fullText, ISet<char> terminators) |
| 59 | 84 | | { |
| 59 | 85 | | var stack = new Stack<StackFrame>(); |
| 59 | 86 | | var rootIncomingEdge = new SuffixTreeEdge(0, 1); |
| 59 | 87 | | var rootParentChildren = new Dictionary<SuffixTreeEdge, SuffixTreeNode>(); |
| 59 | 88 | | stack.Push(new(mutableNode, rootIncomingEdge, rootParentChildren, null)); |
| | 89 | |
|
| 59 | 90 | | var terminatorsIndexMap = |
| 59 | 91 | | TextWithTerminatorExtensions.BuildTerminatorsIndexMap(fullText, terminators).ToList(); |
| | 92 | |
|
| 884 | 93 | | while (stack.Count > 0) |
| 825 | 94 | | ProcessStack(stack, terminatorsIndexMap); |
| | 95 | |
|
| 59 | 96 | | return rootParentChildren[rootIncomingEdge]; |
| 59 | 97 | | } |
| | 98 | |
|
| 825 | 99 | | private sealed record StackFrame( |
| 1650 | 100 | | MutableTree.Node MutableNode, |
| 1650 | 101 | | SuffixTreeEdge IncomingEdge, |
| 1650 | 102 | | IDictionary<SuffixTreeEdge, SuffixTreeNode> ParentChildren, |
| 2475 | 103 | | IDictionary<SuffixTreeEdge, SuffixTreeNode>? Children); |
| | 104 | |
|
| | 105 | | private static void ProcessStack(Stack<StackFrame> stack, IList<int> terminatorsIndexMap) |
| 825 | 106 | | { |
| 825 | 107 | | var (mutableNode, incomingEdge, parentChildren, children) = stack.Pop(); |
| | 108 | |
|
| 825 | 109 | | if (!mutableNode.Children.Any()) |
| 415 | 110 | | { |
| | 111 | | // It is a leaf => generate node and store into parentChildren |
| 415 | 112 | | parentChildren[incomingEdge] = new SuffixTreeNode.Leaf(mutableNode.LeafStart!.Value); |
| 415 | 113 | | return; |
| | 114 | | } |
| | 115 | |
|
| 410 | 116 | | if (children != null) |
| 205 | 117 | | { |
| | 118 | | // Intermediate, whose children have already been processed => generate node and store into parentChildren |
| 205 | 119 | | parentChildren[incomingEdge] = new SuffixTreeNode.Intermediate(children); |
| 205 | 120 | | return; |
| | 121 | | } |
| | 122 | |
|
| | 123 | | // Intermediate, whose children have not been processed yet => re-push mutable node, this time with a reference |
| | 124 | | // to a newly created dictionary of children, then push children onto the stack, to have them processed first |
| | 125 | | // and populating the instantiated children dictionary. |
| 205 | 126 | | children = new Dictionary<SuffixTreeEdge, SuffixTreeNode>(); |
| 205 | 127 | | stack.Push(new(mutableNode, incomingEdge, parentChildren, children)); |
| 1737 | 128 | | foreach (var (childMutableEdge, childMutableNode) in mutableNode.Children.Reverse()) |
| 561 | 129 | | { |
| 561 | 130 | | var childEdge = new SuffixTreeEdge(childMutableEdge.Start, childMutableEdge.Length); |
| | 131 | |
|
| | 132 | | // If the child edge contains any terminator, trim tree |
| 561 | 133 | | if (terminatorsIndexMap[childEdge.Start + childEdge.Length - 1] != terminatorsIndexMap[childEdge.Start]) |
| 72 | 134 | | { |
| 72 | 135 | | childMutableNode.Children.Clear(); |
| 72 | 136 | | childEdge = new SuffixTreeEdge(childMutableEdge.Start, |
| 72 | 137 | | terminatorsIndexMap[childEdge.Start] - childMutableEdge.Start + 1); |
| 72 | 138 | | } |
| | 139 | |
|
| 561 | 140 | | stack.Push(new(childMutableNode, childEdge, children, null)); |
| 561 | 141 | | } |
| 825 | 142 | | } |
| | 143 | | } |