| | 1 | | using MoreStructures.BurrowsWheelerTransform.Builders.LastFirstFinders; |
| | 2 | | using MoreStructures.SuffixArrays; |
| | 3 | |
|
| | 4 | | namespace MoreStructures.BurrowsWheelerTransform.Matching.Comparers; |
| | 5 | |
|
| | 6 | | /// <summary> |
| | 7 | | /// A <see cref="SuffixAgainstPatternComparer"/> which uses the provided <see cref="IndexModKPartialSuffixArray"/> to |
| | 8 | | /// find the suffix of the provided <see cref="SuffixAgainstPatternComparer.Text"/>, to compare against the provided |
| | 9 | | /// <see cref="SuffixAgainstPatternComparer.Pattern"/>. |
| | 10 | | /// </summary> |
| | 11 | | /// <remarks> |
| | 12 | | /// <para id="info"> |
| | 13 | | /// - This is a generalization of <see cref="SuffixArrayAgainstPatternComparer"/>, which also supports Partial |
| | 14 | | /// Suffix Arrays, i.e. Suffix Arrays with incomplete information, and in particular Partial Suffix Arrays of |
| | 15 | | /// type <see cref="IndexModKPartialSuffixArray"/>. |
| | 16 | | /// <br/> |
| | 17 | | /// - When K = 1, an instance of <see cref="IndexModKPartialSuffixArray"/> contains the index of all suffixes, and |
| | 18 | | /// is equivalent to a <see cref="SuffixArray"/>. |
| | 19 | | /// </para> |
| | 20 | | /// <para id="algo"> |
| | 21 | | /// ALGORITHM |
| | 22 | | /// <br/> |
| | 23 | | /// - Start from the index I(0) = first term of comparison passed to <see cref="Compare(int, int)"/>. |
| | 24 | | /// <br/> |
| | 25 | | /// - Perform Last-First mapping from I(i) to I(i+1) from BWT to Sorted BWT, until the index I(i) is present in |
| | 26 | | /// the Partial Suffix Array. While mapping, accumulate all indexes I(0), I(1), ... into a list I. |
| | 27 | | /// <br/> |
| | 28 | | /// - Once such index I(n) is found, iterate over I in reverse order, updating the Partial Suffix Array, from I(n) |
| | 29 | | /// all the way back to I(0). |
| | 30 | | /// <br/> |
| | 31 | | /// - Finally, invoke <see cref="SuffixAgainstPatternComparer.CompareSuffixAgainstPattern(int, int)"/> on the value |
| | 32 | | /// of the Partial Suffix Array at index I(0). |
| | 33 | | /// </para> |
| | 34 | | /// <para id="complexity"> |
| | 35 | | /// COMPLEXITY |
| | 36 | | /// <br/> |
| | 37 | | /// - There are at most K iterations, where K is the modulo of the Partial Suffix Array: at each iteration, the |
| | 38 | | /// Last-First mapping finds the index I(i+1), in the Sorted BWT, of a suffix which augments the previous suffix |
| | 39 | | /// by 1 (prepending a single char). |
| | 40 | | /// <br/> |
| | 41 | | /// - So there are at most K iterations to update the Partial Suffix Array with I(n), I(n-1), ... I(0). The Partial |
| | 42 | | /// Suffix Array ends up having at most n elements (1 per suffix of the text). |
| | 43 | | /// <br/> |
| | 44 | | /// - Finally, there is <see cref="SuffixAgainstPatternComparer.CompareSuffixAgainstPattern(int, int)"/>. |
| | 45 | | /// <br/> |
| | 46 | | /// - Therefore, Time Complexity is O(n * K) and Space Complexity is O(n). |
| | 47 | | /// </para> |
| | 48 | | /// </remarks> |
| | 49 | | public class IndexModKPartialSuffixArrayAgainstPatternComparer : SuffixAgainstPatternComparer |
| | 50 | | { |
| | 51 | | private readonly IDictionary<int, int> PartialSuffixArrayIndexes; |
| | 52 | | private readonly ILastFirstFinder LastFirstFinder; |
| | 53 | |
|
| | 54 | | /// <summary> |
| | 55 | | /// <inheritdoc cref="IndexModKPartialSuffixArrayAgainstPatternComparer"/> |
| | 56 | | /// </summary> |
| | 57 | | /// <param name="text">The text, to extract suffixes from via <paramref name="partialSuffixArray"/>.</param> |
| | 58 | | /// <param name="partialSuffixArray"> |
| | 59 | | /// The Partial Suffix Array of <paramref name="text"/>, to map (when the Partial Suffix Array contains such item) |
| | 60 | | /// the first term of comparison to the starting index in <paramref name="text"/> of the corresponding suffix. |
| | 61 | | /// </param> |
| | 62 | | /// <param name="pattern">The pattern, to compare against each suffix of <paramref name="text"/>.</param> |
| | 63 | | /// <param name="bwt"> |
| | 64 | | /// The Burrows-Wheeler Transform of <paramref name="text"/>. Required to instantiate a |
| | 65 | | /// <see cref="ILastFirstFinder"/>, to fill-in the gaps of the <paramref name="partialSuffixArray"/>. |
| | 66 | | /// </param> |
| | 67 | | /// <param name="sbwt"> |
| | 68 | | /// The sorted version of <paramref name="bwt"/>. Required to instantiate a <see cref="ILastFirstFinder"/>, to |
| | 69 | | /// fill-in the gaps of the <paramref name="partialSuffixArray"/>. |
| | 70 | | /// </param> |
| | 71 | | public IndexModKPartialSuffixArrayAgainstPatternComparer( |
| | 72 | | TextWithTerminator text, IndexModKPartialSuffixArray partialSuffixArray, IEnumerable<char> pattern, |
| | 73 | | RotatedTextWithTerminator bwt, RotatedTextWithTerminator sbwt) |
| 1 | 74 | | : base(text, pattern) |
| 1 | 75 | | { |
| 1 | 76 | | PartialSuffixArrayIndexes = new Dictionary<int, int>(partialSuffixArray.Indexes); |
| 1 | 77 | | LastFirstFinder = new PrecomputedFinder(bwt, sbwt); |
| 1 | 78 | | } |
| | 79 | |
|
| | 80 | | /// <inheritdoc path="//*[not(self::summary or self::remarks)]" /> |
| | 81 | | /// <summary> |
| | 82 | | /// Compares the suffix of text identified by the <paramref name="x"/>-th element of the Suffix Array, against the |
| | 83 | | /// pattern. Covers the gap in the provided <see cref="IndexModKPartialSuffixArray"/> by iteratively using the |
| | 84 | | /// Last-First Property between BWT and its Sorted version. |
| | 85 | | /// </summary> |
| | 86 | | /// <remarks> |
| | 87 | | /// <inheritdoc cref="IndexModKPartialSuffixArrayAgainstPatternComparer" path="/remarks"/> |
| | 88 | | /// </remarks> |
| | 89 | | public override int Compare(int x, int y) |
| 17 | 90 | | { |
| | 91 | | // Apply Last-First property until we get an index of the Sorted BWT for which we have the corresponding |
| | 92 | | // item in the Partial Suffix Array. |
| 17 | 93 | | var indexes = new List<int> { x }; |
| | 94 | | int suffixIndexStart; |
| 25 | 95 | | while (!PartialSuffixArrayIndexes.TryGetValue(indexes[^1], out suffixIndexStart)) |
| 8 | 96 | | indexes.Add(LastFirstFinder.LastToFirst(indexes[^1]).indexInSortedBWT); |
| | 97 | |
|
| 84 | 98 | | for (var i = indexes.Count - 1; i >= 0; i--) |
| 25 | 99 | | PartialSuffixArrayIndexes[indexes[i]] = suffixIndexStart + (indexes.Count - 1 - i); |
| | 100 | |
|
| 17 | 101 | | return CompareSuffixAgainstPattern(x, PartialSuffixArrayIndexes[x]); |
| 17 | 102 | | } |
| | 103 | | } |