| | 1 | | using MoreStructures.BurrowsWheelerTransform.Builders.LastFirstFinders; |
| | 2 | |
|
| | 3 | | namespace MoreStructures.BurrowsWheelerTransform.Builders; |
| | 4 | |
|
| | 5 | | /// <summary> |
| | 6 | | /// An extension of <see cref="NaiveBuilder"/> which takes advantange of the last-first property to reduce the |
| | 7 | | /// complexity of <see cref="IBuilder.InvertTransform(MoreStructures.RotatedTextWithTerminator)"/>. |
| | 8 | | /// </summary> |
| | 9 | | /// <remarks> |
| | 10 | | /// A <see cref="ILastFirstFinder"/>, built by <see cref="FirstLastFinderBuilder"/> is used to jump between the BWT |
| | 11 | | /// and its sorted version. |
| | 12 | | /// </remarks> |
| | 13 | | public class LastFirstPropertyBasedBuilder : NaiveBuilder |
| | 14 | | { |
| | 15 | | /// <summary> |
| | 16 | | /// The strategy by which this builder finds chars in the BWT and its sorted version. |
| | 17 | | /// </summary> |
| 102 | 18 | | public Func<RotatedTextWithTerminator, ILastFirstFinder> FirstLastFinderBuilder { get; init; } = |
| 153 | 19 | | (lastBWMColumn) => new PrecomputedFinder(lastBWMColumn, BWTransform.QuickSort(lastBWMColumn).sortedText); |
| | 20 | |
|
| | 21 | | /// <inheritdoc path="//*[not(self::remarks)]"/> |
| | 22 | | /// <remarks> |
| | 23 | | /// <inheritdoc path="/remarks/para[@id='terminator-required']"/> |
| | 24 | | /// <para id="algo"> |
| | 25 | | /// ALGORITHM |
| | 26 | | /// <br/> |
| | 27 | | /// This implementation inverts the BWT by using the last-first property. |
| | 28 | | /// <br/> |
| | 29 | | /// - First column of the matrix (sBWT) is just the last column (BWT), sorted. |
| | 30 | | /// <br/> |
| | 31 | | /// - By last-first property, the 1-st (and only) occurrence of terminator in sBWT at sBWT[0] corresponds |
| | 32 | | /// to the 1st occurrence of terminator in BWT at BWT[i0]. BWTs[i0] is the 1-st char of the text. |
| | 33 | | /// <br/> |
| | 34 | | /// - Again by last-first property, the n-th occurrence of c in BWTs at sBWTs[i0] corresponds to the n-th |
| | 35 | | /// occurrence of c in BWT at BWT[i1]. BWTs[i1] is the 2-st char of the text. |
| | 36 | | /// <br/> |
| | 37 | | /// - And so on, until BWTs[i(n-1)], the terminator, is reached. |
| | 38 | | /// </para> |
| | 39 | | /// <para id="complexity"> |
| | 40 | | /// COMPLEXITY |
| | 41 | | /// <br/> |
| | 42 | | /// - Before any iteration, Sorted BWT is computed, taking O(n * log(n)) time, where n is the length of |
| | 43 | | /// <paramref name="lastBWMColumn"/>. If the alphabet is of constant size sigma, Counting Sort reduces |
| | 44 | | /// the overall Time Complexity of this step to O(n). |
| | 45 | | /// <br/> |
| | 46 | | /// - After that the finder may also preallocate other supporting structures, to speed up searches (such |
| | 47 | | /// the dictionary used in <see cref="PrecomputedFinder"/>. Although it depends on the specific |
| | 48 | | /// implementation built by <see cref="FirstLastFinderBuilder"/>, we may assume this cost to also be |
| | 49 | | /// linear with n. |
| | 50 | | /// <br/> |
| | 51 | | /// - From terminator to terminator, there are n top-level iterations. Each iteration takes m1 + m2, |
| | 52 | | /// where m1 is the cost of <see cref="ILastFirstFinder.FindIndexOfNthOccurrenceInBWT(int, int)"/> |
| | 53 | | /// and m2 is the cost of <see cref="ILastFirstFinder.FindOccurrenceRankOfCharInSortedBWT(int)"/>. |
| | 54 | | /// <br/> |
| | 55 | | /// - Finally, the <see cref="StringBuilder"/> used as accumulator generates the text string. At most O(n). |
| | 56 | | /// <br/> |
| | 57 | | /// - So total Time Complexity is O(n * (m1 + m2)) and Space Complexity is O(n). |
| | 58 | | /// <br/> |
| | 59 | | /// <br/> |
| | 60 | | /// Using <see cref="NaiveFinder"/>, m1 and m2 are both O(n), so Time Complexity is O(n^2). |
| | 61 | | /// <br/> |
| | 62 | | /// Using <see cref="BinarySearchFinder"/>, m1 is O(n) and m2 is O(log(n)), so overall Time |
| | 63 | | /// Complexity is still O(n^2). |
| | 64 | | /// <br/> |
| | 65 | | /// Using <see cref="PrecomputedFinder"/>, m1 is O(1), whereas m2 is O(log(n / sigma)) where |
| | 66 | | /// sigma is the size of the alphabet, so overall Time Complexity is O(n * log(n)) if sigma is constant. |
| | 67 | | /// <br/> |
| | 68 | | /// </para> |
| | 69 | | /// </remarks> |
| | 70 | | public override TextWithTerminator InvertTransform(RotatedTextWithTerminator lastBWMColumn) |
| 15 | 71 | | { |
| 15 | 72 | | var firstLastFinder = FirstLastFinderBuilder(lastBWMColumn); |
| 15 | 73 | | var sbwt = firstLastFinder.SortedBWT; |
| | 74 | |
|
| 15 | 75 | | var reversedChars = GetReversedChars(sbwt, firstLastFinder); |
| 15 | 76 | | return new(reversedChars.Reverse(), lastBWMColumn.Terminator); |
| 15 | 77 | | } |
| | 78 | |
|
| | 79 | | private static IEnumerable<char> GetReversedChars( |
| | 80 | | RotatedTextWithTerminator sbwt, ILastFirstFinder firstLastFinder) |
| 45 | 81 | | { |
| 45 | 82 | | var terminator = sbwt.Terminator; |
| 45 | 83 | | var indexOfCharInSortedBWT = 0; // Start with '$', which is the 1st char in sbwt |
| | 84 | |
|
| | 85 | | char charToAppend; |
| | 86 | | do |
| 594 | 87 | | { |
| 594 | 88 | | (indexOfCharInSortedBWT, _) = firstLastFinder.LastToFirst(indexOfCharInSortedBWT); |
| 594 | 89 | | charToAppend = sbwt[indexOfCharInSortedBWT]; |
| 594 | 90 | | if (charToAppend != terminator) |
| 549 | 91 | | yield return charToAppend; |
| 594 | 92 | | } |
| 594 | 93 | | while (charToAppend != terminator); |
| 45 | 94 | | } |
| | 95 | | } |