| | 1 | | using MoreStructures.Utilities; |
| | 2 | |
|
| | 3 | | namespace MoreStructures.BurrowsWheelerTransform; |
| | 4 | |
|
| | 5 | | /// <summary> |
| | 6 | | /// The Burrows-Wheeler Transform (BWT) of a <see cref="TextWithTerminator"/> <paramref name="Text"/> is a permutation |
| | 7 | | /// of the chars of <paramref name="Text"/> which corresponds to the <see cref="BWMatrix.LastColumn"/> of the |
| | 8 | | /// <see cref="BWMatrix"/> of <paramref name="Text"/>. |
| | 9 | | /// </summary> |
| | 10 | | /// <param name="Text">The text to calculate the BWT of.</param> |
| | 11 | | /// <param name="Content">The string which corresponds to the transform of the text.</param> |
| | 12 | | /// <remarks> |
| | 13 | | /// This <see langword="record"/> is a typed wrapped of the underlying <see langword="string"/> representing the BWT. |
| | 14 | | /// It guarantes immutability and strong typing, and also keeps together the <see cref="Text"/> and its transform |
| | 15 | | /// <see cref="Content"/>. |
| | 16 | | /// </remarks> |
| 3879 | 17 | | public record BWTransform(TextWithTerminator Text, RotatedTextWithTerminator Content) |
| 1029 | 18 | | { |
| 1029 | 19 | | /// <summary> |
| 1029 | 20 | | /// Any strategy to sort the <see cref="char"/> of a <see cref="RotatedTextWithTerminator"/>, for example to turn |
| 1029 | 21 | | /// a BWT into its sorted version. |
| 1029 | 22 | | /// </summary> |
| 1029 | 23 | | /// <param name="text">The text to be sorted.</param> |
| 1029 | 24 | | /// <param name="comparer"> |
| 1029 | 25 | | /// The <see cref="IComparer{T}"/> of <see cref="char"/> to be used for comparison. |
| 1029 | 26 | | /// If not specified, a <see cref="CharOrTerminatorComparer"/> using the |
| 1029 | 27 | | /// <see cref="RotatedTextWithTerminator.Terminator"/> of <paramref name="text" /> is used instead. |
| 1029 | 28 | | /// </param> |
| 1029 | 29 | | /// <returns> |
| 1029 | 30 | | /// A new <see cref="RotatedTextWithTerminator"/>, sorted according to the provided <paramref name="comparer"/>, |
| 1029 | 31 | | /// together with a <see cref="IEnumerable{T}"/> of <see cref="int"/>, defining the mapping of the index of each |
| 1029 | 32 | | /// char of the input text into the index of that char in the sorted version of the text. |
| 1029 | 33 | | /// </returns> |
| 1029 | 34 | | public delegate (RotatedTextWithTerminator sortedText, IEnumerable<int> indexesMapping) SortStrategy( |
| 1029 | 35 | | RotatedTextWithTerminator text, IComparer<char>? comparer = null); |
| 1029 | 36 | |
|
| 1029 | 37 | | /// <summary> |
| 1029 | 38 | | /// A strategy to sort a <see cref="RotatedTextWithTerminator"/> using |
| 1029 | 39 | | /// <see cref="Enumerable.OrderBy{TSource, TKey}(IEnumerable{TSource}, Func{TSource, TKey}, IComparer{TKey}?)"/>, |
| 1029 | 40 | | /// which in turn uses a QuickSort with Time Complexity = O(n * log(n)) in average and O(n^2) in the worst |
| 1029 | 41 | | /// (unlikely) case. |
| 1029 | 42 | | /// </summary> |
| 1029 | 43 | | /// <remarks> |
| 1029 | 44 | | /// Tipically used to sort the Burrows-Wheeler Transform. |
| 1029 | 45 | | /// </remarks> |
| 1 | 46 | | public static readonly SortStrategy QuickSort = |
| 1 | 47 | | (text, charComparer) => |
| 200 | 48 | | { |
| 200 | 49 | | charComparer ??= CharOrTerminatorComparer.Build(text.Terminator); |
| 1 | 50 | |
|
| 77220 | 51 | | var textWithIndexes = text.Select((c, i) => (c, i)); |
| 77220 | 52 | | var sortedTextWithIndexes = textWithIndexes.OrderBy(charAndIndex => charAndIndex.c, charComparer); |
| 200 | 53 | | var sortedText = new RotatedTextWithTerminator( |
| 38107 | 54 | | sortedTextWithIndexes.Select(charAndIndex => charAndIndex.c), |
| 200 | 55 | | text.Terminator); |
| 215 | 56 | | var indexesMapping = sortedTextWithIndexes.Select(charAndIndex => charAndIndex.i); |
| 200 | 57 | | return (sortedText, indexesMapping); |
| 201 | 58 | | }; |
| 1029 | 59 | |
|
| 1029 | 60 | | /// <summary> |
| 1029 | 61 | | /// The length of this transform, which corresponds to the length of <see cref="Content"/>. |
| 1029 | 62 | | /// </summary> |
| 22 | 63 | | public int Length => Content.Length; |
| 1029 | 64 | | } |