| | 1 | | using MoreStructures.Utilities; |
| | 2 | |
|
| | 3 | | namespace MoreStructures.SuffixArrays.CyclicShifts; |
| | 4 | |
|
| | 5 | | /// <summary> |
| | 6 | | /// An implementation of <see cref="IDoubleLengthPcsSorter"/> sorting of partial cyclic shifts (PCS) of length 2 * L |
| | 7 | | /// of a string by generating them and sorting with the LINQ-provided QuickSort. Ignores the provided order and |
| | 8 | | /// equivalence classes of the PCS of length L. |
| | 9 | | /// </summary> |
| | 10 | | /// <remarks> |
| | 11 | | /// <para id="advantages"> |
| | 12 | | /// ADVANTAGES AND DISADVANTAGES |
| | 13 | | /// <br/> |
| | 14 | | /// - Unlike other implementations, such as <see cref="CountingSortDoubleLengthPcsSorter"/>, this sorter needs to |
| | 15 | | /// know whether the input being provided includes a terminator (i.e. its last char is "special" as it uniquely |
| | 16 | | /// identify the end of the string) or not. |
| | 17 | | /// <br/> |
| | 18 | | /// - This information is required since this sorter performs sorting by actually comparing PCS. |
| | 19 | | /// <br/> |
| | 20 | | /// - Other implementations don't require such information, since they never compare PCS against each other: they |
| | 21 | | /// use externally provided data structures, from which they infer the order. |
| | 22 | | /// </para> |
| | 23 | | /// <para id="algo"> |
| | 24 | | /// ALGORITHM |
| | 25 | | /// <br/> |
| | 26 | | /// - For each index i of the input string, a cyclic shift starting at i and of length 2 * L is generated. |
| | 27 | | /// <br/> |
| | 28 | | /// - Cyclic shifts are sorted in ascending lexicographic order, using |
| | 29 | | /// <see cref="Enumerable.OrderBy{TSource, TKey}(IEnumerable{TSource}, Func{TSource, TKey})"/>. |
| | 30 | | /// <br/> |
| | 31 | | /// - Indexes i, corresponding to the sorted cyclic shifts, are returned as a list. |
| | 32 | | /// </para> |
| | 33 | | /// <para id="complexity"> |
| | 34 | | /// COMPLEXITY |
| | 35 | | /// <br/> |
| | 36 | | /// - There are always n cyclic shifts, where n is the length of the input string. |
| | 37 | | /// <br/> |
| | 38 | | /// - Each cyclic shift has length 2 * L: building it takes O(L) time and space. |
| | 39 | | /// <br/> |
| | 40 | | /// - Sorting the cyclic shifts via QuickSort takes O(n * log(n)) comparisons, between strings of length L. |
| | 41 | | /// <br/> |
| | 42 | | /// - Generating the output takes O(n * L) space. |
| | 43 | | /// <br/> |
| | 44 | | /// - Therefore, Time Complexity is O(n * log(n) * L) and Space Complexity is O(n * L) space. |
| | 45 | | /// <br/> |
| | 46 | | /// - If L is not constant, but rather O(n), Time Complexity is O(n^2 * log(n)) and Space Complexity is O(n^2). |
| | 47 | | /// </para> |
| | 48 | | /// </remarks> |
| | 49 | | public class NaiveDoubleLengthPcsSorter : IDoubleLengthPcsSorter |
| | 50 | | { |
| 16 | 51 | | private IComparer<string> PcsComparer { get; } |
| | 52 | |
|
| | 53 | | /// <summary> |
| | 54 | | /// The input string, whose PCS have to be sorted. |
| | 55 | | /// </summary> |
| 16 | 56 | | public string Input { get; } |
| | 57 | |
|
| | 58 | | /// <inheritdoc/> |
| 16 | 59 | | public int PcsLength { get; } |
| | 60 | |
|
| | 61 | | /// <summary> |
| | 62 | | /// <inheritdoc cref="NaiveDoubleLengthPcsSorter"/> |
| | 63 | | /// </summary> |
| | 64 | | /// <param name="input"><inheritdoc cref="Input" path="/summary"/></param> |
| | 65 | | /// <param name="pcsLength"><inheritdoc cref="PcsLength" path="/summary"/></param> |
| | 66 | | /// <param name="inputWithTerminator"> |
| | 67 | | /// Whether <paramref name="input"/> is terminated by a terminator char. If so, the last char of |
| | 68 | | /// <paramref name="input"/> will be treated as a terminator char when comparing PCS of the input. |
| | 69 | | /// Otherwise, <see cref="Comparer{T}.Default"/> for <see cref="string"/> will be used. |
| | 70 | | /// </param> |
| 20 | 71 | | public NaiveDoubleLengthPcsSorter(string input, int pcsLength, bool inputWithTerminator) |
| 20 | 72 | | { |
| 20 | 73 | | if (pcsLength <= 0 || pcsLength > input.Length) |
| 4 | 74 | | throw new ArgumentOutOfRangeException( |
| 4 | 75 | | nameof(pcsLength), $"Must be positive and at most the length of {nameof(input)}."); |
| | 76 | |
|
| 16 | 77 | | Input = input; |
| 16 | 78 | | PcsLength = pcsLength; |
| 16 | 79 | | PcsComparer = inputWithTerminator |
| 16 | 80 | | ? StringIncludingTerminatorComparer.Build(input[^1]) |
| 16 | 81 | | : Comparer<string>.Default; |
| 16 | 82 | | } |
| | 83 | |
|
| | 84 | |
|
| | 85 | | /// <inheritdoc path="//*[not(self::remarks)]"/> |
| | 86 | | /// <remarks> |
| | 87 | | /// <inheritdoc cref="NaiveDoubleLengthPcsSorter" path="/remarks"/> |
| | 88 | | /// </remarks> |
| | 89 | | public IList<int> Sort() => |
| 16 | 90 | | PcsUtils |
| 16 | 91 | | .ExtractPcsOf(Input, PcsLength * 2) |
| 73 | 92 | | .OrderBy(pcsAndIndex => pcsAndIndex.pcs, PcsComparer) |
| 73 | 93 | | .Select(pcsAndIndex => pcsAndIndex.index) |
| 16 | 94 | | .ToList(); |
| | 95 | | } |