| | 1 | | using MoreStructures.Utilities; |
| | 2 | |
|
| | 3 | | namespace MoreStructures.SuffixArrays.LongestCommonPrefix; |
| | 4 | |
|
| | 5 | | /// <summary> |
| | 6 | | /// An implementation of <see cref="ILcpArrayBuilder"/> which calculates the LCP Array using the definition. |
| | 7 | | /// </summary> |
| | 8 | | /// <remarks> |
| | 9 | | /// <para id="advantages"> |
| | 10 | | /// ADVANTAGES AND DISADVANTAGES |
| | 11 | | /// <br/> |
| | 12 | | /// - This implementation is the most straightforward, as the algorithm closely follows the definition. |
| | 13 | | /// <br/> |
| | 14 | | /// - As such it is easy to implement and analyze, at the cost of worse Time and Space complexity than smarter |
| | 15 | | /// implementations, using, for example, specific properties of subsequent LCP items of suffixes from the |
| | 16 | | /// Suffix Array. |
| | 17 | | /// <br/> |
| | 18 | | /// - The algorithm is also online. Therefore, values can be lazily computed. |
| | 19 | | /// </para> |
| | 20 | | /// <para id="algorithm"> |
| | 21 | | /// ALGORITHM |
| | 22 | | /// <br/> |
| | 23 | | /// - The implementation uses the externally provided <see cref="SuffixArray"/>, in combination with the |
| | 24 | | /// <see cref="Text"/>. |
| | 25 | | /// <br/> |
| | 26 | | /// - It iterates over the first n - 1 items of the <see cref="SuffixArray"/> SA, using |
| | 27 | | /// <see cref="StringUtilities.LongestCommonPrefix(IEnumerable{char}, IEnumerable{char})"/> to calculate the |
| | 28 | | /// length of the LCP between SA[i] and its successor, SA[i + 1]. |
| | 29 | | /// <br/> |
| | 30 | | /// - That represents the i-th element of the LCP Array. |
| | 31 | | /// </para> |
| | 32 | | /// <para id="complexity"> |
| | 33 | | /// COMPLEXITY |
| | 34 | | /// <br/> |
| | 35 | | /// - The <see cref="SuffixArray"/> is externally provided, so it's building cost is not included in this analysis. |
| | 36 | | /// <br/> |
| | 37 | | /// - The algorithm runs n - 1 iterations, each one building two strings (the prefix starting at SA[i] and the one |
| | 38 | | /// starting at SA[i + 1]), then comparing them char by char, to find the LCP. |
| | 39 | | /// <br/> |
| | 40 | | /// - Prefixes have in general O(n) length, and |
| | 41 | | /// <see cref="StringUtilities.LongestCommonPrefix(IEnumerable{char}, IEnumerable{char})"/> has Time Complexity |
| | 42 | | /// linear in the input, and constant Space Complexity. |
| | 43 | | /// <br/> |
| | 44 | | /// - Therefore, Time Complexity is O(n^2) and Space Complexity is O(n). |
| | 45 | | /// </para> |
| | 46 | | /// </remarks> |
| | 47 | | public class NaiveLcpArrayBuilder : ILcpArrayBuilder |
| | 48 | | { |
| | 49 | | /// <summary> |
| | 50 | | /// The terminator-terminated string, to calculate the <see cref="LcpArray"/> of. |
| | 51 | | /// </summary> |
| 148 | 52 | | public TextWithTerminator Text { get; } |
| | 53 | |
|
| | 54 | | /// <summary> |
| | 55 | | /// The Suffix Array of the <see cref="Text"/>, required to calculate the <see cref="LcpArray"/>. |
| | 56 | | /// </summary> |
| 48 | 57 | | public SuffixArray SuffixArray { get; } |
| | 58 | |
|
| | 59 | | /// <summary> |
| | 60 | | /// <inheritdoc cref="NaiveLcpArrayBuilder"/> |
| | 61 | | /// </summary> |
| | 62 | | /// <remarks> |
| | 63 | | /// <inheritdoc cref="NaiveLcpArrayBuilder"/> |
| | 64 | | /// </remarks> |
| | 65 | | /// <param name="text"> |
| | 66 | | /// <inheritdoc cref="Text" path="/summary"/> |
| | 67 | | /// </param> |
| | 68 | | /// <param name="suffixArray"> |
| | 69 | | /// <inheritdoc cref="SuffixArray" path="/summary"/> |
| | 70 | | /// </param> |
| 41 | 71 | | public NaiveLcpArrayBuilder(TextWithTerminator text, SuffixArray suffixArray) |
| 41 | 72 | | { |
| 41 | 73 | | Text = text; |
| 41 | 74 | | SuffixArray = suffixArray; |
| 41 | 75 | | } |
| | 76 | |
|
| | 77 | | /// <summary> |
| | 78 | | /// <inheritdoc/> |
| | 79 | | /// </summary> |
| | 80 | | /// <remarks> |
| | 81 | | /// <inheritdoc cref="NaiveLcpArrayBuilder"/> |
| | 82 | | /// </remarks> |
| | 83 | | public virtual LcpArray Build() => |
| 7 | 84 | | new( |
| 7 | 85 | | from suffixIndexStartingAndNext in SuffixArray.Indexes.Zip(SuffixArray.Indexes.Skip(1)) |
| 40 | 86 | | let suffix = Text[suffixIndexStartingAndNext.First..] |
| 40 | 87 | | let nextSuffix = Text[suffixIndexStartingAndNext.Second..] |
| 40 | 88 | | select StringUtilities.LongestCommonPrefix(suffix, nextSuffix) |
| 7 | 89 | | ); |
| | 90 | | } |