| | | 1 | | using MoreStructures.Utilities; |
| | | 2 | | |
| | | 3 | | namespace MoreStructures.SuffixArrays.LongestCommonPrefix; |
| | | 4 | | |
| | | 5 | | /// <summary> |
| | | 6 | | /// An implementation of <see cref="ILcpArrayBuilder"/> which calculates the LCP Array using the definition. |
| | | 7 | | /// </summary> |
| | | 8 | | /// <remarks> |
| | | 9 | | /// <para id="advantages"> |
| | | 10 | | /// ADVANTAGES AND DISADVANTAGES |
| | | 11 | | /// <br/> |
| | | 12 | | /// - This implementation is the most straightforward, as the algorithm closely follows the definition. |
| | | 13 | | /// <br/> |
| | | 14 | | /// - As such it is easy to implement and analyze, at the cost of worse Time and Space complexity than smarter |
| | | 15 | | /// implementations, using, for example, specific properties of subsequent LCP items of suffixes from the |
| | | 16 | | /// Suffix Array. |
| | | 17 | | /// <br/> |
| | | 18 | | /// - The algorithm is also online. Therefore, values can be lazily computed. |
| | | 19 | | /// </para> |
| | | 20 | | /// <para id="algorithm"> |
| | | 21 | | /// ALGORITHM |
| | | 22 | | /// <br/> |
| | | 23 | | /// - The implementation uses the externally provided <see cref="SuffixArray"/>, in combination with the |
| | | 24 | | /// <see cref="Text"/>. |
| | | 25 | | /// <br/> |
| | | 26 | | /// - It iterates over the first n - 1 items of the <see cref="SuffixArray"/> SA, using |
| | | 27 | | /// <see cref="StringUtilities.LongestCommonPrefix(IEnumerable{char}, IEnumerable{char})"/> to calculate the |
| | | 28 | | /// length of the LCP between SA[i] and its successor, SA[i + 1]. |
| | | 29 | | /// <br/> |
| | | 30 | | /// - That represents the i-th element of the LCP Array. |
| | | 31 | | /// </para> |
| | | 32 | | /// <para id="complexity"> |
| | | 33 | | /// COMPLEXITY |
| | | 34 | | /// <br/> |
| | | 35 | | /// - The <see cref="SuffixArray"/> is externally provided, so it's building cost is not included in this analysis. |
| | | 36 | | /// <br/> |
| | | 37 | | /// - The algorithm runs n - 1 iterations, each one building two strings (the prefix starting at SA[i] and the one |
| | | 38 | | /// starting at SA[i + 1]), then comparing them char by char, to find the LCP. |
| | | 39 | | /// <br/> |
| | | 40 | | /// - Prefixes have in general O(n) length, and |
| | | 41 | | /// <see cref="StringUtilities.LongestCommonPrefix(IEnumerable{char}, IEnumerable{char})"/> has Time Complexity |
| | | 42 | | /// linear in the input, and constant Space Complexity. |
| | | 43 | | /// <br/> |
| | | 44 | | /// - Therefore, Time Complexity is O(n^2) and Space Complexity is O(n). |
| | | 45 | | /// </para> |
| | | 46 | | /// </remarks> |
| | | 47 | | public class NaiveLcpArrayBuilder : ILcpArrayBuilder |
| | | 48 | | { |
| | | 49 | | /// <summary> |
| | | 50 | | /// The terminator-terminated string, to calculate the <see cref="LcpArray"/> of. |
| | | 51 | | /// </summary> |
| | 148 | 52 | | public TextWithTerminator Text { get; } |
| | | 53 | | |
| | | 54 | | /// <summary> |
| | | 55 | | /// The Suffix Array of the <see cref="Text"/>, required to calculate the <see cref="LcpArray"/>. |
| | | 56 | | /// </summary> |
| | 48 | 57 | | public SuffixArray SuffixArray { get; } |
| | | 58 | | |
| | | 59 | | /// <summary> |
| | | 60 | | /// <inheritdoc cref="NaiveLcpArrayBuilder"/> |
| | | 61 | | /// </summary> |
| | | 62 | | /// <remarks> |
| | | 63 | | /// <inheritdoc cref="NaiveLcpArrayBuilder"/> |
| | | 64 | | /// </remarks> |
| | | 65 | | /// <param name="text"> |
| | | 66 | | /// <inheritdoc cref="Text" path="/summary"/> |
| | | 67 | | /// </param> |
| | | 68 | | /// <param name="suffixArray"> |
| | | 69 | | /// <inheritdoc cref="SuffixArray" path="/summary"/> |
| | | 70 | | /// </param> |
| | 41 | 71 | | public NaiveLcpArrayBuilder(TextWithTerminator text, SuffixArray suffixArray) |
| | 41 | 72 | | { |
| | 41 | 73 | | Text = text; |
| | 41 | 74 | | SuffixArray = suffixArray; |
| | 41 | 75 | | } |
| | | 76 | | |
| | | 77 | | /// <summary> |
| | | 78 | | /// <inheritdoc/> |
| | | 79 | | /// </summary> |
| | | 80 | | /// <remarks> |
| | | 81 | | /// <inheritdoc cref="NaiveLcpArrayBuilder"/> |
| | | 82 | | /// </remarks> |
| | | 83 | | public virtual LcpArray Build() => |
| | 7 | 84 | | new( |
| | 7 | 85 | | from suffixIndexStartingAndNext in SuffixArray.Indexes.Zip(SuffixArray.Indexes.Skip(1)) |
| | 40 | 86 | | let suffix = Text[suffixIndexStartingAndNext.First..] |
| | 40 | 87 | | let nextSuffix = Text[suffixIndexStartingAndNext.Second..] |
| | 40 | 88 | | select StringUtilities.LongestCommonPrefix(suffix, nextSuffix) |
| | 7 | 89 | | ); |
| | | 90 | | } |