| | 1 | | using MoreStructures.Utilities; |
| | 2 | |
|
| | 3 | | namespace MoreStructures.BurrowsWheelerTransform.Matching.Comparers; |
| | 4 | |
|
| | 5 | | /// <summary> |
| | 6 | | /// An <see cref="IComparer{T}"/> of <see cref="int"/>, which compares a suffix <see cref="string"/> of |
| | 7 | | /// <see cref="Text"/> against the provided <see cref="string"/> pattern, and ignores the second <see cref="int"/> |
| | 8 | | /// value. |
| | 9 | | /// </summary> |
| | 10 | | public abstract class SuffixAgainstPatternComparer : IComparer<int> |
| | 11 | | { |
| | 12 | | /// <summary> |
| | 13 | | /// The text, to extract suffixes from. |
| | 14 | | /// </summary> |
| 222 | 15 | | public TextWithTerminator Text { get; } |
| | 16 | |
|
| | 17 | | /// <summary> |
| | 18 | | /// The pattern, to compare against each suffix of <see cref="Text"/>. |
| | 19 | | /// </summary> |
| 222 | 20 | | public IEnumerable<char> Pattern { get; } |
| | 21 | |
|
| | 22 | | /// <summary> |
| | 23 | | /// The <see cref="IComparer{T}"/> of <see cref="char"/>, to be used to compare single chars of the strings to |
| | 24 | | /// compare (suffixes and pattern). |
| | 25 | | /// </summary> |
| 425 | 26 | | public IComparer<char> CharComparer { get; } |
| | 27 | |
|
| | 28 | | /// <summary> |
| | 29 | | /// The value of the first term of comparison, which resulted in <see cref="LongestMatch"/> chars matched, |
| | 30 | | /// when comparing the suffix starting at <see cref="LongestMatchFirstValue"/> against the pattern. |
| | 31 | | /// </summary> |
| | 32 | | /// <remarks> |
| | 33 | | /// If multiple values of the first term resulted in the same amount of chars matched, the first value |
| | 34 | | /// encountered is kept. |
| | 35 | | /// </remarks> |
| 232 | 36 | | public int LongestMatchFirstValue { get; protected set; } = -1; |
| | 37 | |
|
| | 38 | | /// <summary> |
| | 39 | | /// The maximum amount of chars of the pattern matched, since the instantiation of this comparer. |
| | 40 | | /// </summary> |
| | 41 | | /// <remarks> |
| | 42 | | /// It is never reset. To start over, a new instance of this comparer has to be created. |
| | 43 | | /// </remarks> |
| 513 | 44 | | public int LongestMatch { get; protected set; } = 0; |
| | 45 | |
|
| | 46 | | /// <summary> |
| | 47 | | /// <inheritdoc cref="SuffixAgainstPatternComparer"/> |
| | 48 | | /// </summary> |
| | 49 | | /// <param name="text"><inheritdoc cref="Text"/></param> |
| | 50 | | /// <param name="pattern"><inheritdoc cref="Pattern"/></param> |
| 58 | 51 | | protected SuffixAgainstPatternComparer(TextWithTerminator text, IEnumerable<char> pattern) |
| 58 | 52 | | { |
| 58 | 53 | | Text = text; |
| 58 | 54 | | Pattern = pattern; |
| 58 | 55 | | CharComparer = CharOrTerminatorComparer.Build(text.Terminator); |
| 58 | 56 | | } |
| | 57 | |
|
| | 58 | | /// <summary> |
| | 59 | | /// Compares the suffix of text identified by <paramref name="x"/> against the pattern. |
| | 60 | | /// </summary> |
| | 61 | | /// <param name="x"> |
| | 62 | | /// The index, in the complete Suffix Array, of the suffix which is first term of comparison. |
| | 63 | | /// </param> |
| | 64 | | /// <param name="y">Ignored.</param> |
| | 65 | | /// <returns> |
| | 66 | | /// A positive value if there is mismatch and the suffix is bigger than the pattern lexicographically. |
| | 67 | | /// <br/> |
| | 68 | | /// A negative value if there is mismatch and the suffix is smaller than the pattern lexicographically. |
| | 69 | | /// <br/> |
| | 70 | | /// The value 0 if there is full match and pattern and text are of the same length or pattern is shorter. |
| | 71 | | /// <br/> |
| | 72 | | /// The value -1 if there is full match but the pattern is longer than the suffix. |
| | 73 | | /// </returns> |
| | 74 | | public abstract int Compare(int x, int y); |
| | 75 | |
|
| | 76 | | /// <summary> |
| | 77 | | /// Compares the suffix of <see cref="Text"/> starting at index <paramref name="suffixStartIndex"/> against |
| | 78 | | /// <see cref="Pattern"/>, returning the result of the comparison as a positive, null or negative |
| | 79 | | /// <see cref="int"/>. |
| | 80 | | /// </summary> |
| | 81 | | /// <param name="bwtIndex"> |
| | 82 | | /// The index, in the Burrows-Wheeler Transform, of the first char of the suffix starting at |
| | 83 | | /// <paramref name="suffixStartIndex"/>. |
| | 84 | | /// </param> |
| | 85 | | /// <param name="suffixStartIndex"> |
| | 86 | | /// The index of <see cref="Text"/> where <paramref name="suffixStartIndex"/> starts. |
| | 87 | | /// </param> |
| | 88 | | /// <returns> |
| | 89 | | /// A positive <see cref="int"/>, if the suffix is lexicographically bigger than <see cref="Pattern"/>. |
| | 90 | | /// <br/> |
| | 91 | | /// A negative <see cref="int"/>, if the suffix is lexicographically smaller than <see cref="Pattern"/>. |
| | 92 | | /// <br/> |
| | 93 | | /// Zero, when there is a match, and the suffix is either of longer or of the same length as <see cref="Pattern"/>. |
| | 94 | | /// </returns> |
| | 95 | | protected virtual int CompareSuffixAgainstPattern(int bwtIndex, int suffixStartIndex) |
| 222 | 96 | | { |
| 222 | 97 | | using var firstEnumerator = Text[suffixStartIndex..].GetEnumerator(); |
| 222 | 98 | | using var secondEnumerator = Pattern.GetEnumerator(); |
| | 99 | |
|
| 222 | 100 | | int numberOfItemsMatched = 0; |
| | 101 | | bool firstHasValue, secondHasValue; |
| 503 | 102 | | while (true) |
| 503 | 103 | | { |
| 503 | 104 | | firstHasValue = firstEnumerator.MoveNext(); |
| 503 | 105 | | secondHasValue = secondEnumerator.MoveNext(); |
| | 106 | |
|
| 503 | 107 | | if (!firstHasValue || !secondHasValue) |
| 78 | 108 | | break; |
| | 109 | |
|
| 425 | 110 | | var itemsComparison = CharComparer.Compare(firstEnumerator.Current, secondEnumerator.Current); |
| 425 | 111 | | if (itemsComparison != 0) |
| 144 | 112 | | return itemsComparison; |
| | 113 | |
|
| 281 | 114 | | if (++numberOfItemsMatched > LongestMatch) |
| 143 | 115 | | { |
| 143 | 116 | | LongestMatch = numberOfItemsMatched; |
| 143 | 117 | | LongestMatchFirstValue = bwtIndex; |
| 143 | 118 | | } |
| 281 | 119 | | } |
| | 120 | |
|
| 78 | 121 | | if (secondHasValue) |
| 1 | 122 | | return -1; // The pattern is longer than the suffix |
| | 123 | |
|
| | 124 | | // Either the pattern is shorter than the suffix, or pattern and suffix have equal length. |
| | 125 | | // In either case, the match is successful. |
| 77 | 126 | | return 0; |
| 222 | 127 | | } |
| | 128 | | } |