| | | 1 | | using MoreStructures.Utilities; |
| | | 2 | | |
| | | 3 | | namespace MoreStructures.SuffixArrays.CyclicShifts; |
| | | 4 | | |
| | | 5 | | /// <summary> |
| | | 6 | | /// A <see cref="ISingleCharPcsClassifier"/> implementation which calculate equivalence classes using the definition. |
| | | 7 | | /// </summary> |
| | | 8 | | /// <remarks> |
| | | 9 | | /// <inheritdoc cref="ISingleCharPcsClassifier"/> |
| | | 10 | | /// <para id="advantages"> |
| | | 11 | | /// ADVANTAGES AND DISADVANTAGES |
| | | 12 | | /// <br/> |
| | | 13 | | /// - Compared to more advanced implementations, such as <see cref="OrderBasedSingleCharPcsClassifier"/>, it only |
| | | 14 | | /// requires the <see cref="Input"/>, to calculate equivalence classes. |
| | | 15 | | /// <br/> |
| | | 16 | | /// - However, its runtime is worse, both in Time and Space Complexity, than a solution based on position list, |
| | | 17 | | /// where the effort of sorting the chars has been already done previously. |
| | | 18 | | /// </para> |
| | | 19 | | /// <para id="algo"> |
| | | 20 | | /// ALGORITHM |
| | | 21 | | /// <br/> |
| | | 22 | | /// - Go though each char of the <see cref="Input"/>. |
| | | 23 | | /// <br/> |
| | | 24 | | /// - For each char, count the distinct chars of <see cref="Input"/> which are strictly smaller. |
| | | 25 | | /// <br/> |
| | | 26 | | /// - Build an output list of the result. |
| | | 27 | | /// </para> |
| | | 28 | | /// <para id="complexity"> |
| | | 29 | | /// COMPLEXITY |
| | | 30 | | /// <br/> |
| | | 31 | | /// - For each of the n chars of the <see cref="Input"/>, a pass of all n chars of <see cref="Input"/> is done, to |
| | | 32 | | /// count to keep the ones which are strictly smaller. |
| | | 33 | | /// <br/> |
| | | 34 | | /// - Count the distinct occurrences is a O(n) operation in time and space, done via the LINQ method |
| | | 35 | | /// <see cref="Enumerable.Distinct{TSource}(IEnumerable{TSource})"/>, which uses an internal lightweight |
| | | 36 | | /// implementation of a set. |
| | | 37 | | /// <br/> |
| | | 38 | | /// - The output is a list of n items. |
| | | 39 | | /// <br/> |
| | | 40 | | /// - Therefore, Time Complexity is O(n^2) and Space Complexity is O(n). |
| | | 41 | | /// </para> |
| | | 42 | | /// </remarks> |
| | | 43 | | public class NaiveSingleCharPcsClassifier : ISingleCharPcsClassifier |
| | | 44 | | { |
| | 278 | 45 | | private IComparer<char> CharsComparer { get; } |
| | | 46 | | |
| | | 47 | | /// <inheritdoc/> |
| | 53 | 48 | | public string Input { get; } |
| | | 49 | | |
| | | 50 | | /// <summary> |
| | | 51 | | /// <inheritdoc cref="OrderBasedSingleCharPcsClassifier"/> |
| | | 52 | | /// </summary> |
| | | 53 | | /// <param name="input"><inheritdoc cref="Input" path="/summary"/></param> |
| | | 54 | | /// <param name="inputWithTerminator"> |
| | | 55 | | /// Whether <paramref name="input"/> is terminated by a terminator char. If so, the last char of |
| | | 56 | | /// <paramref name="input"/> will be treated as a terminator char when comparing chars of the input. |
| | | 57 | | /// Otherwise, <see cref="Comparer{T}.Default"/> for <see cref="char"/> will be used. |
| | | 58 | | /// </param> |
| | 11 | 59 | | public NaiveSingleCharPcsClassifier(string input, bool inputWithTerminator) |
| | 11 | 60 | | { |
| | 11 | 61 | | Input = input; |
| | 11 | 62 | | CharsComparer = inputWithTerminator |
| | 11 | 63 | | ? CharOrTerminatorComparer.Build(input[^1]) |
| | 11 | 64 | | : Comparer<char>.Default; |
| | 11 | 65 | | } |
| | | 66 | | |
| | | 67 | | /// <inheritdoc/> |
| | | 68 | | public IList<int> Classify() => |
| | 331 | 69 | | Input.Select(c1 => Input.Where(c2 => CharsComparer.Compare(c1, c2) > 0).Distinct().Count()).ToList(); |
| | | 70 | | } |