using MoreStructures.Utilities;
using System.Collections;

namespace MoreStructures;

/// <summary>
/// A text string with a terminator character, not present in the text.
/// </summary>
/// <param name="Text">A sequence of chars, of any length (including the empty sequence).</param>
/// <param name="Terminator">
/// A terminator character, not present in <paramref name="Text"/>. If not specified <see cref="DefaultTerminator"/> is
/// used.
/// </param>
/// <param name="ValidateInput">
/// Whether the input, and in particular <see cref="Text"/>, should be validated while this object is created.
/// Validation takes O(n) time, where n = number of chars in <see cref="Text"/>, and can be a heavy operation.
/// </param>
/// <exception cref="ArgumentException">
/// Thrown at construction time when <paramref name="ValidateInput"/> is true and <paramref name="Text"/> contains
/// <paramref name="Terminator"/>.
/// </exception>
/// <remarks>
///     <para id="usecases">
///     USECASES
///     <br/>
///     - A terminator-terminated text is required by data structures like Suffix Tries, Trees or Arrays.
///       <br/>
///     - This object provides type safety, as it allows telling apart terminator-terminated strings from generic
///       ones.
///       <br/>
///     - Consistently using <see cref="TextWithTerminator"/>, rather than <see cref="string"/>, in all library
///       functionalities ensures that the invariant of a terminator-terminated string is always respected.
///       <br/>
///     - Most string-related functionalities provided by <see cref="TextWithTerminator"/>, such as
///       <see cref="Length"/> and <see cref="this[Index]"/>, as well as <see cref="IEnumerable{T}"/> and
///       <see cref="IEnumerable"/> support, are delegated to the underlying string.
///     </para>
/// </remarks>
public record TextWithTerminator(
    IEnumerable<char> Text,
    char Terminator = TextWithTerminator.DefaultTerminator,
    bool ValidateInput = true)
    : IValueEnumerable<char>
{
    // The text followed by the terminator. Wrapped into a value enumerable to preserve value equality.
    // A string input is kept as a string (concatenation), so that string-optimized paths can be taken later.
    private readonly IEnumerable<char> TextAndTerminator =
        (Text is string textStr
            ? textStr + Terminator
            : Text.Append(Terminator)).AsValue();

    // Lazily initialized cache for Length. It is deliberately left out of Equals and GetHashCode (see below):
    // whether the length has already been computed is not part of the value of the text.
    private int? _length;

    /// <summary>
    /// A selector of a part of a <see cref="TextWithTerminator"/> or <see cref="RotatedTextWithTerminator"/>.
    /// </summary>
    public interface ISelector
    {
        /// <summary>
        /// Extract the substring identified by this selector, out of the provided <see cref="TextWithTerminator"/>.
        /// </summary>
        /// <param name="text">The text with terminator, to extract a substring of.</param>
        /// <returns>A substring, whose length depends on the selector.</returns>
        string Of(TextWithTerminator text);

        /// <summary>
        /// Extract the substring identified by this selector, out of the provided
        /// <see cref="RotatedTextWithTerminator"/>.
        /// </summary>
        /// <param name="text">The rotated text with terminator, to extract a substring of.</param>
        /// <returns>A substring, whose length depends on the selector.</returns>
        string OfRotated(RotatedTextWithTerminator text);
    }

    /// <summary>
    /// The special character used as a default terminator for the text to build the Suffix Tree of, when no custom
    /// terminator is specified. Should not be present in the text.
    /// </summary>
    /// <value>
    /// A single char.
    /// </value>
    public const char DefaultTerminator = '$';

    /// <summary>
    /// <inheritdoc cref="TextWithTerminator" path="/param[@name='Text']"/>
    /// </summary>
    /// <remarks>
    /// Wrapped into a <see cref="IValueEnumerable{T}"/> to preserve value equality.
    /// </remarks>
    /// <value>
    /// A sequence of chars.
    /// </value>
    public IEnumerable<char> Text { get; init; } = Text.AsValue();

    /// <summary>
    /// <inheritdoc cref="TextWithTerminator" path="/param[@name='Terminator']"/>
    /// </summary>
    /// <value>
    /// A single char.
    /// </value>
    public char Terminator { get; init; } =
        // Input validation is performed here, in the initializer, so that it runs as part of construction.
        !ValidateInput || !Text.Contains(Terminator)
            ? Terminator
            : throw new ArgumentException($"{nameof(Terminator)} shouldn't be included in {nameof(Text)}.");

    /// <summary>
    /// Returns the index of <see cref="Terminator"/> in this <see cref="TextAndTerminator"/>.
    /// </summary>
    /// <value>
    /// A 0-based index. 0 when <see cref="Text"/> is empty, positive otherwise.
    /// </value>
    public int TerminatorIndex => Length - 1;

    /// <summary>
    /// Select a part of this text by the provided selector.
    /// </summary>
    /// <param name="selector">Any selector acting on a <see cref="TextWithTerminator"/>.</param>
    /// <returns>A string containing the selected part.</returns>
    public string this[ISelector selector] => selector.Of(this);

    /// <summary>
    /// Select a part of this text by the provided range (start index included, end index excluded).
    /// </summary>
    /// <param name="range">The range applied to the underlying text, terminator included.</param>
    /// <returns>An <see cref="IEnumerable{T}"/> of chars containing the selected part.</returns>
    public IEnumerable<char> this[Range range] =>
        TextAndTerminator is StringValueEnumerable { StringValue: var str }
            ? str[range]
            : TextAndTerminator.Take(range).AsValue();

    /// <summary>
    /// Select a part of this text by the provided index (either w.r.t. the start or to the end of the text).
    /// </summary>
    /// <param name="index">The index applied to the underlying text, terminator included.</param>
    /// <returns>A char containing the selected part.</returns>
    public char this[Index index] =>
        TextAndTerminator is StringValueEnumerable { StringValue: var str }
            ? str[index]
            : TextAndTerminator.ElementAtO1(index);

    /// <summary>
    /// The total length of this text, including the terminator.
    /// </summary>
    /// <value>
    /// A positive integer (at least 1).
    /// </value>
    /// <remarks>
    ///     <para id="caching">
    ///     CACHING
    ///     <br/>
    ///     Calculated just once, and cached for later use.
    ///     <br/>
    ///     Immutability guarantees correctness.
    ///     <br/>
    ///     The cached value takes no part in equality and hashing of this object.
    ///     </para>
    ///     <para id="complexity">
    ///     COMPLEXITY
    ///     <br/>
    ///     - If the text was built with a <see cref="string"/> as input, the operation is O(1) in time.
    ///       <br/>
    ///     - If the text was built with a type optimized by
    ///       <see cref="EnumerableExtensions.CountO1{TSource}(IEnumerable{TSource})"/>, such as an <see cref="IList"/>
    ///       or <see cref="IList{T}"/> the operation is O(1) as well.
    ///       <br/>
    ///     - Otherwise, the operation is O(n), where n is the length of <see cref="Text"/>.
    ///     </para>
    /// </remarks>
    public int Length =>
        _length ??=
            TextAndTerminator is StringValueEnumerable { StringValue: var str }
                ? str.Length
                : TextAndTerminator.CountO1();

    /// <summary>
    /// Whether this text starts with the provided prefix.
    /// </summary>
    /// <param name="prefix">
    /// The <see cref="IEnumerable{T}"/> of <see cref="char"/> to be matched against the beginning of this text,
    /// terminator included.
    /// </param>
    /// <returns>True if this text starts with the prefix.</returns>
    /// <remarks>
    /// The comparison is done char by char (ordinal), whatever the underlying type of the text and of
    /// <paramref name="prefix"/> is, so that the result doesn't depend on the current culture.
    /// </remarks>
    public bool StartsWith(IEnumerable<char> prefix) =>
        TextAndTerminator is StringValueEnumerable { StringValue: var str } && prefix is string prefixStr
            ? str.StartsWith(prefixStr, StringComparison.Ordinal)
            : TextAndTerminator.Take(prefix.CountO1()).SequenceEqual(prefix);

    /// <summary>
    /// Whether this text ends with the provided suffix.
    /// </summary>
    /// <param name="suffix">
    /// The <see cref="IEnumerable{T}"/> of <see cref="char"/> to be matched against the end of this text,
    /// terminator included.
    /// </param>
    /// <returns>True if this text ends with the suffix.</returns>
    /// <remarks>
    /// The comparison is done char by char (ordinal), whatever the underlying type of the text and of
    /// <paramref name="suffix"/> is, so that the result doesn't depend on the current culture.
    /// </remarks>
    public bool EndsWith(IEnumerable<char> suffix) =>
        TextAndTerminator is StringValueEnumerable { StringValue: var str } && suffix is string suffixStr
            ? str.EndsWith(suffixStr, StringComparison.Ordinal)
            : TextAndTerminator.TakeLast(suffix.CountO1()).SequenceEqual(suffix);

    /// <summary>
    /// Whether this text and the provided one are equal by value.
    /// </summary>
    /// <param name="other">The text to compare this text against. Can be null.</param>
    /// <returns>
    /// True if the two instances have the same record type, the same <see cref="Text"/>, the same
    /// <see cref="Terminator"/>, the same <see cref="ValidateInput"/> and the same terminated sequence of chars.
    /// </returns>
    /// <remarks>
    /// Defined explicitly because the equality synthesized by the compiler for a record compares all instance
    /// fields, which would include the lazily initialized cache behind <see cref="Length"/>: two equal texts would
    /// stop being equal as soon as <see cref="Length"/> is read on only one of them.
    /// </remarks>
    public virtual bool Equals(TextWithTerminator? other) =>
        ReferenceEquals(this, other)
        || (other is not null
            && EqualityContract == other.EqualityContract
            && Terminator == other.Terminator
            && ValidateInput == other.ValidateInput
            && EqualityComparer<IEnumerable<char>>.Default.Equals(Text, other.Text)
            && EqualityComparer<IEnumerable<char>>.Default.Equals(TextAndTerminator, other.TextAndTerminator));

    /// <summary>
    /// Returns a hash code consistent with <see cref="Equals(TextWithTerminator?)"/>.
    /// </summary>
    /// <returns>
    /// A hash code which doesn't change when <see cref="Length"/> is read, since the cached length is not part of
    /// the hashed state.
    /// </returns>
    public override int GetHashCode() =>
        HashCode.Combine(EqualityContract, Text, Terminator, ValidateInput, TextAndTerminator);

    /// <inheritdoc path="//*[not(self::summary)]"/>
    /// <summary>
    /// Returns an enumerator that iterates through the collection of chars of the underlying <see cref="Text"/>
    /// string, including the <see cref="Terminator"/> char.
    /// </summary>
    public IEnumerator<char> GetEnumerator() =>
        TextAndTerminator.GetEnumerator();

    /// <inheritdoc path="//*[not(self::summary)]"/>
    /// <summary>
    /// Returns an enumerator that iterates through the collection of chars of the underlying <see cref="Text"/>
    /// string, including the <see cref="Terminator"/> char.
    /// </summary>
    IEnumerator IEnumerable.GetEnumerator() =>
        ((IEnumerable)TextAndTerminator).GetEnumerator();
}