| | 1 | |
|
| | 2 | | namespace MoreStructures.Utilities; |
| | 3 | |
|
| | 4 | | /// <summary> |
| | 5 | | /// Generic utilities and extensions for strings. |
| | 6 | | /// </summary> |
| | 7 | | public static class StringUtilities |
| | 8 | | { |
| | 9 | | /// <summary> |
| | 10 | | /// Returns the length of the longest prefix in common between the provided <see cref="IEnumerable{T}"/> of |
| | 11 | | /// <see cref="char"/>. |
| | 12 | | /// </summary> |
| | 13 | | /// <returns> |
| | 14 | | /// An integer betwen 0 and the length of the shortest of the enumerables provided. |
| | 15 | | /// </returns> |
| | 16 | | /// <remarks> |
| | 17 | | /// <para id="algorithm"> |
| | 18 | | /// ALGORITHM |
| | 19 | | /// <br/> |
| | 20 | | /// - The algorithm iterates over two enumerators in parallel, one enumerator per enumerable provided. |
| | 21 | | /// <br/> |
| | 22 | | /// - It keeps running the enumerators until one of the two is over, or the current items of the two |
| | 23 | | /// enumerators are different. |
| | 24 | | /// <br/> |
| | 25 | | /// - It keeps a counter of the number of chars found equal from the beginning, which is returned as result. |
| | 26 | | /// </para> |
| | 27 | | /// <para id="complexity"> |
| | 28 | | /// COMPLEXITY |
| | 29 | | /// <br/> |
| | 30 | | /// - Enumerator creation, moving to next item and accessing the current item are all constant-time operations. |
| | 31 | | /// <br/> |
| | 32 | | /// - If the two enumerables have length l1 and l2 respectively, there are at most max(l1, l2) iterations. |
| | 33 | | /// <br/> |
| | 34 | | /// - The current item of each enumerable has a constant size. |
| | 35 | | /// <br/> |
| | 36 | | /// - If l1 and l2 are O(n), Time Complexity is O(n) and Space Complexity is O(1). |
| | 37 | | /// </para> |
| | 38 | | /// </remarks> |
| | 39 | | public static int LongestCommonPrefix(IEnumerable<char> enumerable1, IEnumerable<char> enumerable2) |
| 507 | 40 | | { |
| 507 | 41 | | var result = 0; |
| 507 | 42 | | var enumerator1 = enumerable1.GetEnumerator(); |
| 507 | 43 | | var enumerator2 = enumerable2.GetEnumerator(); |
| | 44 | |
|
| 507 | 45 | | var enumerator1MoveNext = enumerator1.MoveNext(); |
| 507 | 46 | | var enumerator2MoveNext = enumerator2.MoveNext(); |
| 997 | 47 | | while (enumerator1MoveNext && enumerator2MoveNext && enumerator1.Current == enumerator2.Current) |
| 490 | 48 | | { |
| 490 | 49 | | result++; |
| 490 | 50 | | enumerator1MoveNext = enumerator1.MoveNext(); |
| 490 | 51 | | enumerator2MoveNext = enumerator2.MoveNext(); |
| 490 | 52 | | } |
| | 53 | |
|
| 507 | 54 | | return result; |
| 507 | 55 | | } |
| | 56 | |
|
| | 57 | | /// <summary> |
| | 58 | | /// Returns the length of the longest prefix in common between the substrings of the provided <see cref="string"/> |
| | 59 | | /// instances, starting at the provided indexes. |
| | 60 | | /// </summary> |
| | 61 | | /// <returns> |
| | 62 | | /// An integer betwen 0 and the length of the shortest of the strings provided, minus related starting index. |
| | 63 | | /// </returns> |
| | 64 | | /// <remarks> |
| | 65 | | /// <para id="advantages"> |
| | 66 | | /// ADVANTAGES AND DISADVANTAGES |
| | 67 | | /// <br/> |
| | 68 | | /// - While <c>LongestCommonPrefix(s1, i1, s2, i2)</c> is conceptually equivalent to |
| | 69 | | /// <c>LongestCommonPrefix(s1.Skip(i1), s2.Skip(i2))</c>, making use of the general implementation of LCP |
| | 70 | | /// given by <see cref="LongestCommonPrefix(IEnumerable{char}, IEnumerable{char})"/>, this specific |
| | 71 | | /// implementation, with <see cref="string"/> instances and <see cref="int"/> starting indexes, has been |
| | 72 | | /// provided. |
| | 73 | | /// <br/> |
| | 74 | | /// - The reason behind the decision of providing a specialized implementation is that the LINQ method |
| | 75 | | /// <see cref="Enumerable.Skip{TSource}(IEnumerable{TSource}, int)"/> is O(n), rather than O(1). |
| | 76 | | /// <br/> |
| | 77 | | /// - Moreover, the performance "suboptimality" cannot be overcome by using ranges on <see cref="string"/>: |
| | 78 | | /// <c>LongestCommonPrefix(s1[i1..], s2[i2..])</c> builds two new strings, <c>s1[i1..]</c> and |
| | 79 | | /// <c>s2[i2..]</c>, by copying the part of the underlying arrays of s1 and s2 starting at index i1 and i2 |
| | 80 | | /// respectively. This results in <c>s1[i1..]</c> and <c>s2[i2..]</c> being O(n) both in time and space. |
| | 81 | | /// <br/> |
| | 82 | | /// - Check https://stackoverflow.com/questions/6742923/, and the answer from Eric Lippert, for further |
| | 83 | | /// information about the performance of substring extraction from a <see cref="string"/> instance. |
| | 84 | | /// </para> |
| | 85 | | /// <para id="algorithm"> |
| | 86 | | /// ALGORITHM |
| | 87 | | /// <br/> |
| | 88 | | /// - The algorithm iterates over two strings in parallel, each from the provided index. |
| | 89 | | /// <br/> |
| | 90 | | /// - It keeps checking chars from the two strings until one of the two strings is over, or the current char |
| | 91 | | /// of the two strings are different. |
| | 92 | | /// <br/> |
| | 93 | | /// - It keeps a counter of the number of chars found equal from the beginning, which is returned as result. |
| | 94 | | /// </para> |
| | 95 | | /// <para id="complexity"> |
| | 96 | | /// COMPLEXITY |
| | 97 | | /// <br/> |
| | 98 | | /// - Direct char accessm on a string, as well as checking its length, are constant-time operations. |
| | 99 | | /// <br/> |
| | 100 | | /// - If the two strings have length l1 and l2 respectively, and starting indexes are i1 and i2 respectively, |
| | 101 | | /// there are at most max(l1 - i1, l2 - i2) iterations. |
| | 102 | | /// <br/> |
| | 103 | | /// - If l1, i1, l2 and i2 are O(n), Time Complexity is O(n) and Space Complexity is O(1). |
| | 104 | | /// </para> |
| | 105 | | /// </remarks> |
| | 106 | | public static int LongestCommonPrefix(string string1, int index1, string string2, int index2) |
| 236 | 107 | | { |
| 236 | 108 | | if (index1 < 0 || index1 >= string1.Length) |
| 2 | 109 | | throw new ArgumentOutOfRangeException( |
| 2 | 110 | | nameof(index1), $"Must be non-negative and smaller than {nameof(string1)} length."); |
| 234 | 111 | | if (index2 < 0 || index2 >= string2.Length) |
| 2 | 112 | | throw new ArgumentOutOfRangeException( |
| 2 | 113 | | nameof(index2), $"Must be non-negative and smaller than {nameof(string2)} length."); |
| | 114 | |
|
| 232 | 115 | | var result = 0; |
| 353 | 116 | | while (index1 < string1.Length && index2 < string2.Length && string1[index1] == string2[index2]) |
| 121 | 117 | | { |
| 121 | 118 | | index1++; |
| 121 | 119 | | index2++; |
| 121 | 120 | | result++; |
| 121 | 121 | | } |
| | 122 | |
|
| 232 | 123 | | return result; |
| 232 | 124 | | } |
| | 125 | | } |