| | 1 | | using MoreStructures.DisjointSets; |
| | 2 | | using MoreStructures.Lists.Sorting; |
| | 3 | |
|
| | 4 | | namespace MoreStructures.Graphs.MinimumSpanningTree; |
| | 5 | |
|
| | 6 | | /// <summary> |
| | 7 | | /// A <see cref="IMstFinder"/> based on the Kruskal's algorithm. |
| | 8 | | /// </summary> |
| | 9 | | /// <remarks> |
| | 10 | | /// <para id="algorithm"> |
| | 11 | | /// ALGORITHM |
| | 12 | | /// <br/> |
| | 13 | | /// - First, all edges of the graph are sorted in ascending order, using the <see cref="Sorter"/> provided in the |
| | 14 | | /// constructor. |
| | 15 | | /// <br/> |
| | 16 | | /// - Then, set of edges MST, which will contain the result, is initialized to an empty set (i.e. no edges included |
| | 17 | | /// in the spanning tree, before the main loop). |
| | 18 | | /// <br/> |
| | 19 | | /// - A <see cref="IDisjointSet"/> DS is also initialized to contain the v values from 0 to v - 1, each value |
| | 20 | | /// representing a vertex of the graph and at the beginning disjoint from all others. |
| | 21 | | /// <br/> |
| | 22 | | /// - The disjoint set DS will be used as a <b>cycle detector</b>, i.e. as a data structure able to determine |
| | 23 | | /// nearly in O(1) (more precisely in O(log*(n)) whether adding an edge to MST would result in a cycle. |
| | 24 | | /// <br/> |
| | 25 | | /// - Then, the main loop of the algorithm is executed, until MST contains exactly v - 1 edges in it (any spanning |
| | 26 | | /// tree has to have exactly v vertices and v - 1 edges, otherwise it would not be spanning, or it would not be |
| | 27 | | /// a tree). |
| | 28 | | /// <br/> |
| | 29 | | /// - At each iteration, the next minimum edge e is added to MST, if it doesn't introduce a cycle. Otherwise, it is |
| | 30 | | /// discarded, |
| | 31 | | /// <br/> |
| | 32 | | /// - To verify whether e = (u, v) introduces a cycle in MST or not, it checks whether v and u are already |
| | 33 | | /// connected in DS (via <see cref="IDisjointSet.AreConnected(int, int)"/>), i.e. whether they belong to the same |
| | 34 | | /// set. |
| | 35 | | /// <br/> |
| | 36 | | /// - If so, it means that there is already a path (sequence of edges) in MST connecting u and v. Therefore the |
| | 37 | | /// introduction of the edge e = (u, v) would give a second path from u to v, hence a cycle. |
| | 38 | | /// <br/> |
| | 39 | | /// - Otherwise, e can safely be added to MST with no introduction of a cycle. The vertices u and v are also united |
| | 40 | | /// in DS, to reflect the addition of e into MST, in DS as well. |
| | 41 | | /// <br/> |
| | 42 | | /// - When MST contains v - 1 edges, the number of disjoint sets in DS is checked: if it is 1, it means that all |
| | 43 | | /// vertices in the graph have been connected by MST, and MST is indeed a Minimum Spanning Tree. |
| | 44 | | /// <br/> |
| | 45 | | /// - If it is not the case, and there is more than a single disjoint set, an |
| | 46 | | /// <see cref="InvalidOperationException"/> is thrown, since the graph is not connected, and a single tree |
| | 47 | | /// spanning the entire graph cannot be constructed. |
| | 48 | | /// </para> |
| | 49 | | /// <para id="complexity"> |
| | 50 | | /// COMPLEXITY |
| | 51 | | /// <br/> |
| | 52 | | /// - The algorithm makes use of an <see cref="IInputMutatingSort"/> algorithm, to sort all the edges of the graph i |
| | 53 | | /// ascending order by their distance. |
| | 54 | | /// <br/> |
| | 55 | | /// - Since sorting edges is required by the algorithm, the overall complexity is lower bounded by |
| | 56 | | /// O(e * log(e)) = O(e * log(v)). |
| | 57 | | /// <br/> |
| | 58 | | /// - Such complexity can only be reduced when specific assumptions can be made, on the distance values of the |
| | 59 | | /// edges (e.g. if integer and within a small range, a non-comparison based, linear search, such as Counting |
| | 60 | | /// Sort, may be used). |
| | 61 | | /// <br/> |
| | 62 | | /// - The algorithm also makes use of a <see cref="IDisjointSet"/> data structure, to detect loops after the |
| | 63 | | /// potential introduction of each single edge of the graph. |
| | 64 | | /// <br/> |
| | 65 | | /// - For that reason, for the algorithm to be efficient, when building the Minimum Spanning Tree, the disjoint |
| | 66 | | /// set should be able to verify connection (via <see cref="IDisjointSet.AreConnected(int, int)"/>) and to |
| | 67 | | /// merge vertices (via <see cref="IDisjointSet.Union(int, int)"/>) in (nearly) constant time. |
| | 68 | | /// <br/> |
| | 69 | | /// - While this is actually possible, by using a <see cref="PathCompressionWeightedQuickUnionDisjointSet"/>, the |
| | 70 | | /// overall complexity is dominated by the edge sorting, which is in general linearithmic over the edges. |
| | 71 | | /// <br/> |
| | 72 | | /// - For these reasons, Time Complexity is O(e * log(v)). Space Complexity is O(v + e), since large-enough data |
| | 73 | | /// structures are required to store sorted edges and disjoint sets of vertices. |
| | 74 | | /// </para> |
| | 75 | | /// </remarks> |
| | 76 | | public class KruskalMstFinder : IMstFinder |
| | 77 | | { |
| | 78 | | /// <summary> |
| | 79 | | /// The <see cref="IInputMutatingSort"/> algorithm to be used to sort the edges of the graph. |
| | 80 | | /// </summary> |
| 14 | 81 | | public IInputMutatingSort Sorter { get; } |
| | 82 | |
|
| | 83 | | /// <summary> |
| | 84 | | /// A build of <see cref="IDisjointSet"/> instances, used by the algorithm to detect potential cycles in the MST. |
| | 85 | | /// </summary> |
| 14 | 86 | | public Func<int, IDisjointSet> DisjointSetBuilder { get; } |
| | 87 | |
|
| | 88 | | /// <inheritdoc cref="KruskalMstFinder"/> |
| | 89 | | /// <param name="sorter"> |
| | 90 | | /// <inheritdoc cref="Sorter" path="/summary"/> |
| | 91 | | /// </param> |
| | 92 | | /// <param name="disjointSetBuilder"> |
| | 93 | | /// <inheritdoc cref="DisjointSetBuilder" path="/summary"/> |
| | 94 | | /// </param> |
| 14 | 95 | | public KruskalMstFinder(IInputMutatingSort sorter, Func<int, IDisjointSet> disjointSetBuilder) |
| 14 | 96 | | { |
| 14 | 97 | | Sorter = sorter; |
| 14 | 98 | | DisjointSetBuilder = disjointSetBuilder; |
| 14 | 99 | | } |
| | 100 | |
|
| | 101 | | /// <inheritdoc path="//*[not(self::summary or self::remarks)]"/> |
| | 102 | | /// <summary> |
| | 103 | | /// Finds the Minimum Spanning Tree (MST) of the provided <paramref name="graph"/> using the Kruskal's algorithm. |
| | 104 | | /// </summary> |
| | 105 | | /// <remarks> |
| | 106 | | /// <inheritdoc cref="KruskalMstFinder"/> |
| | 107 | | /// </remarks> |
| | 108 | | public ISet<(int, int)> Find(IGraph graph, IGraphDistances distances) |
| 14 | 109 | | { |
| 14 | 110 | | var edges = graph.GetAllEdges().ToList(); |
| 14 | 111 | | Sorter.Sort(edges, new EdgesComparer(distances)); |
| | 112 | |
|
| 14 | 113 | | var numberOfVertices = graph.GetNumberOfVertices(); |
| 14 | 114 | | var cycleDetector = DisjointSetBuilder(numberOfVertices); |
| 14 | 115 | | var mst = new HashSet<(int, int)> { }; |
| 14 | 116 | | var currentEdgeIndex = 0; |
| 56 | 117 | | while (mst.Count < numberOfVertices - 1 && currentEdgeIndex < edges.Count) |
| 42 | 118 | | { |
| 42 | 119 | | var edge = edges[currentEdgeIndex++]; |
| 42 | 120 | | if (cycleDetector.AreConnected(edge.edgeStart, edge.edgeEnd)) |
| 13 | 121 | | continue; |
| | 122 | |
|
| 29 | 123 | | mst.Add(edge); |
| 29 | 124 | | cycleDetector.Union(edge.edgeStart, edge.edgeEnd); |
| 29 | 125 | | } |
| | 126 | |
|
| 14 | 127 | | if (cycleDetector.SetsCount > 1) |
| 4 | 128 | | throw new InvalidOperationException("The graph is not connected."); |
| | 129 | |
|
| 10 | 130 | | return mst; |
| 10 | 131 | | } |
| | 132 | |
|
| | 133 | | private sealed class EdgesComparer : IComparer<(int, int)> |
| | 134 | | { |
| 286 | 135 | | private IGraphDistances Distances { get; } |
| | 136 | |
|
| 14 | 137 | | public EdgesComparer(IGraphDistances distances) |
| 14 | 138 | | { |
| 14 | 139 | | Distances = distances; |
| 14 | 140 | | } |
| | 141 | |
|
| 143 | 142 | | public int Compare((int, int) x, (int, int) y) => Comparer<int>.Default.Compare(Distances[x], Distances[y]); |
| | 143 | | } |
| | 144 | | } |