| | 1 | | using MoreStructures.Graphs.Visitor; |
| | 2 | |
|
| | 3 | | namespace MoreStructures.Graphs.Sorting; |
| | 4 | |
|
| | 5 | | /// <summary> |
| | 6 | | /// An <see cref="ITopologicalSort"/> implementation which assigns topological order to vertices by identifing a sink |
| | 7 | | /// vertex at each iteration <b>in a deterministic way</b>, running DFS on each vertex and selecting the max sink. |
| | 8 | | /// </summary> |
| | 9 | | /// <remarks> |
| | 10 | | /// <para id="advantages"> |
| | 11 | | /// ADVANTAGES AND DISADVANTAGES |
| | 12 | | /// <br/> |
| | 13 | | /// - This is a very naive implementation of <see cref="ITopologicalSort"/>. |
| | 14 | | /// <br/> |
| | 15 | | /// - Using DFS on each vertex and building a "reachability dictionary" is conceptually simple, but very |
| | 16 | | /// unefficient, leading to cubic runtime over the number of vertices (compared to the quadratic runtime of |
| | 17 | | /// <see cref="AnyPathToSinkBasedTopologicalSort"/> or the linear runtime of an approach based on a single DFS |
| | 18 | | /// pass of the graph). |
| | 19 | | /// <br/> |
| | 20 | | /// - However, because all reachable vertices are retrieved, and the max is selected when looking for a sink, the |
| | 21 | | /// final topological sort returned is easily predictable: when multiple topological sort orders are possible, |
| | 22 | | /// the one returned is the one which has higher vertex ids at the end of the output array. |
| | 23 | | /// <br/> |
| | 24 | | /// - The deterministic property is not satisfied by some implementations with better runtime, such as |
| | 25 | | /// <see cref="AnyPathToSinkBasedTopologicalSort"/>, which starts graph exploration from "some" vertex and |
| | 26 | | /// follows "some" path, to get to a sink. |
| | 27 | | /// </para> |
| | 28 | | /// <para id="algorithm"> |
| | 29 | | /// ALGORITHM |
| | 30 | | /// <br/> |
| | 31 | | /// - First, all reachable vertices, from each vertex of the graph, are identified, running a |
| | 32 | | /// <see cref="IVisitStrategy.DepthFirstSearchFromVertex(IGraph, int)"/> via the <see cref="VisitStrategy"/> |
| | 33 | | /// provided in the constructor. |
| | 34 | | /// <br/> |
| | 35 | | /// - Reachable vertices from each vertex are stored into a <see cref="Dictionary{TKey, TValue}"/> D, mapping the |
| | 36 | | /// <see cref="int"/> id of the vertex i to an <see cref="HashSet{T}"/> of vertices reachable from i (including |
| | 37 | | /// i itself). |
| | 38 | | /// <br/> |
| | 39 | | /// - Then, an array of v integers, named here TS, where v is the number of vertices in the graph, is instantiated, |
| | 40 | | /// to store the topological sort of the provided DAG. |
| | 41 | | /// <br/> |
| | 42 | | /// - An <see cref="HashSet{T}"/> of vertices already processed (and removed from D) is also instantiated to an |
| | 43 | | /// empty set. |
| | 44 | | /// <br/> |
| | 45 | | /// - Finally the main loop of the algorithm is repeated, until D remains empty. |
| | 46 | | /// <br/> |
| | 47 | | /// - At each iteration, the sink vertex with the biggest index is found. |
| | 48 | | /// <br/> |
| | 49 | | /// - The sink is found by looking for vertices from which no other vertices are reachable, other than the vertex |
| | 50 | | /// itself and possibly the already processed ones (which are "virtually" removed from the graph). |
| | 51 | | /// <br/> |
| | 52 | | /// - If such a sink is not found, it means that the remaining graph contains a cycle, because there is at least a |
| | 53 | | /// vertex in the graph (otherwise the main loop condition would have been false) and for each vertex i there is |
| | 54 | | /// a vertex j such that a directed path from i to j, i ~~> j, exists. So an |
| | 55 | | /// <see cref="InvalidOperationException"/> is thrown as topological sort only makes sense for DAGs. |
| | 56 | | /// <br/> |
| | 57 | | /// - If such a sink is found instead, it is returned as (v - 1 - i)-th item of TS, where i is the 0-based running |
| | 58 | | /// index of the main loop of the algorithm, incremented by 1 at each iteration. |
| | 59 | | /// <br/> |
| | 60 | | /// - Notice that not any sink is taken: <b>instead of stopping at the first sink found, the sink with the highest |
| | 61 | | /// id is returned</b>. This makes the algorithm fully deterministic and ensures full predictability of the |
| | 62 | | /// result. |
| | 63 | | /// <br/> |
| | 64 | | /// - Finally the array TS is returned. |
| | 65 | | /// </para> |
| | 66 | | /// <para id="complexity"> |
| | 67 | | /// COMPLEXITY |
| | 68 | | /// <br/> |
| | 69 | | /// - Getting the number of vertices via <see cref="IGraph.GetNumberOfVertices"/> is a constant-time operation in |
| | 70 | | /// any classical implementation of <see cref="IGraph"/>. |
| | 71 | | /// <br/> |
| | 72 | | /// - Running DFS for each vertex of the graph via <see cref="IVisitStrategy.DepthFirstSearchFromVertex"/> is a |
| | 73 | | /// generally expensive operation, which has a Time and Space Complexity which depends on the specific |
| | 74 | | /// <see cref="IVisitStrategy"/> used. |
| | 75 | | /// <br/> |
| | 76 | | /// - Instantiating the TS array also requires resetting all its v items. |
| | 77 | | /// <br/> |
| | 78 | | /// - The main loop of the algorithm, executed v times, requires finding the last sink vertex, which is an |
| | 79 | | /// operation quadratic in complexity. All other operations require constant time and space. |
| | 80 | | /// <br/> |
| | 81 | | /// - Therefore Time Complexity is O(v * Tdfs + v^3) and Space Complexity is O(v^2 + Sdfs), where Tdfs and Sdfs |
| | 82 | | /// are Time and Space Complexity of running a DFS from a vertex. |
| | 83 | | /// <br/> |
| | 84 | | /// - Using <see cref="FullyIterativeHashSetBasedGraphVisit"/> as <see cref="VisitStrategy"/> and assuming |
| | 85 | | /// <see cref="IGraph.GetAdjacentVerticesAndEdges(int, bool)"/> can be computed in constant time, Tdfs and Sdfs |
| | 86 | | /// becomes linear in v and e. |
| | 87 | | /// <br/> |
| | 88 | | /// - In this case, Time Complexity becomes O(v * (v + e) + v^3) which can be simplified as O(v^3), since e can |
| | 89 | | /// be at most O(v^2) (in graphs where every vertex is connected to every other vertex). |
| | 90 | | /// </para> |
| | 91 | | /// </remarks> |
| | 92 | | public class DfsOnEachVertexSinkBasedTopologicalSort : ITopologicalSort |
| | 93 | | { |
| | 94 | | /// <summary> |
| | 95 | | /// The visitor to be used to identify sink vertices, by running Depth First Searches on the graph. |
| | 96 | | /// </summary> |
| 186 | 97 | | public IVisitStrategy VisitStrategy { get; } |
| | 98 | |
|
| | 99 | | /// <summary> |
| | 100 | | /// <inheritdoc cref="DfsOnEachVertexSinkBasedTopologicalSort"/> |
| | 101 | | /// </summary> |
| | 102 | | /// <param name="visitStrategy"> |
| | 103 | | /// <inheritdoc cref="VisitStrategy" path="/summary"/> |
| | 104 | | /// </param> |
| | 105 | | /// <remarks> |
| | 106 | | /// <inheritdoc cref="DfsOnEachVertexSinkBasedTopologicalSort"/> |
| | 107 | | /// </remarks> |
| 54 | 108 | | public DfsOnEachVertexSinkBasedTopologicalSort(IVisitStrategy visitStrategy) |
| 54 | 109 | | { |
| 54 | 110 | | VisitStrategy = visitStrategy; |
| 54 | 111 | | } |
| | 112 | |
|
| | 113 | | /// <inheritdoc path="//*[not(self::remarks)]"/> |
| | 114 | | /// <remarks> |
| | 115 | | /// <inheritdoc cref="DfsOnEachVertexSinkBasedTopologicalSort"/> |
| | 116 | | /// </remarks> |
| | 117 | | public IList<int> Sort(IGraph dag) |
| 54 | 118 | | { |
| | 119 | | // Find reachable nodes for each vertex |
| 54 | 120 | | var numberOfVertices = dag.GetNumberOfVertices(); |
| 54 | 121 | | var verticesReachableFromVertices = new Dictionary<int, HashSet<int>>( |
| 54 | 122 | | from vertex in Enumerable.Range(0, numberOfVertices) |
| 186 | 123 | | let verticesReachableFromVertex = VisitStrategy.DepthFirstSearchFromVertex(dag, vertex).ToHashSet() |
| 240 | 124 | | select KeyValuePair.Create(vertex, verticesReachableFromVertex)); |
| | 125 | |
|
| 54 | 126 | | var topologicalSort = new int[numberOfVertices]; |
| 54 | 127 | | var removedVertices = new HashSet<int>(); |
| 180 | 128 | | while (verticesReachableFromVertices.Count > 0) |
| 147 | 129 | | { |
| 147 | 130 | | var lastSink = ( |
| 147 | 131 | | from verticesReachableFromVertex in verticesReachableFromVertices |
| 399 | 132 | | let vertex = verticesReachableFromVertex.Key |
| 399 | 133 | | where !verticesReachableFromVertex.Value.Except(removedVertices).Except(new[] { vertex }).Any() // Sink |
| 153 | 134 | | select vertex) |
| 147 | 135 | | .DefaultIfEmpty(-1) |
| 147 | 136 | | .Max(); |
| | 137 | |
|
| 147 | 138 | | if (lastSink < 0) |
| 21 | 139 | | throw new InvalidOperationException($"The provided {nameof(dag)} is not a Direct Acyclic Graph"); |
| | 140 | |
|
| 126 | 141 | | topologicalSort[lastSink] = numberOfVertices - 1 - removedVertices.Count; |
| 126 | 142 | | removedVertices.Add(lastSink); |
| 126 | 143 | | verticesReachableFromVertices.Remove(lastSink); |
| 126 | 144 | | } |
| | 145 | |
|
| 33 | 146 | | return topologicalSort; |
| 33 | 147 | | } |
| | 148 | | } |