Repository: incubator-quickstep Updated Branches: refs/heads/master 563abe043 -> 6e3499a80
Topological sort functionality in DAG - Implemented a very simple Kahn's algorithm for topological sorting of nodes in the DAG class. Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/6e3499a8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/6e3499a8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/6e3499a8 Branch: refs/heads/master Commit: 6e3499a80c559cb3ab11b9800cf5813b0f233f77 Parents: 563abe0 Author: Harshad Deshmukh <hbdeshm...@apache.org> Authored: Tue Apr 18 15:34:16 2017 -0500 Committer: Harshad Deshmukh <hbdeshm...@apache.org> Committed: Wed Apr 19 11:06:38 2017 -0500 ---------------------------------------------------------------------- utility/DAG.hpp | 51 +++++++++++++++++++++++ utility/tests/DAG_unittest.cpp | 82 +++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6e3499a8/utility/DAG.hpp ---------------------------------------------------------------------- diff --git a/utility/DAG.hpp b/utility/DAG.hpp index a1f2619..c286880 100644 --- a/utility/DAG.hpp +++ b/utility/DAG.hpp @@ -22,6 +22,7 @@ #include <cstddef> #include <memory> +#include <queue> #include <unordered_map> #include <unordered_set> #include <utility> @@ -246,6 +247,11 @@ class DAG { return nodes_[node_index].getDependents().end(); } + /** + * @brief Get a topologically sorted list of node IDs. + **/ + std::vector<size_type_nodes> getTopologicalSorting() const; + private: /** * @brief A node in the DAG which contains a payload. DAGNode owns its @@ -489,6 +495,51 @@ bool DAG<T, LinkMetadataT>::hasCycleHelper(const typename DAG<T, LinkMetadataT>: return false; } +template <class T, class LinkMetadataT> +std::vector<typename DAG<T, LinkMetadataT>::size_type_nodes> +DAG<T, LinkMetadataT>::getTopologicalSorting() const { + // As a clarification, if A->B then A is the dependency for B and B is dependent on A. + // We implement "Kahn's algorithm" for the sorting. + DCHECK(!hasCycle()); + // This list is going to be the topologically sorted output. + std::unique_ptr<std::vector<typename DAG<T, LinkMetadataT>::size_type_nodes>> + sorted_list(new std::vector<size_type_nodes>()); + sorted_list->reserve(this->size()); + // Key = node ID, value = # incoming edges for this node. + // NOTE(harshad) - We modify the "values" in this map as we go along. + std::unordered_map<typename DAG<T, LinkMetadataT>::size_type_nodes, + std::size_t> num_dependencies; + std::queue<typename DAG<T, LinkMetadataT>::size_type_nodes> nodes_with_no_dependencies; + // First store the nodes without any dependencies in a list. + // Also remember the number of dependencies for each node in a map. + for (auto node_id = 0u; node_id < this->size(); ++node_id) { + if (nodes_[node_id].getDependencies().empty()) { + nodes_with_no_dependencies.emplace(node_id); + } + num_dependencies[node_id] = nodes_[node_id].getDependencies().size(); + } + // The algorithm begins now. + while (!nodes_with_no_dependencies.empty()) { + // For a node with no dependencies ... + auto curr_node = nodes_with_no_dependencies.front(); + nodes_with_no_dependencies.pop(); + // Add the node to the sorted list. + sorted_list->emplace_back(curr_node); + auto dependents_of_curr_node = getDependents(curr_node); + for (auto dependent_iterator : dependents_of_curr_node) { + // For each dependent of the current node ... + auto dependent_node_id = dependent_iterator.first; + // Remove the incoming edge from curr_node. + DCHECK_GE(num_dependencies[dependent_node_id], 1u); + if (--num_dependencies[dependent_node_id] == 0) { + // Now this node has no children. + nodes_with_no_dependencies.emplace(dependent_node_id); + } + } + } + return *(sorted_list.release()); +} + /** @} */ } // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6e3499a8/utility/tests/DAG_unittest.cpp ---------------------------------------------------------------------- diff --git a/utility/tests/DAG_unittest.cpp b/utility/tests/DAG_unittest.cpp index 3fe2990..3e8d167 100644 --- a/utility/tests/DAG_unittest.cpp +++ b/utility/tests/DAG_unittest.cpp @@ -490,6 +490,88 @@ TEST(DAGTest, LinkMetadataBoolTest) { EXPECT_FALSE(dag_.getLinkMetadata(1, 0)); } +TEST(DAGTest, TopoSortTest) { + const int kNodeSize = 5; + DAG<DummyPayload, int> dag_; + + for (int node_index = 0; node_index < kNodeSize; ++node_index) { + ASSERT_EQ(static_cast<std::size_t>(node_index), + dag_.createNode(new DummyPayload(node_index))); + } + + /* + * 0 + * / \ + * v v + * 1 2 + * \ / + * v + * 3 + * | + * v + * 4 + * + */ + + dag_.createLink(0, 1, 2); + dag_.createLink(0, 2, 1); + dag_.createLink(1, 3, 1); + dag_.createLink(2, 3, 1); + dag_.createLink(3, 4, 1); + + vector<DAG<DummyPayload, int>::size_type_nodes> sorted_list = + dag_.getTopologicalSorting(); + std::unordered_map<DAG<DummyPayload, int>::size_type_nodes, bool> node_exists; + // First check if the ordering is a legal sequence of nodes, i.e. every node + // appears exactly once. + for (auto node_id = 0u; node_id < dag_.size(); ++node_id) { + node_exists[node_id] = false; + } + for (auto i : sorted_list) { + node_exists[i] = true; + } + for (auto node_id = 0u; node_id < dag_.size(); ++node_id) { + ASSERT_TRUE(node_exists[node_id]); + } + // We apply the following condition for verifying if we have obtained a valid + // topological sorting. + // If there is an edge i->j between two nodes i and j, then i comes before j + // in the topologically sorted order. + // We use a map to store the position of a given node in the sorted order. + // Key = node ID, value = position of the node in the sorted order. + std::unordered_map<std::size_t, std::size_t> position_in_sorted_order; + for (std::size_t i = 0; i < sorted_list.size(); ++i) { + position_in_sorted_order[sorted_list[i]] = i; + } + std::vector<std::tuple<std::size_t, std::size_t>> edges; + // Populate the list of edges. + edges.emplace_back(0, 1); + edges.emplace_back(0, 2); + edges.emplace_back(1, 3); + edges.emplace_back(2, 3); + edges.emplace_back(3, 4); + for (auto curr_edge : edges) { + // (i, j) : i is "from node", j is "to node". + std::size_t from_node_position = position_in_sorted_order[std::get<0>(curr_edge)]; + std::size_t to_node_position = position_in_sorted_order[std::get<1>(curr_edge)]; + ASSERT_LT(from_node_position, to_node_position); + } + // Now extend the same logic that we applied for edges for paths in the DAG. + // We have already verified paths with length = 1 (edges), so we will only + // consider paths with length more than one. + std::vector<std::tuple<std::size_t, std::size_t>> paths; + paths.emplace_back(0, 3); + paths.emplace_back(0, 4); + paths.emplace_back(1, 4); + paths.emplace_back(2, 4); + for (auto curr_path : paths) { + // (i, j) : i is "from node", j is "to node". + std::size_t from_node_position = position_in_sorted_order[std::get<0>(curr_path)]; + std::size_t to_node_position = position_in_sorted_order[std::get<1>(curr_path)]; + ASSERT_LT(from_node_position, to_node_position); + } +} + #ifdef QUICKSTEP_DEBUG TEST(DAGDeathTest, SixNodeStagesCycleTest) { const int kNodeSize = 6;