larroy commented on a change in pull request #16654: Multithreaded Inference 
Support
URL: https://github.com/apache/incubator-mxnet/pull/16654#discussion_r364411275
 
 

 ##########
 File path: src/imperative/cached_op.h
 ##########
 @@ -26,8 +26,180 @@
 #include <utility>
 #include <string>
 #include <unordered_map>
+#include <map>
+#include "../operator/operator_common.h"
+#include "../operator/subgraph/common.h"
+#include "./imperative_utils.h"
 
 namespace mxnet {
+namespace {
+
+  // Attribute-name fragments.  They are combined via AddPrefix() below to
+  // form the keys under which per-pass results are cached on the graph,
+  // e.g. "forward_ref_count" and "full_ref_count".
+  // NOTE(review): the file path says this is a header (cached_op.h); an
+  // anonymous namespace in a header gives every translation unit its own
+  // copy of these symbols -- confirm that is intended.
+  static const char FULL[] = "full";
+  static const char FORWARD[] = "forward";
+  static const char BACKWARD[] = "backward";
+  static const char REF_COUNT[] = "ref_count";
+  static const char MEM_PLAN[] = "mem_plan";
+  static const char STORAGE_PLAN[] = "storage_plan";
+
/*!
 * \brief Join a pass prefix and an attribute name with an underscore.
 *
 * Used to build graph-attribute keys such as "forward_ref_count" from
 * the name-fragment constants above.
 *
 * \param prefix pass name, e.g. "forward", "full"
 * \param s attribute name, e.g. "ref_count"
 * \return "<prefix>_<s>"
 */
std::string AddPrefix(const std::string& prefix,
                      const std::string& s) {
  std::string key{prefix};
  key.append(1, '_');
  key.append(s);
  return key;
}
+// Build the forward, backward (gradient) and combined "full" graphs from a
+// symbol.  All graph parameters are out-parameters filled in by this
+// function:
+//   fwd_graph  - forward graph; receives sym's outputs (deduplicated)
+//   grad_graph - gradient graph produced by pass::MXGradient
+//   full_graph - forward outputs followed by gradient outputs
+//   ograd_entries - placeholder head-gradient nodes, one per forward output
+//   fwd_input_to_grad_output - maps forward-input index -> index into the
+//                              gradient outputs (xs) for that input
+void CreateFullGraph(const nnvm::Symbol& sym,
+                     nnvm::Graph* fwd_graph,
+                     nnvm::Graph* grad_graph,
+                     nnvm::Graph* full_graph,
+                     std::vector<nnvm::NodeEntry>* ograd_entries,
+                     std::unordered_map<uint32_t, uint32_t>* 
fwd_input_to_grad_output) {
+  using namespace nnvm;
+  static const std::vector<const Op*> zero_ops{Op::Get("zeros_like"), 
Op::Get("_zeros")};
+  static const auto _copy_op = Op::Get("_copy");
+  {
+    // Deduplicate repeated outputs: if the same NodeEntry appears more than
+    // once, wrap the repeats in distinct "_copy" nodes so every forward
+    // output is a unique entry.
+    NodeEntryMap<size_t> dedup_out;
+    for (const NodeEntry& nodeEntry : sym.outputs) {
+      if (dedup_out.find(nodeEntry) != dedup_out.end()) {
+        // Repeated output: insert a _copy node named
+        // "<producer>_copy<k>" with a per-entry running counter k.
+        NodePtr copy_node = Node::Create();
+        copy_node->attrs.op = _copy_op;
+        copy_node->attrs.name =
+            nodeEntry.node->attrs.name + "_copy" + 
std::to_string(dedup_out[nodeEntry]++);
+        copy_node->inputs.emplace_back(nodeEntry);
+        if (_copy_op->attr_parser != nullptr) {
+          _copy_op->attr_parser(&(copy_node->attrs));
+        }
+        fwd_graph->outputs.emplace_back(std::move(copy_node));
+      } else {
+        dedup_out.emplace(nodeEntry, 0);
+        fwd_graph->outputs.push_back(nodeEntry);
+      }
+    }
+  }
+
+  // Common-subexpression elimination, on by default; can be disabled via
+  // the MXNET_ELIMINATE_COMMON_EXPR environment variable.
+  bool do_elim_common_expr = dmlc::GetEnv("MXNET_ELIMINATE_COMMON_EXPR", true);
+  if (do_elim_common_expr)
+    *fwd_graph = exec::EliminateCommonExpr(std::move(*fwd_graph));
+
+  // construct backward graph
+  {
+    // One placeholder head-gradient node per forward output; these become
+    // the incoming gradients of the backward pass.
+    ograd_entries->reserve(fwd_graph->outputs.size());
+    for (size_t i = 0; i < fwd_graph->outputs.size(); ++i) {
+      nnvm::NodePtr np = Node::Create();
+      np->attrs.name = "_head_grad_" + std::to_string(i);
+      ograd_entries->emplace_back(np);
+    }
+
+    // xs collects the inputs we differentiate with respect to; mutable
+    // input nodes are skipped (presumably auxiliary states -- confirm).
+    // fwd_input_to_grad_output records, for each kept forward input i,
+    // the position of its gradient among the backward outputs.
+    std::vector<NodeEntry> xs;
+    const IndexedGraph& indexed_graph = fwd_graph->indexed_graph();
+    for (size_t i = 0; i < indexed_graph.input_nodes().size(); ++i) {
+      const uint32_t node_id = indexed_graph.input_nodes()[i];
+      if (indexed_graph.mutable_input_nodes().count(node_id))
+        continue;
+      (*fwd_input_to_grad_output)[i] = xs.size();
+      xs.emplace_back(indexed_graph[node_id].weak_ref.lock());
+    }
+
+    CHECK(!xs.empty())
+        << "There are no inputs in computation graph that require gradients.";
+
+    // Run the gradient pass; zero_ops ("zeros_like"/"_zeros") supply zero
+    // gradients and "_copy" is used as the copy op.
+    *grad_graph = pass::MXGradient(
+        *fwd_graph, fwd_graph->outputs, xs, *ograd_entries,
+        exec::AggregateGradient, nullptr, nullptr,
+        zero_ops, "_copy");
+  }
+
+  // construct full graph
+  {
+    // Full graph outputs = forward outputs followed by gradient outputs.
+    full_graph->outputs = fwd_graph->outputs;
+    for (const auto& i : grad_graph->outputs) 
full_graph->outputs.emplace_back(i);
+  }
+}
+
+// Compute per-entry reference counts for the forward graph, both for the
+// forward pass alone and for the combined forward+backward graph, and store
+// them on fwd_graph as the "forward_ref_count" and "full_ref_count"
+// attributes.  The counts are presumably consumed later for memory
+// planning -- the planning code is not visible here.
+void SetRefCounts(nnvm::Graph* fwd_graph, const nnvm::Graph& full_graph) {
+  const auto& idx = fwd_graph->indexed_graph();
+  CHECK_GE(idx.input_nodes().size(), 1) << "CachedOp requires at least 1 
input";
+
+  // Count, for every node entry of the forward graph, how many times it is
+  // referenced: once for being a graph input, once for being a graph
+  // output, and once per consuming node input.
+  std::vector<uint32_t> ref_count(idx.num_node_entries(), 0);
+  for (const auto& i : idx.input_nodes()) ++ref_count[idx.entry_id(i, 0)];
+  for (const auto& i : idx.outputs()) ++ref_count[idx.entry_id(i)];
+  for (size_t i = 0; i < idx.num_nodes(); ++i) {
+    for (const auto& j : idx[i].inputs) ++ref_count[idx.entry_id(j)];
+  }
+
+  fwd_graph->attrs[AddPrefix(FORWARD, REF_COUNT)] =
+      std::make_shared<dmlc::any>(std::move(ref_count));
+
+  size_t num_forward_nodes = idx.num_nodes();
+  size_t num_forward_entries = idx.num_node_entries();
+
+  const auto& full_idx = full_graph.indexed_graph();
+
+  // Count additional references made by backward-only nodes, i.e. the
+  // nodes appended after the forward nodes in the full graph.
+  std::vector<uint32_t> temp_ref_count(full_idx.num_node_entries(), 0);
+  for (size_t i = num_forward_nodes; i < full_idx.num_nodes(); ++i) {
+    for (const auto& j : full_idx[i].inputs) {
+       ++temp_ref_count[full_idx.entry_id(j)];
+    }
+  }
+
+  // Start from a copy of the forward counts (GetAttr returns by value
+  // here) and add the backward references to the forward entries.
+  auto full_ref_count = fwd_graph->GetAttr<std::vector<uint32_t> 
>(AddPrefix(FORWARD,
+                                                                             
REF_COUNT));
+  for (size_t i = 0; i < num_forward_entries; ++i) full_ref_count.at(i) += 
temp_ref_count[i];
+  fwd_graph->attrs[AddPrefix(FULL, REF_COUNT)] =
+      std::make_shared<dmlc::any>(std::move(full_ref_count));
+}
+
+void OptimizeGraph(nnvm::Graph * full_graph, nnvm::Graph * fwd_graph, 
nnvm::Graph * grad_graph,
+                   const Context& context, size_t num_forward_outputs, const 
bool inlining) {
+#if MXNET_USE_CUDA && !defined(_WIN32)
+  if (context.dev_mask() == kGPU &&
 
 Review comment:
   Can this CUDA-specific `#if` block be factored out into a separate helper so the platform-conditional logic is not inlined here?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to