[GitHub] [tvm] AndrewZhaoLuo commented on a diff in pull request #11000: [Graph Debugger] Expose way to benchmark individual nodes.

2022-04-25 Thread GitBox


AndrewZhaoLuo commented on code in PR #11000:
URL: https://github.com/apache/tvm/pull/11000#discussion_r858230661


##
tests/python/unittest/test_runtime_graph_debug.py:
##
@@ -185,5 +191,47 @@ def check_remote(server):
 check_remote(rpc.Server("127.0.0.1"))
 
 
+@tvm.testing.requires_llvm
+def test_run_single_node(graph):
+mlib_proxy = tvm.support.FrontendTestModule()
+mlib_proxy["myadd"] = myadd
+try:
+mod: debug_executor.GraphModuleDebug = debug_executor.create(graph, 
mlib_proxy, tvm.cpu(0))
+except ValueError:
+return

Review Comment:
   Cool, done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



[GitHub] [tvm] AndrewZhaoLuo commented on a diff in pull request #11000: [Graph Debugger] Expose way to benchmark individual nodes.

2022-04-25 Thread GitBox


AndrewZhaoLuo commented on code in PR #11000:
URL: https://github.com/apache/tvm/pull/11000#discussion_r858100679


##
tests/python/unittest/test_runtime_graph_debug.py:
##
@@ -19,26 +19,34 @@
 import re
 import sys
 import time
+from distutils.log import debug
 
+import numpy as np
 import pytest
-
 import tvm
 import tvm.testing
-from tvm import te
-import numpy as np
-from tvm import rpc
+from tvm import rpc, te
+from tvm._ffi.base import TVMError
 from tvm.contrib import utils
 from tvm.contrib.debugger import debug_executor
 
+# Constants for creating simple graphs
+n = 4
+A = te.placeholder((n,), name="A")
+B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B")
+s = te.create_schedule(B.op)

Review Comment:
   Done



##
tests/python/unittest/test_runtime_graph_debug.py:
##
@@ -185,5 +191,47 @@ def check_remote(server):
 check_remote(rpc.Server("127.0.0.1"))
 
 
+@tvm.testing.requires_llvm
+def test_run_single_node(graph):
+mlib_proxy = tvm.support.FrontendTestModule()
+mlib_proxy["myadd"] = myadd
+try:
+mod: debug_executor.GraphModuleDebug = debug_executor.create(graph, 
mlib_proxy, tvm.cpu(0))
+except ValueError:
+return
+
+a = np.random.uniform(size=(n,)).astype(A.dtype)
+mod.set_input(x=a)
+
+assert len(mod.debug_datum.get_graph_nodes()) == 2
+assert mod.debug_datum.get_graph_nodes()[0]["op"] == "param"
+assert mod.debug_datum.get_graph_nodes()[1]["op"] == "myadd"
+
+# Running a node with no associated function should return instantly and 
have 0 runtime
+mod.run_individual_node(0, number=1).mean == 0

Review Comment:
   Oopsy, done



##
tests/python/unittest/test_runtime_graph_debug.py:
##
@@ -185,5 +191,47 @@ def check_remote(server):
 check_remote(rpc.Server("127.0.0.1"))
 
 
+@tvm.testing.requires_llvm
+def test_run_single_node(graph):
+mlib_proxy = tvm.support.FrontendTestModule()
+mlib_proxy["myadd"] = myadd
+try:
+mod: debug_executor.GraphModuleDebug = debug_executor.create(graph, 
mlib_proxy, tvm.cpu(0))
+except ValueError:
+return
+
+a = np.random.uniform(size=(n,)).astype(A.dtype)
+mod.set_input(x=a)
+
+assert len(mod.debug_datum.get_graph_nodes()) == 2
+assert mod.debug_datum.get_graph_nodes()[0]["op"] == "param"
+assert mod.debug_datum.get_graph_nodes()[1]["op"] == "myadd"
+
+# Running a node with no associated function should return instantly and 
have 0 runtime
+mod.run_individual_node(0, number=1).mean == 0
+
+# Meanwhile the actual function should take some time, more time if you 
run it more times
+repeat_1_result = mod.run_individual_node(1, repeat=1)
+repeat_1_result.mean > 0

Review Comment:
   Oopsy, done



##
tests/python/unittest/test_runtime_graph_debug.py:
##
@@ -185,5 +191,47 @@ def check_remote(server):
 check_remote(rpc.Server("127.0.0.1"))
 
 
+@tvm.testing.requires_llvm
+def test_run_single_node(graph):
+mlib_proxy = tvm.support.FrontendTestModule()
+mlib_proxy["myadd"] = myadd
+try:
+mod: debug_executor.GraphModuleDebug = debug_executor.create(graph, 
mlib_proxy, tvm.cpu(0))
+except ValueError:
+return

Review Comment:
   This will be hit if TVM is not built with the profiler on. Added a comment 
clarifying this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



[GitHub] [tvm] AndrewZhaoLuo commented on a diff in pull request #11000: [Graph Debugger] Expose way to benchmark individual nodes.

2022-04-22 Thread GitBox


AndrewZhaoLuo commented on code in PR #11000:
URL: https://github.com/apache/tvm/pull/11000#discussion_r856618731


##
src/runtime/graph_executor/debug/graph_executor_debug.cc:
##
@@ -114,15 +94,69 @@ class GraphExecutorDebug : public GraphExecutor {
 
 std::ostringstream os;
 for (size_t index = 0; index < time_sec_per_op.size(); index++) {
-  os << time_sec_per_op[index] << ",";
+  double time = time_sec_per_op[index];
+  // To have good behavior when calculating total time, etc.
+  if (isnan(time)) {
+time = 0;
+  }
+  os << time << ",";
 }
 return os.str();
   }
 
+  std::vector> RunIndividualNode(int node_index, int 
number, int repeat,
+ int min_repeat_ms) {
+// warmup run
+// GraphExecutor::Run();
+std::string tkey = module_->type_key();
+
+// results_in_seconds[a][b] is the bth index run of the ath index repeat
+std::vector> results_in_seconds;
+
+if (tkey == "rpc") {
+  LOG(FATAL) << "RPC measurements should not use RunIndividualNode!";
+}
+
+for (int i = 0; i < repeat; ++i) {
+  std::vector op_timers;
+  double duration_ms = 0.0;
+
+  // Keep timing operations, upping number of repeats until we reach 
min_repeat_ms
+  do {
+op_timers.clear();
+if (duration_ms > 0.0) {
+  number = static_cast(std::max((min_repeat_ms / (duration_ms / 
number) + 1),
+ number * 1.618));  // 1.618 is 
chosen by random
+}
+
+std::chrono::time_point
+tbegin, tend;
+tbegin = std::chrono::high_resolution_clock::now();

Review Comment:
   Done, it now uses the linked time evaluator. Lots of deduped code B)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



[GitHub] [tvm] AndrewZhaoLuo commented on a diff in pull request #11000: [Graph Debugger] Expose way to benchmark individual nodes.

2022-04-22 Thread GitBox


AndrewZhaoLuo commented on code in PR #11000:
URL: https://github.com/apache/tvm/pull/11000#discussion_r856618606


##
python/tvm/contrib/debugger/debug_executor.py:
##
@@ -281,6 +282,42 @@ def run_individual(self, number, repeat=1, 
min_repeat_ms=0):
 ret = self._run_individual(number, repeat, min_repeat_ms)
 return ret.strip(",").split(",") if ret else []
 
+def run_individual_node(self, index, number, repeat=1, min_repeat_ms=0):
+"""Benchmark a single node in the serialized graph.
+
+Parameters
+--
+index : int
+The index of the node, see `self.debug_datum.get_graph_nodes`
+
+number: int
+The number of times to run the node to get a benchmark result.
+
+repeat: int
+The number of times to benchmark the nodes.

Review Comment:
   Done



##
python/tvm/contrib/debugger/debug_executor.py:
##
@@ -281,6 +282,42 @@ def run_individual(self, number, repeat=1, 
min_repeat_ms=0):
 ret = self._run_individual(number, repeat, min_repeat_ms)
 return ret.strip(",").split(",") if ret else []
 
+def run_individual_node(self, index, number, repeat=1, min_repeat_ms=0):
+"""Benchmark a single node in the serialized graph.
+
+Parameters
+--
+index : int
+The index of the node, see `self.debug_datum.get_graph_nodes`
+
+number: int
+The number of times to run the node to get a benchmark result.
+
+repeat: int
+The number of times to benchmark the nodes.
+
+min_repeat_ms: int
+The minimum consecutive runtime of the node for a benchmark result.
+
+Returns
+---
+A list of dimensions `number` x `repeat` each one the runtime of the 
node

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



[GitHub] [tvm] AndrewZhaoLuo commented on a diff in pull request #11000: [Graph Debugger] Expose way to benchmark individual nodes.

2022-04-18 Thread GitBox


AndrewZhaoLuo commented on code in PR #11000:
URL: https://github.com/apache/tvm/pull/11000#discussion_r852460978


##
python/tvm/contrib/debugger/debug_executor.py:
##
@@ -281,6 +282,42 @@ def run_individual(self, number, repeat=1, 
min_repeat_ms=0):
 ret = self._run_individual(number, repeat, min_repeat_ms)
 return ret.strip(",").split(",") if ret else []
 
+def run_individual_node(self, index, number, repeat=1, min_repeat_ms=0):
+"""Benchmark a single node in the serialized graph.

Review Comment:
   Done



##
python/tvm/contrib/debugger/debug_executor.py:
##
@@ -281,6 +282,42 @@ def run_individual(self, number, repeat=1, 
min_repeat_ms=0):
 ret = self._run_individual(number, repeat, min_repeat_ms)
 return ret.strip(",").split(",") if ret else []
 
+def run_individual_node(self, index, number, repeat=1, min_repeat_ms=0):
+"""Benchmark a single node in the serialized graph.
+
+Parameters
+--
+index : int
+The index of the node, see `self.debug_datum.get_graph_nodes`
+
+number: int
+The number of times to run the node to get a benchmark result.
+
+repeat: int
+The number of times to benchmark the nodes.
+
+min_repeat_ms: int
+The minimum consecutive runtime of the node for a benchmark result.
+
+Returns
+---
+A list of dimensions `number` x `repeat` each one the runtime of the 
node

Review Comment:
   Basically, if you have 3 repeats of 3 numbers, it would return a 3x3 
array/list.
   
   arr[0][1] would be the first repeat of the second number; arr[1][2] would be 
repeat 2, number 3, etc.
   
   I think BenchmarkResult is better though since it seems to store the 
sequence of all float results anyway?



##
python/tvm/contrib/debugger/debug_executor.py:
##
@@ -281,6 +282,42 @@ def run_individual(self, number, repeat=1, 
min_repeat_ms=0):
 ret = self._run_individual(number, repeat, min_repeat_ms)
 return ret.strip(",").split(",") if ret else []
 
+def run_individual_node(self, index, number, repeat=1, min_repeat_ms=0):

Review Comment:
   done



##
python/tvm/contrib/debugger/debug_executor.py:
##
@@ -281,6 +282,42 @@ def run_individual(self, number, repeat=1, 
min_repeat_ms=0):
 ret = self._run_individual(number, repeat, min_repeat_ms)
 return ret.strip(",").split(",") if ret else []
 
+def run_individual_node(self, index, number, repeat=1, min_repeat_ms=0):
+"""Benchmark a single node in the serialized graph.
+
+Parameters
+--
+index : int
+The index of the node, see `self.debug_datum.get_graph_nodes`
+
+number: int
+The number of times to run the node to get a benchmark result.
+
+repeat: int
+The number of times to benchmark the nodes.

Review Comment:
   I'll probably just use time_evaluator, so will change later.



##
src/runtime/graph_executor/debug/graph_executor_debug.cc:
##
@@ -362,6 +396,33 @@ PackedFunc GraphExecutorDebug::GetFunction(const 
std::string& name,
   ICHECK_GE(min_repeat_ms, 0);
   *rv = this->RunIndividual(number, repeat, min_repeat_ms);
 });
+  } else if (name == "run_individual_node") {
+return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+  int node_index = args[0];
+  int number = args[1];
+  int repeat = args[2];
+  int min_repeat_ms = args[3];
+  ICHECK_GE(node_index, 0);
+  ICHECK_LT(node_index, nodes_.size());
+  ICHECK_GT(number, 0);
+  ICHECK_GT(repeat, 0);
+  ICHECK_GE(min_repeat_ms, 0);
+  std::vector> results =
+  this->RunIndividualNode(node_index, number, repeat, min_repeat_ms);
+
+  std::stringstream s;
+  s.precision(6);  // down to microseconds

Review Comment:
   Done



##
src/runtime/graph_executor/debug/graph_executor_debug.cc:
##
@@ -114,15 +94,69 @@ class GraphExecutorDebug : public GraphExecutor {
 
 std::ostringstream os;
 for (size_t index = 0; index < time_sec_per_op.size(); index++) {
-  os << time_sec_per_op[index] << ",";
+  double time = time_sec_per_op[index];
+  // To have good behavior when calculating total time, etc.
+  if (isnan(time)) {
+time = 0;
+  }

Review Comment:
   0 / 0 is possible from the above for nodes which do not have any associated 
execution function.



##
src/runtime/graph_executor/debug/graph_executor_debug.cc:
##
@@ -114,15 +94,69 @@ class GraphExecutorDebug : public GraphExecutor {
 
 std::ostringstream os;
 for (size_t index = 0; index < time_sec_per_op.size(); index++) {
-  os << time_sec_per_op[index] << ",";
+  double time = time_sec_per_op[index];
+  // To have good behavior when calculating total time, etc.
+  if (isnan(time)) {
+