This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/run-individual-node
in repository https://gitbox.apache.org/repos/asf/tvm.git
commit f8718669194308921dc2d0a4b9137a2f739a45d0
Author: Andrew Zhao Luo <andrew.zhao....@gmail.com>
AuthorDate: Tue Apr 12 15:42:43 2022 -0700

    initial
---
 .../graph_executor/debug/graph_executor_debug.cc   | 114 +++++++++++++--------
 1 file changed, 74 insertions(+), 40 deletions(-)

diff --git a/src/runtime/graph_executor/debug/graph_executor_debug.cc b/src/runtime/graph_executor/debug/graph_executor_debug.cc
index 12a739722a..dd95478e17 100644
--- a/src/runtime/graph_executor/debug/graph_executor_debug.cc
+++ b/src/runtime/graph_executor/debug/graph_executor_debug.cc
@@ -67,48 +67,21 @@ class GraphExecutorDebug : public GraphExecutor {
         time_sec_per_op[index] += RunOpRPC(index, number, repeat, min_repeat_ms);
       }
     } else {
-      for (int i = 0; i < repeat; ++i) {
-        std::chrono::time_point<std::chrono::high_resolution_clock, std::chrono::nanoseconds>
-            tbegin, tend;
-        double duration_ms = 0.0;
-        do {
-          std::fill(time_sec_per_op.begin(), time_sec_per_op.end(), 0);
-          if (duration_ms > 0.0) {
-            number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
-                                               number * 1.618));  // 1.618 is chosen by random
-          }
-          tbegin = std::chrono::high_resolution_clock::now();
-          std::vector<std::vector<Timer>> op_timers;
-          for (size_t index = 0; index < op_execs_.size(); index++) {
-            op_timers.push_back({});
-          }
-          for (int k = 0; k < number; k++) {
-            for (size_t index = 0; index < op_execs_.size(); ++index) {
-              if (op_execs_[index]) {
-                op_timers[index].push_back(RunOpHost(index));
-              }
-            }
-          }
-          for (size_t index = 0; index < op_execs_.size(); ++index) {
-            for (auto t : op_timers[index]) {
-              time_sec_per_op[index] += t->SyncAndGetElapsedNanos() / 1e9;
-            }
-          }
-          tend = std::chrono::high_resolution_clock::now();
-          duration_ms =
-              std::chrono::duration_cast<std::chrono::duration<double>>(tend - tbegin).count() *
-              1000;
-        } while (duration_ms < min_repeat_ms);
-
-        LOG(INFO) << "Iteration: " << i;
-        int op = 0;
-        for (size_t index = 0; index < time_sec_per_op.size(); index++) {
-          if (op_execs_[index]) {
-            time_sec_per_op[index] /= number;
-            LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": "
-                      << time_sec_per_op[index] * 1e6 << " us/iter";
+      for (size_t index = 0; index < op_execs_.size(); ++index) {
+        std::vector<std::vector<double>> results =
+            RunIndividualNode(index, number, repeat, min_repeat_ms);
+
+        double total = 0.0;
+        for (size_t cur_repeat = 0; cur_repeat < results.size(); cur_repeat++) {
+          std::vector<double>& timings = results[cur_repeat];
+          double total_in_trial = 0;
+          for (double t : timings) {
+            total_in_trial += t;
           }
+          total_in_trial /= timings.size();
+          total += total_in_trial;
         }
+        time_sec_per_op[index] = total / results.size();
       }
     }
 
@@ -119,6 +92,54 @@ class GraphExecutorDebug : public GraphExecutor {
     return os.str();
   }
 
+  std::vector<std::vector<double>> RunIndividualNode(int node_index, int number, int repeat,
+                                                     int min_repeat_ms) {
+    // warmup run
+    GraphExecutor::Run();
+    std::string tkey = module_->type_key();
+
+    // results_in_seconds[a][b] is the bth index run of the ath index repeat
+    std::vector<std::vector<double>> results_in_seconds;
+
+    if (tkey == "rpc") {
+      LOG(FATAL) << "RPC measurements should not use RunIndividualNode!";
+    }
+
+    for (int i = 0; i < repeat; ++i) {
+      std::vector<Timer> op_timers;
+      double duration_ms = 0.0;
+
+      // Keep timing operations, upping number of repeats until we reach min_repeat_ms
+      do {
+        op_timers.clear();
+        if (duration_ms > 0.0) {
+          number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
+                                             number * 1.618));  // 1.618 is chosen by random
+        }
+
+        std::chrono::time_point<std::chrono::high_resolution_clock, std::chrono::nanoseconds>
+            tbegin, tend;
+        tbegin = std::chrono::high_resolution_clock::now();
+        for (int k = 0; k < number; k++) {
+          if (op_execs_[node_index]) {
+            op_timers.push_back(RunOpHost(node_index));
+          }
+        }
+        tend = std::chrono::high_resolution_clock::now();
+        duration_ms =
+            std::chrono::duration_cast<std::chrono::duration<double>>(tend - tbegin).count() * 1000;
+      } while (duration_ms < min_repeat_ms);
+
+      std::vector<double> timings_in_seconds;
+      for (Timer t : op_timers) {
+        timings_in_seconds.push_back(t->SyncAndGetElapsedNanos() / 1e9);
+      }
+      results_in_seconds.push_back(timings_in_seconds);
+    }
+
+    return results_in_seconds;
+  }
+
   double RunOpRPC(int index, int number, int repeat, int min_repeat_ms) {
     // Right now we expect either "tvm_op" for nodes which run PackedFunc or "null" for nodes which
     // represent inputs/parameters to the graph. Other types may be supported in the future, but
@@ -362,6 +383,19 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name,
       ICHECK_GE(min_repeat_ms, 0);
       *rv = this->RunIndividual(number, repeat, min_repeat_ms);
     });
+  } else if (name == "run_individual_node") {
+    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+      int node_index = args[0];
+      int number = args[1];
+      int repeat = args[2];
+      int min_repeat_ms = args[3];
+      ICHECK_GE(node_index, 0);
+      ICHECK_LT(node_index, nodes_.size());
+      ICHECK_GT(number, 0);
+      ICHECK_GT(repeat, 0);
+      ICHECK_GE(min_repeat_ms, 0);
+      *rv = this->RunIndividualNode(node_index, number, repeat, min_repeat_ms);
+    });
   } else if (name == "profile") {
     return TypedPackedFunc<profiling::Report(Array<profiling::MetricCollector>)>(
         [sptr_to_self, this](Array<profiling::MetricCollector> collectors) {