I guess there is no way to enforce parallelism in the GraphRuntime itself. Parallelism
only exists inside the module (i.e., inside the compiled kernels). The GraphRuntime is
designed to realize heterogeneous execution.
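For example, intra-op parallelism comes from the generated kernels, which call into the runtime thread pool through `TVMBackendParallelLaunch`. Below is a minimal hand-written sketch of that pattern (the `AddCtx` struct and `ParallelAdd` worker are made-up names for illustration; real generated code does the equivalent):
```
#include <tvm/runtime/c_backend_api.h>
#include <cstdio>
#include <vector>

// Payload shared by all worker tasks (hypothetical example type).
struct AddCtx {
  const float* a;
  const float* b;
  float* out;
  int n;
};

// FTVMParallelLambda: each task processes one slice of the output.
static int ParallelAdd(int task_id, TVMParallelGroupEnv* penv, void* cdata) {
  AddCtx* ctx = static_cast<AddCtx*>(cdata);
  int num_task = penv->num_task;
  int begin = ctx->n * task_id / num_task;
  int end = ctx->n * (task_id + 1) / num_task;
  for (int i = begin; i < end; ++i) {
    ctx->out[i] = ctx->a[i] + ctx->b[i];
  }
  return 0;
}

int main() {
  std::vector<float> a(1024, 1.0f), b(1024, 2.0f), out(1024, 0.0f);
  AddCtx ctx{a.data(), b.data(), out.data(), 1024};
  // num_task = 0 lets the runtime thread pool pick the default thread count.
  TVMBackendParallelLaunch(ParallelAdd, &ctx, 0);
  std::printf("out[0] = %f\n", out[0]);
  return 0;
}
```
So the thread-level parallelism lives inside the packed functions; the graph runtime never sees it.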
```
Module GraphRuntimeCreate(const std::string& sym_json,
                          const tvm::runtime::Module& m,
                          const std::vector<TVMContext>& ctxs) {
  auto exec = make_object<GraphRuntime>();
  exec->Init(sym_json, m, ctxs);
  return Module(exec);
}
```
I guess the JSON is the compute graph, the module contains the packed functions used by the
compute graph, and the contexts tell the runtime which device API to pick.
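From the deployment side, this is reached through the global registered in graph_runtime.cc as `tvm.graph_runtime.create`. A rough C++ sketch of calling it (the file names `deploy_lib.so` / `deploy_graph.json` are placeholders; more `(device_type, device_id)` pairs could be appended for heterogeneous execution):
```
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
#include <dlpack/dlpack.h>
#include <fstream>
#include <sstream>
#include <string>

int main() {
  // The compiled library holds the packed functions the graph refers to.
  tvm::runtime::Module lib = tvm::runtime::Module::LoadFromFile("deploy_lib.so");

  // The graph JSON describes the compute graph (nodes, edges, storage plan).
  std::ifstream json_in("deploy_graph.json");
  std::stringstream ss;
  ss << json_in.rdbuf();
  std::string graph_json = ss.str();

  // Create the runtime on CPU; extra (device_type, device_id) pairs would
  // give the runtime more contexts for heterogeneous execution.
  const tvm::runtime::PackedFunc* create =
      tvm::runtime::Registry::Get("tvm.graph_runtime.create");
  tvm::runtime::Module gmod =
      (*create)(graph_json, lib, static_cast<int>(kDLCPU), 0);

  // The usual entry points are themselves packed functions.
  tvm::runtime::PackedFunc run = gmod.GetFunction("run");
  run();
  return 0;
}
```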
```
std::pair<std::function<void()>, std::shared_ptr<GraphRuntime::OpArgs> >
GraphRuntime::CreateTVMOp(const TVMOpParam& param,
                          const std::vector<DLTensor>& args,
                          size_t num_inputs) {
  std::shared_ptr<GraphRuntime::OpArgs> arg_ptr =
      std::make_shared<GraphRuntime::OpArgs>();
  // setup address.
  arg_ptr->args = args;
  if (param.flatten_data) {
    arg_ptr->shape_data.resize(arg_ptr->args.size());
  }
  for (size_t i = 0; i < arg_ptr->args.size(); ++i) {
    TVMValue v;
    DLTensor* t = &arg_ptr->args[i];
    v.v_handle = t;
    arg_ptr->arg_values.push_back(v);
    arg_ptr->arg_tcodes.push_back(kTVMDLTensorHandle);
    if (param.flatten_data) {
      arg_ptr->shape_data[i] = std::accumulate(
          t->shape, t->shape + t->ndim, 1, std::multiplies<int64_t>());
      t->ndim = 1;
      t->shape = &(arg_ptr->shape_data[i]);
    }
  }

  if (param.func_name == "__nop") {
    return {[]() {}, arg_ptr};
  } else if (param.func_name == "__copy") {
    // Perform cross device data copy.
    // Directly copy data from the input to the output.
    auto fexec = [arg_ptr]() {
      DLTensor* from = static_cast<DLTensor*>(arg_ptr->arg_values[0].v_handle);
      DLTensor* to = static_cast<DLTensor*>(arg_ptr->arg_values[1].v_handle);
      TVM_CCALL(TVMArrayCopyFromTo(from, to, nullptr));
    };
    return {fexec, arg_ptr};
  }

  // Get compiled function from the module that contains both host and device
  // code.
  tvm::runtime::PackedFunc pf = module_.GetFunction(param.func_name, true);
  CHECK(pf != nullptr) << "no such function in module: " << param.func_name;

  auto fexec = [arg_ptr, pf]() {
    TVMRetValue rv;
    TVMArgs targs(arg_ptr->arg_values.data(),
                  arg_ptr->arg_tcodes.data(),
                  static_cast<int>(arg_ptr->arg_values.size()));
    pf.CallPacked(targs, &rv);
  };
  return {fexec, arg_ptr};
}
```
There is no strategy to enforce parallelism across the op_execs_. The runtime just executes
what the graph JSON tells it, one op after another.
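For reference, GraphRuntime::Run (in the same graph_runtime.cc; the exact code may differ slightly across versions) roughly just walks op_execs_ sequentially in the topological order given by the JSON:
```
void GraphRuntime::Run() {
  // Invoke each stored closure in graph order; entries for "null"
  // (input/parameter) nodes are empty and are skipped.
  for (size_t i = 0; i < op_execs_.size(); ++i) {
    if (op_execs_[i]) op_execs_[i]();
  }
}
```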