jroesch commented on a change in pull request #8788: URL: https://github.com/apache/tvm/pull/8788#discussion_r693113525
########## File path: docs/langref/relay_op.rst ########## @@ -215,7 +215,7 @@ This level support backpropagation of broadcast operators. It is temporary. tvm.relay.ndarray_size tvm.relay.layout_transform tvm.relay.device_copy - tvm.relay.annotation.on_device + tvm.relay.annotation.on_device_op Review comment: What is the reason for renaming the op? ########## File path: include/tvm/runtime/logging.h ########## @@ -409,6 +411,57 @@ inline bool DebugLoggingEnabled() { return state == 1; } +/*! + * \brief Returns true if \p filename is mentioned in the environment + * variable \p TVM_LOG_DEBUG as enabled for 'verbose' logging at /p level. + * Levels go up to 9. + * + * To enable file \p foo.cc for level >= 2 and \p bar.cc for level >= 0 set: + * + * \code + * TVM_LOG_DEBUG="1;foo.cc=2;bar.cc=0;" + * \endcode + */ +bool VerboseLoggingEnabled(const char* filename, int level); Review comment: Is there a reason that we need Verbose logging vs. using DLOG? ########## File path: src/relay/op/annotation/annotation.cc ########## @@ -36,18 +36,18 @@ namespace tvm { namespace relay { -// relay.annotation.on_device +// relay.annotation.on_device_op TVM_REGISTER_NODE_TYPE(OnDeviceAttrs); -TVM_REGISTER_GLOBAL("relay.op.annotation._make.on_device") Review comment: Same as above Q. ########## File path: src/relay/backend/interpreter.cc ########## @@ -913,17 +917,14 @@ std::pair<IRModule, Map<String, IRModule>> Prepare(IRModule mod, Device device, // We only have one device-specific target. tec::TargetMap targets = {{device.device_type, target}}; - // All calls to primitives will use the unique target. Review comment: This is great. 
########## File path: src/relay/backend/graph_executor_codegen.cc ########## @@ -197,32 +200,30 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector< LoweredOutput Codegen(relay::Function func, String mod_name) { mod_name_ = mod_name; - // TODO(@jroesch): we need to split device planning and memory planning - // first we run device assignment, then we perform lowering, and then - // storage planning in ideal world. - - memory_plan_ = GraphPlanMemory(func); - // This first phase moves from implicit use of compile engine, // to instead explicitly lowering the incoming IRModule, and then // performing the preexisting graph executor code generation phase. IRModule mod = IRModule::FromExpr(func); - // Build a map from each operation to device. - tec::DeviceMap device_context_map; - for (const auto& it : memory_plan_->expr_to_storage_info) { - auto expr = it.first; - auto storage_info = it.second; - auto device_types = storage_info->device_types; - // CHECK_EQ(device_types.size(), 1); - tvm::Device dev; - dev.device_id = 0; - dev.device_type = device_types[0]; - device_context_map.insert({expr, dev}); + // Make explicit which device each expression should be executed on. Review comment: Same Q as above, also can we put this code in some kind of helper? ########## File path: src/relay/backend/te_compiler.cc ########## @@ -518,54 +515,55 @@ class LowerTensorExprMutator : public ExprMutator { } } - Expr VisitExpr_(const CallNode* call) override { - Call expr = GetRef<Call>(call); + Expr VisitExpr_(const CallNode* call_node) override { + Device device = GetOnDeviceDevice(call_node); + if (device.device_type) { + PushDevice(device); + Expr arg = VisitExpr(call_node->args[0]); + PopDevice(); + return Call(call_node->op, {arg}, call_node->attrs, call_node->type_args, call_node->span); + } + + Call expr = GetRef<Call>(call_node); // Look for (indirect) calls to primitives. 
- Function prim_func = ResolveToPrimitive(call->op); + Function prim_func = ResolveToPrimitive(call_node->op); if (!prim_func.defined()) { - // Not a call to a primitive function. - if (const FunctionNode* fn = call->op.as<FunctionNode>()) { + // Not a call_node to a primitive function. + if (const FunctionNode* fn = call_node->op.as<FunctionNode>()) { this->process_fn_(GetRef<Function>(fn)); } - return ExprMutator::VisitExpr_(call); + return ExprMutator::VisitExpr_(call_node); } // Find the desired target device. Target target; if (prim_func->GetAttr<String>(attr::kCompiler).defined()) { // The generic 'external device' target. target = Target("ext_dev"); - } else if (device_context_map_.empty() && targets_.size() == 1) { - // The unique target. - target = GetTargetFromInteger(kDLCPU, targets_); } else { - // The target corresponding to the call expression's annotation. - auto itr = device_context_map_.find(expr); - ICHECK(itr != device_context_map_.end()) - << "Could not find an entry in the device context map for " << expr - << "The memory planning was either not performed for this precise node, or there is " - "bug in the memory planner."; - target = GetTargetFromInteger(itr->second.device_type, targets_); + // The target corresponding to the call_node expression's annotation. + device = GetDevice(expr); + // TODO(mbs): device id + target = GetTargetFromInteger(device.device_type, targets_); Review comment: This will become a lookup after we refactor the map? ########## File path: src/relay/backend/graph_plan_memory.cc ########## @@ -452,7 +469,7 @@ class StorageAllocator : public StorageAllocaBaseVisitor { // all the storage resources available std::vector<StorageToken*> data_; /*! 
\brief internal prototype token map */ - std::unordered_map<const ExprNode*, std::vector<StorageToken*> > prototype_; + std::unordered_map<const ExprNode*, std::vector<StorageToken*>> prototype_; }; StaticMemoryPlan GraphPlanMemory(const Function& func) { return StorageAllocator().Plan(func); } Review comment: A related question: should we do the Array-of-Structs vs. Struct-of-Arrays swap here? The memory plan data structure is actually kind of funky right now due to the fact that it maps ops to a list of storages per device. ########## File path: src/relay/transforms/device_annotation.cc ########## @@ -320,191 +326,11 @@ class AnnotatationVisitor : private ExprVisitor { Map<Expr, Integer> annotations_; }; -/* - * \brief Return device allocation map based on the post order traversed graph. Review comment: Should we replace this with a similar updated comment? ########## File path: src/relay/backend/vm/compiler.cc ########## @@ -1041,13 +1013,31 @@ IRModule VMCompiler::OptimizeModule(IRModule mod, const TargetsMap& targets_arg, Array<Pass> pass_seqs = relay::backend::GetPassPrefix(targets, true); + Device default_device; Review comment: Is it possible for us to compute this outside of OptimizeModule? Ideally this should just build the optimization pipeline and have minimal logic. ########## File path: src/relay/backend/aot_executor_codegen.cc ########## @@ -562,32 +558,35 @@ class AOTExecutorCodegen : public ExprVisitor { use_unpacked_api_(target_host->GetAttr<Bool>("unpacked-api").value_or(Bool(false))) {} LoweredOutput Codegen(relay::Function func, String mod_name) { - auto aot_allocator = AOTOnDemandAllocator(); - aot_allocator.Run(func); - - // Pre-lowering storage map and memory plan - StorageMap initial_storage_map = aot_allocator.GetStorageMap(); - StaticMemoryPlan memory_plan(initial_storage_map); - - // Build a map from each operation to device.
- tec::DeviceMap device_context_map; - for (const auto& it : memory_plan->expr_to_storage_info) { - auto expr = it.first; - auto storage_info = it.second; - auto device_types = storage_info->device_types; - // CHECK_EQ(device_types.size(), 1); - tvm::Device dev; - dev.device_id = 0; - dev.device_type = device_types[0]; - device_context_map.insert({expr, dev}); - } - // This first phase moves from implicit use of compile engine, // to instead explicitly lowering the incoming IRModule, and then // performing the preexisting AOT executor code generation phase. IRModule mod = IRModule::FromExpr(func); + + // Make explicit which device each expression should be executed on. Review comment: This code will get folded away in future refactors as we flow the target/device information around? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org