manupa-arm commented on a change in pull request #10189:
URL: https://github.com/apache/tvm/pull/10189#discussion_r827250101
##########
File path: include/tvm/tir/usmp/utils.h
##########
@@ -44,6 +44,126 @@ constexpr const char* kUSMPAlgorithmOption = "tir.usmp.algorithm";
namespace tir {
namespace usmp {
+/*
+ * \brief The ConstantInfoNode contains numeric literal in RO pool
+ */
+struct ConstantInfoNode : public Object {
+ String name_hint;
+ Integer byte_alignment;
+ Integer byte_offset;
+ runtime::NDArray data;
+
+ void VisitAttrs(tvm::AttrVisitor* v) {
+ v->Visit("constant_names", &name_hint);
+ v->Visit("constant_alignment", &byte_alignment);
+ v->Visit("constant_offsets", &byte_offset);
+ v->Visit("constant_data", &data);
+ }
+
+ bool SEqualReduce(const ConstantInfoNode* other, SEqualReducer equal) const {
+ return equal(name_hint, other->name_hint) && equal(byte_alignment, other->byte_alignment) &&
+ equal(byte_offset, other->byte_offset) && equal(data, other->data);
+ }
+
+ void SHashReduce(SHashReducer hash_reduce) const {
+ hash_reduce(name_hint);
+ hash_reduce(byte_alignment);
+ hash_reduce(byte_offset);
+ hash_reduce(data);
+ }
+
+ static constexpr const char* _type_key = "tir.usmp.ConstantInfo";
+ static constexpr bool _type_has_method_sequal_reduce = true;
+ static constexpr bool _type_has_method_shash_reduce = true;
+ TVM_DECLARE_FINAL_OBJECT_INFO(ConstantInfoNode, Object);
+};
+
+class ConstantInfo : public ObjectRef {
+ public:
+ TVM_DLL ConstantInfo(String name, Integer byte_alignment, Integer byte_offset,
+ runtime::NDArray data);
+ TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(ConstantInfo, ObjectRef, ConstantInfoNode);
+};
+
+#if 0
Review comment:
Please remove this
##########
File path: src/tir/analysis/calculate_workspace.cc
##########
@@ -77,6 +69,16 @@ void WorkspaceCalculator::VisitStmt_(const AllocateNode* op) {
current_size -= size;
}
+void WorkspaceCalculator::VisitStmt_(const AllocateConstNode* op) {
Review comment:
Why is this Pass being changed here? I don't think we need to touch this Pass in this PR.
##########
File path: src/target/source/source_module.cc
##########
@@ -237,14 +241,65 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
}
void GenerateInternalWorkspaceBuffers() {
+ Integer constants_byte_alignment_ =
+ target_->GetAttr<Integer>("workspace-byte-alignment").value_or(16);
+ size_t offset = 0;
if (metadata_->pool_inputs.defined()) {
for (const auto& kv : metadata_->pool_inputs.value()) {
tir::usmp::AllocatedPoolInfo allocated_pool_info = kv.second;
if (allocated_pool_info->pool_info->is_internal) {
- code_ << "__attribute__((section(\".data.tvm\"), ";
- code_ << "aligned(" << 16 << ")))\n";
- code_ << "static uint8_t " <<
allocated_pool_info->pool_info->pool_name << "["
- << allocated_pool_info->allocated_size->value << "];\n";
+ do {
+ for (const auto& kv : allocated_pool_info->pool_info->target_access) {
+ if (kv.first->str() == target_->str()) {
+ if (kTargetPoolReadOnlyAccess == kv.second) {
+ // Pool is RO, form an initialized struct
+ code_ << "__attribute__((section(\".rodata.tvm\"), ";
+ code_ << "))\n";
+ code_ << "static struct " <<
allocated_pool_info->pool_info->pool_name << " {\n";
+ // emit struct field names
+ auto const_info_arr = allocated_pool_info->constant_info_arr;
+ std::vector<tir::usmp::ConstantInfo> const_info_vec(const_info_arr.begin(),
+ const_info_arr.end());
+ std::sort(const_info_vec.begin(), const_info_vec.end(),
+ [](const tir::usmp::ConstantInfo& a, const tir::usmp::ConstantInfo& b) {
+ return a->byte_offset->value < b->byte_offset->value;
+ });
+ for (const auto& const_info : const_info_vec) {
+ const auto& data = const_info->data;
+ const auto& offs = const_info->byte_offset;
+ int64_t num_elements = std::accumulate(data.Shape().begin(), data.Shape().end(),
+ 1, std::multiplies<int64_t>());
+ code_ << " ";
+ codegen_c_base_.PrintType(data.DataType(), code_);
+ code_ << " " << const_info->name_hint << "[" <<
num_elements
+ << "] __attribute__((packed, aligned(" <<
const_info->byte_alignment
+ << ")));";
+ code_ << " // " << num_elements * data.DataType().bytes()
+ << " bytes, aligned offset: " << offs << "\n";
+ }
+ code_ << "} " << allocated_pool_info->pool_info->pool_name
<< " = {\n";
+
+ // emit struct field initialization data
+ for (const auto& const_info : const_info_vec) {
+ code_ << " ." << const_info->name_hint << " = {\n";
+ codegen::NDArrayDataToC(const_info->data, 4, code_);
+ code_ << " },\n";
+ }
+ code_ << "};";
+ code_ << "// of total size " <<
allocated_pool_info->allocated_size->value
+ << " bytes, aligned: " << offset << " bytes\n";
+
+ goto endOfLoop;
+ }
+ }
+ }
+ code_ << "__attribute__((section(\".data.tvm\"), ";
+ code_ << "aligned(" << 16 << ")))\n";
+ code_ << "static uint8_t " <<
allocated_pool_info->pool_info->pool_name << "[";
+ code_ << allocated_pool_info->allocated_size->value << "];\n";
+
+ endOfLoop : {}
+ } while (false);
Review comment:
I'm not sure why we need a `do { ... } while (false)` here.
##########
File path: src/target/source/source_module.cc
##########
@@ -237,14 +241,65 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
}
void GenerateInternalWorkspaceBuffers() {
+ Integer constants_byte_alignment_ =
+ target_->GetAttr<Integer>("workspace-byte-alignment").value_or(16);
+ size_t offset = 0;
if (metadata_->pool_inputs.defined()) {
for (const auto& kv : metadata_->pool_inputs.value()) {
tir::usmp::AllocatedPoolInfo allocated_pool_info = kv.second;
if (allocated_pool_info->pool_info->is_internal) {
- code_ << "__attribute__((section(\".data.tvm\"), ";
- code_ << "aligned(" << 16 << ")))\n";
- code_ << "static uint8_t " <<
allocated_pool_info->pool_info->pool_name << "["
- << allocated_pool_info->allocated_size->value << "];\n";
+ do {
+ for (const auto& kv : allocated_pool_info->pool_info->target_access) {
+ if (kv.first->str() == target_->str()) {
+ if (kTargetPoolReadOnlyAccess == kv.second) {
Review comment:
Let us use the presence of constant init data to figure this out instead.
I'm beginning to wonder whether we should just drop the target-based kTargetPoolReadOnlyAccess access specifiers; rather, I think we can specialize the pools into either WorkspacePools or ConstantPools. WDYT?
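For example (just a sketch against the code quoted above, using the constant_info_arr field from this PR), the emission path could key off the initializers themselves:
```c++
// Sketch: pick the emission style from the presence of constant init data,
// instead of consulting the per-target access specifier.
if (allocated_pool_info->constant_info_arr.size() > 0) {
  // constants present -> emit the initialized, read-only struct
} else {
  // no constants -> emit the plain zero-initialized workspace array
}
```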
##########
File path: include/tvm/tir/usmp/utils.h
##########
@@ -44,6 +44,126 @@ constexpr const char* kUSMPAlgorithmOption = "tir.usmp.algorithm";
namespace tir {
namespace usmp {
+/*
+ * \brief The ConstantInfoNode contains numeric literal in RO pool
+ */
+struct ConstantInfoNode : public Object {
Review comment:
We need docs for each field, similar to BufferInfoNode.
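For example, something along these lines (the wording is only a suggestion):
```c++
/*! \brief Describes a constant that is placed inside a read-only memory pool */
struct ConstantInfoNode : public Object {
  /*! \brief The name of the constant */
  String name_hint;
  /*! \brief The byte alignment required for the constant within the pool */
  Integer byte_alignment;
  /*! \brief The byte offset of the constant from the base of the pool */
  Integer byte_offset;
  /*! \brief The data the constant is initialized with */
  runtime::NDArray data;
  // ... VisitAttrs / SEqualReduce / SHashReduce as in this PR ...
};
```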
##########
File path: src/relay/backend/aot_executor_codegen.cc
##########
@@ -817,6 +813,11 @@ class AOTExecutorCodegen : public MixedModeVisitor {
return lowered_mod;
}
+ Integer getModuleAlignment(const IRModule& mod) {
Review comment:
Please add a \brief, and let's be consistent with the function naming style.
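Something like this, say (GetModuleAlignment is only an example of the UpperCamelCase style used elsewhere; the wording of the brief is up to you):
```c++
/*!
 * \brief Get the workspace byte alignment configured for the module.
 * \param mod The IRModule to inspect.
 * \return The configured alignment, falling back to the default when none is set.
 */
Integer GetModuleAlignment(const IRModule& mod) {
  // body as in this PR
}
```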
##########
File path: src/target/source/source_module.cc
##########
@@ -237,14 +241,65 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
}
void GenerateInternalWorkspaceBuffers() {
+ Integer constants_byte_alignment_ =
+ target_->GetAttr<Integer>("workspace-byte-alignment").value_or(16);
+ size_t offset = 0;
if (metadata_->pool_inputs.defined()) {
for (const auto& kv : metadata_->pool_inputs.value()) {
tir::usmp::AllocatedPoolInfo allocated_pool_info = kv.second;
if (allocated_pool_info->pool_info->is_internal) {
- code_ << "__attribute__((section(\".data.tvm\"), ";
- code_ << "aligned(" << 16 << ")))\n";
- code_ << "static uint8_t " <<
allocated_pool_info->pool_info->pool_name << "["
- << allocated_pool_info->allocated_size->value << "];\n";
+ do {
+ for (const auto& kv : allocated_pool_info->pool_info->target_access) {
+ if (kv.first->str() == target_->str()) {
+ if (kTargetPoolReadOnlyAccess == kv.second) {
+ // Pool is RO, form an initialized struct
+ code_ << "__attribute__((section(\".rodata.tvm\"), ";
+ code_ << "))\n";
+ code_ << "static struct " <<
allocated_pool_info->pool_info->pool_name << " {\n";
+ // emit struct field names
+ auto const_info_arr = allocated_pool_info->constant_info_arr;
+ std::vector<tir::usmp::ConstantInfo> const_info_vec(const_info_arr.begin(),
+ const_info_arr.end());
+ std::sort(const_info_vec.begin(), const_info_vec.end(),
+ [](const tir::usmp::ConstantInfo& a, const tir::usmp::ConstantInfo& b) {
+ return a->byte_offset->value < b->byte_offset->value;
+ });
+ for (const auto& const_info : const_info_vec) {
+ const auto& data = const_info->data;
+ const auto& offs = const_info->byte_offset;
+ int64_t num_elements = std::accumulate(data.Shape().begin(), data.Shape().end(),
+ 1, std::multiplies<int64_t>());
+ code_ << " ";
+ codegen_c_base_.PrintType(data.DataType(), code_);
+ code_ << " " << const_info->name_hint << "[" <<
num_elements
+ << "] __attribute__((packed, aligned(" <<
const_info->byte_alignment
+ << ")));";
+ code_ << " // " << num_elements * data.DataType().bytes()
+ << " bytes, aligned offset: " << offs << "\n";
+ }
+ code_ << "} " << allocated_pool_info->pool_info->pool_name
<< " = {\n";
+
+ // emit struct field initialization data
+ for (const auto& const_info : const_info_vec) {
+ code_ << " ." << const_info->name_hint << " = {\n";
+ codegen::NDArrayDataToC(const_info->data, 4, code_);
+ code_ << " },\n";
+ }
+ code_ << "};";
+ code_ << "// of total size " <<
allocated_pool_info->allocated_size->value
+ << " bytes, aligned: " << offset << " bytes\n";
+
+ goto endOfLoop;
Review comment:
Let's avoid `goto` in production code.
##########
File path: src/target/source/source_module.cc
##########
@@ -237,14 +241,65 @@ class CSourceCrtMetadataModuleNode : public
runtime::ModuleNode {
}
void GenerateInternalWorkspaceBuffers() {
Review comment:
Let's create a separate GenerateInternalConstantBuffer() function rather than overloading GenerateInternalWorkspaceBuffers().
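Rough sketch of the split I have in mind (the names are only suggestions); note it also removes the need for the do/while(false) + goto above, since each pool kind gets its own straight-line emitter:
```c++
void GenerateInternalBuffers() {
  if (!metadata_->pool_inputs.defined()) return;
  for (const auto& kv : metadata_->pool_inputs.value()) {
    tir::usmp::AllocatedPoolInfo allocated_pool_info = kv.second;
    if (!allocated_pool_info->pool_info->is_internal) continue;
    if (allocated_pool_info->constant_info_arr.size() > 0) {
      GenerateInternalConstantBuffer(allocated_pool_info);   // initialized struct in .rodata.tvm
    } else {
      GenerateInternalWorkspaceBuffer(allocated_pool_info);  // uint8_t array in .data.tvm
    }
  }
}
```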
##########
File path: src/tir/usmp/transform/assign_pool_info.cc
##########
@@ -48,18 +48,26 @@ class PoolInfoAssigner : public StmtExprMutator {
ICHECK(target_host) << "main function does not have a target attr";
WorkspaceMemoryPools workspace_pools =
module->GetAttr<WorkspaceMemoryPools>(tvm::attr::kWorkspaceMemoryPools)
- .value_or(WorkspaceMemoryPools({CreateDefaultMemoryPool(module)}));
+ .value_or(WorkspaceMemoryPools(
Review comment:
Please create a new structure for ConstantMemoryPools (I mentioned this
in another comment)
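Roughly something that mirrors how WorkspaceMemoryPools wraps its pools (all names and type keys below are placeholders):
```c++
// Placeholder sketch of a ConstantMemoryPools container holding the pools
// that carry constant initialization data.
struct ConstantMemoryPoolsNode : public Object {
  Array<PoolInfo> pools;
  void VisitAttrs(tvm::AttrVisitor* v) { v->Visit("pools", &pools); }
  static constexpr const char* _type_key = "ir.ConstantMemoryPools";
  TVM_DECLARE_FINAL_OBJECT_INFO(ConstantMemoryPoolsNode, Object);
};
```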
##########
File path: include/tvm/tir/usmp/utils.h
##########
@@ -187,22 +312,27 @@ struct AllocatedPoolInfoNode : public Object {
Integer allocated_size;
/*! \brief An optional associated pool Var index of PrimFunc params*/
Optional<Integer> pool_var_idx;
+ /*! \brief pool initialization data */
+ Array<ConstantInfo> constant_info_arr;
Review comment:
On second thought, how about we extend PoolInfo into WorkspacePoolInfo and ConstantPoolInfo, where only the latter has constant_info_arr?
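i.e. something like this (a sketch only; exact names and type keys are up for discussion):
```c++
// Sketch: specialize PoolInfo so that only the constant variant carries init data.
struct WorkspacePoolInfoNode : public PoolInfoNode {
  static constexpr const char* _type_key = "ir.WorkspacePoolInfo";
  TVM_DECLARE_FINAL_OBJECT_INFO(WorkspacePoolInfoNode, PoolInfoNode);
};

struct ConstantPoolInfoNode : public PoolInfoNode {
  /*! \brief The constants placed in this pool */
  Array<ConstantInfo> constant_info_arr;
  static constexpr const char* _type_key = "ir.ConstantPoolInfo";
  TVM_DECLARE_FINAL_OBJECT_INFO(ConstantPoolInfoNode, PoolInfoNode);
};
```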
##########
File path: src/tir/usmp/analysis/extract_buffer_info.cc
##########
@@ -278,9 +286,78 @@ void BufferInfoExtractor::VisitStmt_(const AllocateNode* op) {
current_scope_info.allocate_nodes.erase(GetRef<Allocate>(op));
}
+void BufferInfoExtractor::RecordAllocateConstNodeInfo(const AllocateConstNode* op) {
+ if (!op->annotations.count(kPoolCandidatesAllocateAttr)) {
+ return;
+ }
+ Integer size_bytes = CalculateExtentsSize(op);
+ const auto& buffer_var = op->buffer_var;
+ // We only statically memory plan only allocates with known
+ // compile time sizes.
+ if (size_bytes.defined()) {
+ if (allocate_infos.find(buffer_var) == allocate_infos.end()) {
+ // By default, the core compiler is assumed to attach the a default pool to each allocate.
+ ICHECK(op->annotations.count(kPoolCandidatesAllocateAttr))
+ << "Every statically sized allocate node needs an pool candidate
attribute";
+ auto pool_candidates =
+ Downcast<Array<PoolInfo>>(op->annotations[kPoolCandidatesAllocateAttr]);
+
+ // TODO(@manupa-arm): improve the error when the responsible component for attaching a single
+ // pool is added
+ ICHECK(pool_candidates.size() > 0)
+ << "The core compiler should at least attach a single PoolInfo. If
there were no "
+ "user-given arguments for memory pools, the default behaviour is
a single size "
+ "un-restricted pool is assigned";
+ PrimFunc func = scope_stack_.top().func;
+ Optional<tvm::relay::Executor> executor_config =
+ module_->GetAttr<tvm::relay::Executor>(tvm::attr::kExecutor);
+ Integer workspace_alignment = 16;
+ if (executor_config) {
+ workspace_alignment = executor_config.value()
+ ->GetAttr<Integer>("workspace-byte-alignment")
+ .value_or(workspace_alignment);
+ }
+ auto buffer_info = BufferInfo(GetUniqueBufferName(buffer_var->name_hint), size_bytes,
+ pool_candidates, workspace_alignment);
+ auto allocate = GetRef<AllocateConst>(op);
+ allocate_infos[buffer_var] =
+ AllocateInfo{allocate, scope_stack_.top().func, scope_stack_.top().call};
+ buffer_info_map_.Set(buffer_info, allocate);
+ } else {
+ // Update the allocate info with the latest call
+ AllocateInfo ai = allocate_infos[buffer_var];
+ ai.call = scope_stack_.top().call;
+ allocate_infos[buffer_var] = ai;
+ }
+ }
+}
+
+void BufferInfoExtractor::VisitStmt_(const AllocateConstNode* op) {
+ ScopeInfo& current_scope_info = scope_stack_.top();
+ const auto& type = Downcast<PointerType>(op->buffer_var->type_annotation);
+ const auto& storage_scope = type->storage_scope;
+
+ // If the allocate is in a for loop, USMP currently only looks at serial for loops.
+ // If its not a serial for loop, then memory planner will omit them in the current memory planning
+ // process leaving them to as tir.allocate nodes for codegen. Additionally, the USMP can only work
+ // with buffers that have global storage_scope
Review comment:
This comment is not applicable to constant nodes.
##########
File path: src/tir/usmp/analysis/extract_buffer_info.cc
##########
@@ -278,9 +286,78 @@ void BufferInfoExtractor::VisitStmt_(const AllocateNode* op) {
current_scope_info.allocate_nodes.erase(GetRef<Allocate>(op));
}
+void BufferInfoExtractor::RecordAllocateConstNodeInfo(const AllocateConstNode* op) {
+ if (!op->annotations.count(kPoolCandidatesAllocateAttr)) {
+ return;
+ }
+ Integer size_bytes = CalculateExtentsSize(op);
+ const auto& buffer_var = op->buffer_var;
+ // We only statically memory plan only allocates with known
+ // compile time sizes.
+ if (size_bytes.defined()) {
+ if (allocate_infos.find(buffer_var) == allocate_infos.end()) {
+ // By default, the core compiler is assumed to attach the a default pool to each allocate.
+ ICHECK(op->annotations.count(kPoolCandidatesAllocateAttr))
+ << "Every statically sized allocate node needs an pool candidate
attribute";
+ auto pool_candidates =
+ Downcast<Array<PoolInfo>>(op->annotations[kPoolCandidatesAllocateAttr]);
+
+ // TODO(@manupa-arm): improve the error when the responsible component for attaching a single
+ // pool is added
+ ICHECK(pool_candidates.size() > 0)
+ << "The core compiler should at least attach a single PoolInfo. If
there were no "
+ "user-given arguments for memory pools, the default behaviour is
a single size "
+ "un-restricted pool is assigned";
+ PrimFunc func = scope_stack_.top().func;
+ Optional<tvm::relay::Executor> executor_config =
+ module_->GetAttr<tvm::relay::Executor>(tvm::attr::kExecutor);
+ Integer workspace_alignment = 16;
+ if (executor_config) {
+ workspace_alignment = executor_config.value()
+ ->GetAttr<Integer>("workspace-byte-alignment")
+ .value_or(workspace_alignment);
+ }
+ auto buffer_info = BufferInfo(GetUniqueBufferName(buffer_var->name_hint), size_bytes,
+ pool_candidates, workspace_alignment);
+ auto allocate = GetRef<AllocateConst>(op);
+ allocate_infos[buffer_var] =
+ AllocateInfo{allocate, scope_stack_.top().func, scope_stack_.top().call};
+ buffer_info_map_.Set(buffer_info, allocate);
+ } else {
+ // Update the allocate info with the latest call
+ AllocateInfo ai = allocate_infos[buffer_var];
+ ai.call = scope_stack_.top().call;
+ allocate_infos[buffer_var] = ai;
+ }
+ }
+}
+
+void BufferInfoExtractor::VisitStmt_(const AllocateConstNode* op) {
+ ScopeInfo& current_scope_info = scope_stack_.top();
+ const auto& type = Downcast<PointerType>(op->buffer_var->type_annotation);
+ const auto& storage_scope = type->storage_scope;
+
+ // If the allocate is in a for loop, USMP currently only looks at serial for loops.
+ // If its not a serial for loop, then memory planner will omit them in the current memory planning
+ // process leaving them to as tir.allocate nodes for codegen. Additionally, the USMP can only work
+ // with buffers that have global storage_scope
+
+ if (!current_scope_info.for_loop.defined()) {
+ RecordAllocateConstNodeInfo(op);
+ } else if (current_scope_info.for_loop.defined() &&
+ current_scope_info.for_loop->kind == ForKind::kSerial && storage_scope == "global") {
+ RecordAllocateConstNodeInfo(op);
Review comment:
I don't think the presence of loops matters for constants and their liveness, unlike for AllocateNode. Therefore we should be able to remove all of this code.
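i.e. the visitor could likely be reduced to something like:
```c++
void BufferInfoExtractor::VisitStmt_(const AllocateConstNode* op) {
  // Constants are live for the whole execution, so the kind of the enclosing
  // for loop does not change their liveness; just record the allocation and
  // keep visiting the body.
  RecordAllocateConstNodeInfo(op);
  VisitStmt(op->body);
}
```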
##########
File path: src/tir/usmp/analysis/extract_buffer_info.cc
##########
@@ -278,9 +286,78 @@ void BufferInfoExtractor::VisitStmt_(const AllocateNode* op) {
current_scope_info.allocate_nodes.erase(GetRef<Allocate>(op));
}
+void BufferInfoExtractor::RecordAllocateConstNodeInfo(const AllocateConstNode* op) {
+ if (!op->annotations.count(kPoolCandidatesAllocateAttr)) {
+ return;
+ }
+ Integer size_bytes = CalculateExtentsSize(op);
+ const auto& buffer_var = op->buffer_var;
+ // We only statically memory plan only allocates with known
+ // compile time sizes.
+ if (size_bytes.defined()) {
+ if (allocate_infos.find(buffer_var) == allocate_infos.end()) {
+ // By default, the core compiler is assumed to attach the a default pool to each allocate.
+ ICHECK(op->annotations.count(kPoolCandidatesAllocateAttr))
+ << "Every statically sized allocate node needs an pool candidate
attribute";
+ auto pool_candidates =
+ Downcast<Array<PoolInfo>>(op->annotations[kPoolCandidatesAllocateAttr]);
+
+ // TODO(@manupa-arm): improve the error when the responsible component for attaching a single
Review comment:
I think the error message is already improved, and please don't add TODOs against me :)
##########
File path: src/tir/transforms/legalize_packed_calls.cc
##########
@@ -49,7 +49,9 @@ class PackedCallLegalizer : public StmtExprMutator {
}
Stmt VisitStmt_(const EvaluateNode* op) final {
- if (tir::is_const_int(op->value)) return StmtExprMutator::VisitStmt_(op);
+ if (tir::is_const_int(op->value)) {
Review comment:
Not sure why these changes are required
##########
File path: src/tir/usmp/transform/convert_pool_allocations_to_offsets.cc
##########
@@ -92,6 +99,8 @@ class PoolAllocationToOffsetConverter : public StmtExprMutator {
PrimExpr VisitExpr_(const BufferLoadNode* op) override;
Stmt VisitStmt_(const BufferStoreNode* op) override;
Review comment:
Note to myself: reviewed up to here.
##########
File path: include/tvm/tir/usmp/utils.h
##########
@@ -187,22 +312,27 @@ struct AllocatedPoolInfoNode : public Object {
Integer allocated_size;
/*! \brief An optional associated pool Var index of PrimFunc params*/
Optional<Integer> pool_var_idx;
+ /*! \brief pool initialization data */
+ Array<ConstantInfo> constant_info_arr;
Review comment:
Please make it Optional
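i.e.:
```c++
/*! \brief Optional pool initialization data, present only for constant pools */
Optional<Array<ConstantInfo>> constant_info_arr;
```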
##########
File path: src/tir/usmp/transform/convert_pool_allocations_to_offsets.cc
##########
@@ -53,14 +54,20 @@ class PoolAllocationToOffsetConverter : public StmtExprMutator {
: pool_allocations_(pool_allocations),
emit_tvmscript_printable_(emit_tvmscript_printable) {
module_ = module->ShallowCopy();
for (const auto& kv : pool_allocations) {
- // TODO(@manupa-arm): add AllocateConstNode when it is available
- ICHECK(kv.first->IsInstance<AllocateNode>());
- Allocate allocate_node = Downcast<Allocate>(kv.first);
+ size_t extent_size = -1;
+ if (kv.first->IsInstance<AllocateNode>()) {
+ Allocate allocate_node = Downcast<Allocate>(kv.first);
+ extent_size = CalculateExtentsSize(allocate_node.operator->());
+ } else if (kv.first->IsInstance<AllocateConstNode>()) {
+ AllocateConst allocate_const_node = Downcast<AllocateConst>(kv.first);
+ extent_size = CalculateExtentsSize(allocate_const_node.operator->());
+ } else {
+ LOG(FATAL) << "Not supported";
Review comment:
Let's use ICHECK if this is not expected to be hit by a user flow.
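e.g. something like:
```c++
} else {
  ICHECK(false) << "Unsupported allocate node type: " << kv.first->GetTypeKey();
}
```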
##########
File path: src/tir/usmp/analysis/extract_buffer_info.cc
##########
@@ -278,9 +286,78 @@ void BufferInfoExtractor::VisitStmt_(const AllocateNode*
op) {
current_scope_info.allocate_nodes.erase(GetRef<Allocate>(op));
}
+void BufferInfoExtractor::RecordAllocateConstNodeInfo(const AllocateConstNode*
op) {
+ if (!op->annotations.count(kPoolCandidatesAllocateAttr)) {
+ return;
+ }
+ Integer size_bytes = CalculateExtentsSize(op);
+ const auto& buffer_var = op->buffer_var;
+ // We only statically memory plan only allocates with known
+ // compile time sizes.
+ if (size_bytes.defined()) {
Review comment:
This if should not be needed for constants as they should definitely
have size defined. Moreover, the comment does not apply here.
##########
File path: src/tir/usmp/algo/greedy.cc
##########
@@ -131,8 +131,8 @@ Map<BufferInfo, PoolAllocation> GreedyBase::PostSortAllocation(
}
}
auto selected_pool = SelectPlacementPool(buf_info, pool_offset_candidates);
- pool_allocations.Set(
- buf_info, PoolAllocation(selected_pool, Integer(pool_offset_candidates[selected_pool])));
+ pool_allocations.Set(buf_info, PoolAllocation(selected_pool, buf_info->alignment,
Review comment:
Referring to my other comment: please explain why the buffer alignment is required here.
##########
File path: include/tvm/tir/usmp/utils.h
##########
@@ -150,20 +270,25 @@ class BufferInfoAnalysis : public ObjectRef {
struct PoolAllocationNode : public Object {
/*! \brief The assigned PoolInfo object */
PoolInfo pool_info;
+ /*! \brief The byte alignment where the tensor is supposed to be placed within the pool*/
+ Integer byte_alignment;
Review comment:
Why do we need a separate byte_alignment? byte_offset should already be decided respecting the alignment.
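i.e. the algorithm can fold the requested alignment into the offset it records, for example:
```c++
// Sketch: round a candidate offset up to the requested alignment before
// recording it, so PoolAllocation only needs to carry the final byte_offset.
size_t AlignedOffset(size_t offset, size_t byte_alignment) {
  return ((offset + byte_alignment - 1) / byte_alignment) * byte_alignment;
}
// e.g. AlignedOffset(13, 16) == 16, AlignedOffset(32, 16) == 32
```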
##########
File path: src/target/source/source_module.cc
##########
@@ -237,14 +241,65 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
}
void GenerateInternalWorkspaceBuffers() {
+ Integer constants_byte_alignment_ =
+ target_->GetAttr<Integer>("workspace-byte-alignment").value_or(16);
+ size_t offset = 0;
if (metadata_->pool_inputs.defined()) {
for (const auto& kv : metadata_->pool_inputs.value()) {
tir::usmp::AllocatedPoolInfo allocated_pool_info = kv.second;
if (allocated_pool_info->pool_info->is_internal) {
- code_ << "__attribute__((section(\".data.tvm\"), ";
- code_ << "aligned(" << 16 << ")))\n";
- code_ << "static uint8_t " <<
allocated_pool_info->pool_info->pool_name << "["
- << allocated_pool_info->allocated_size->value << "];\n";
+ do {
+ for (const auto& kv : allocated_pool_info->pool_info->target_access) {
+ if (kv.first->str() == target_->str()) {
+ if (kTargetPoolReadOnlyAccess == kv.second) {
Review comment:
I'm thinking this mainly because it would be confusing to have a scenario where some targets have RO access while others have RW access, which would require only part of the pool to be initialized. Moreover, if the same memory needs to be used for both, then separating the concepts of "Pools" and "Memory" serves the purpose, because one could place a Workspace pool and a Constant pool in the same memory.