This is an automated email from the ASF dual-hosted git repository. moreau pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git
The following commit(s) were added to refs/heads/master by this push: new e11a609 [uTVM][Runtime] Introduce Virtual Memory Allocator to CRT (#5124) e11a609 is described below commit e11a6092e629cadd34af1f48be47817ca9c65fd4 Author: Liangfu Chen <liangfu.c...@icloud.com> AuthorDate: Wed Apr 8 05:33:05 2020 +0800 [uTVM][Runtime] Introduce Virtual Memory Allocator to CRT (#5124) * initial crt_memory and memory leak fix in graph_runtime Change-Id: I0f79f909a04d1c677aabb80f202f0612c5ce7f2a * fix memory leak Change-Id: I37104c09e28112b1974fa2b064c809d0a8d686c3 * clean up Change-Id: I039b12015a1d56c8f4120867cd5a5292da34f3e3 * implement vrealloc Change-Id: I35800470bcbfcf96652494f359711cb4c2d34398 * allocate from stack memory for most of the variables Change-Id: I72071289843fff4031c0df8796868a0b9fbc57ee * allocate from stack memory for all of the variables Change-Id: I32dba85ac1660c77f51c2d0d8ab6436ed0c01c74 * lint Change-Id: If12cd240685d7791fc60bc0cfb66389cdc186b73 * lint Change-Id: I7c9d90c11b60b8edda2427ebd189ebe535af2100 * facilitate the growth of TVM_CRT_MAX_NDIM Change-Id: I939fa43027a5c7529c5c7c6bd8d6e6beb91b7581 * extend test coverage of vmalloc Change-Id: Ie4ff6b64fdfe6810836cf8fd44dace82a20c4581 * lint Change-Id: Ibf3c06619ef296df5c49f3945cb6428777781d69 * move logging.h to src * fix an error in macOS * remove logging.h * use cflags for gcc * fix compilation error --- apps/bundle_deploy/Makefile | 16 +- apps/bundle_deploy/demo.cc | 10 +- apps/bundle_deploy/runtime.c | 40 ++- apps/bundle_deploy/test.cc | 10 +- .../module.h => include/tvm/runtime/crt/memory.h | 46 +-- src/runtime/crt/crt_backend_api.c | 12 +- src/runtime/crt/graph_runtime.c | 202 +++++++---- src/runtime/crt/graph_runtime.h | 43 +-- src/runtime/crt/load_json.c | 24 +- src/runtime/crt/logging.h | 73 ++++ src/runtime/crt/memory.c | 393 +++++++++++++++++++++ src/runtime/crt/module.h | 5 +- src/runtime/crt/ndarray.c | 10 +- src/runtime/crt/packed_func.h | 10 +- .../crt/module.h => 
tests/cpp/crt_memory_test.cc | 55 +-- 15 files changed, 757 insertions(+), 192 deletions(-) diff --git a/apps/bundle_deploy/Makefile b/apps/bundle_deploy/Makefile index c80765f..73f9d75 100644 --- a/apps/bundle_deploy/Makefile +++ b/apps/bundle_deploy/Makefile @@ -20,11 +20,11 @@ # Setup build environment TVM_ROOT=$(shell cd ../..; pwd) DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core -PKG_CXXFLAGS = -std=c++14 -O2 -fPIC \ +PKG_CXXFLAGS = -Wall -std=c++14 -O2 -fPIC \ -I${TVM_ROOT}/include \ -I${DMLC_CORE}/include \ -I${TVM_ROOT}/3rdparty/dlpack/include -PKG_CFLAGS = -std=c99 -O2 -fPIC \ +PKG_CFLAGS = -Wall -std=c99 -O2 -fPIC \ -I${TVM_ROOT}/include \ -I${DMLC_CORE}/include \ -I${TVM_ROOT}/3rdparty/dlpack/include @@ -57,11 +57,11 @@ $(build_dir)/test_dynamic: test.cc ${build_dir}/test_graph.json ${build_dir}/tes $(build_dir)/demo_static: demo_static.c ${build_dir}/bundle_static.o ${build_dir}/model.o ${build_dir}/graph.json.c ${build_dir}/params.bin.c @mkdir -p $(@D) - gcc $(PKG_CXXFLAGS) -o $@ demo_static.c ${build_dir}/bundle_static.o ${build_dir}/model.o -lm + gcc $(PKG_CFLAGS) -o $@ demo_static.c ${build_dir}/bundle_static.o ${build_dir}/model.o -lm $(build_dir)/test_static: test_static.c ${build_dir}/bundle_static.o ${build_dir}/test_model.o @mkdir -p $(@D) - gcc $(PKG_CXXFLAGS) -o $@ $^ + gcc $(PKG_CFLAGS) -o $@ $^ # Serialize our graph.json file. $(build_dir)/graph.json.c: $(build_dir)/graph.json @@ -71,14 +71,6 @@ $(build_dir)/graph.json.c: $(build_dir)/graph.json $(build_dir)/params.bin.c: $(build_dir)/params.bin xxd -i $^ > $@ -# # Serialize our test_graph.json file. -# $(build_dir)/test_graph.json.c: $(build_dir)/test_graph.json -# xxd -i $^ > $@ -# -# # Serialize our test_params.bin file. 
-# $(build_dir)/test_params.bin.c: $(build_dir)/test_params.bin -# xxd -i $^ > $@ - $(build_dir)/model.o $(build_dir)/graph.json $(build_dir)/params.bin $(build_dir)/cat.bin: build_model.py python3 $< -o $(build_dir) diff --git a/apps/bundle_deploy/demo.cc b/apps/bundle_deploy/demo.cc index 34be279..0de10d7 100644 --- a/apps/bundle_deploy/demo.cc +++ b/apps/bundle_deploy/demo.cc @@ -109,11 +109,11 @@ int main(int argc, char **argv) { max_index, max_iter); printf("timing: %.2f ms (create), %.2f ms (set_input), %.2f ms (run), " "%.2f ms (get_output), %.2f ms (destroy)\n", - (t1.tv_sec-t0.tv_sec)*1000000 + (t1.tv_usec-t0.tv_usec)/1000.f, - (t2.tv_sec-t1.tv_sec)*1000000 + (t2.tv_usec-t1.tv_usec)/1000.f, - (t3.tv_sec-t2.tv_sec)*1000000 + (t3.tv_usec-t2.tv_usec)/1000.f, - (t4.tv_sec-t3.tv_sec)*1000000 + (t4.tv_usec-t3.tv_usec)/1000.f, - (t5.tv_sec-t4.tv_sec)*1000000 + (t5.tv_usec-t4.tv_usec)/1000.f); + (t1.tv_sec-t0.tv_sec)*1000.0f + (t1.tv_usec-t0.tv_usec)/1000.f, + (t2.tv_sec-t1.tv_sec)*1000.0f + (t2.tv_usec-t1.tv_usec)/1000.f, + (t3.tv_sec-t2.tv_sec)*1000.0f + (t3.tv_usec-t2.tv_usec)/1000.f, + (t4.tv_sec-t3.tv_sec)*1000.0f + (t4.tv_usec-t3.tv_usec)/1000.f, + (t5.tv_sec-t4.tv_sec)*1000.0f + (t5.tv_usec-t4.tv_usec)/1000.f); dlclose(bundle); return 0; diff --git a/apps/bundle_deploy/runtime.c b/apps/bundle_deploy/runtime.c index 6a53aa1..a7ffea9 100644 --- a/apps/bundle_deploy/runtime.c +++ b/apps/bundle_deploy/runtime.c @@ -30,23 +30,37 @@ #define TVM_CRT_MAX_NDIM 6 /*! Maximum supported arguments in generated functions */ #define TVM_CRT_MAX_ARGS 10 +/*! Maximum supported string length in dltype, e.g. "int8", "int16", "float32" */ +#define TVM_CRT_STRLEN_DLTYPE 10 +/*! Maximum supported string length in function names */ +#define TVM_CRT_STRLEN_NAME 80 -/*! Maximum inputs in a GraphRuntimeNode */ -#define GRAPH_RUNTIME_NODE_MAX_INPUTS 300 -/*! Maximum supported contexts in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_CONTEXTS 1 -/*! 
Maximum supported nodes in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_NODES 400 -/*! Maximum input nodes in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_INPUT_NODES 300 -/*! Maximum nodes in a GraphRuntime for quick entry indexing */ -#define GRAPH_RUNTIME_MAX_NODE_ROW_PTR 300 -/*! Maximum output entries in a GraphRuntime */ -#define GRAPH_RUNTIME_MAX_OUTPUTS 300 +/*! + * \brief Log memory pool size for virtual memory allocation + * + * Here is a list of possible choices: + * * use 16 for 64 KiB memory space + * * use 17 for 128 KiB memory space + * * use 18 for 256 KiB memory space + * * use 19 for 512 KiB memory space + * * use 20 for 1 MiB memory space + * * use 21 for 2 MiB memory space + * * use 22 for 4 MiB memory space + * * use 23 for 8 MiB memory space + * * use 24 for 16 MiB memory space + * * use 25 for 32 MiB memory space + * * use 26 for 64 MiB memory space + * * use 27 for 128 MiB memory space + * * use 28 for 256 MiB memory space + */ +#define TVM_CRT_LOG_VIRT_MEM_SIZE 24 + +/*! 
\brief Page size for virtual memory allocation */ +#define TVM_CRT_PAGE_BYTES 4096 #include "../../src/runtime/crt/crt_runtime_api.c" #include "../../src/runtime/crt/crt_backend_api.c" #include "../../src/runtime/crt/graph_runtime.c" #include "../../src/runtime/crt/load_json.c" #include "../../src/runtime/crt/ndarray.c" - +#include "../../src/runtime/crt/memory.c" diff --git a/apps/bundle_deploy/test.cc b/apps/bundle_deploy/test.cc index 643f1ad..c92400d 100644 --- a/apps/bundle_deploy/test.cc +++ b/apps/bundle_deploy/test.cc @@ -122,11 +122,11 @@ int main(int argc, char **argv) { printf("timing: %.2f ms (create), %.2f ms (set_input), %.2f ms (run), " "%.2f ms (get_output), %.2f ms (destroy)\n", - (t1.tv_sec-t0.tv_sec)*1000000 + (t1.tv_usec-t0.tv_usec)/1000.f, - (t2.tv_sec-t1.tv_sec)*1000000 + (t2.tv_usec-t1.tv_usec)/1000.f, - (t3.tv_sec-t2.tv_sec)*1000000 + (t3.tv_usec-t2.tv_usec)/1000.f, - (t4.tv_sec-t3.tv_sec)*1000000 + (t4.tv_usec-t3.tv_usec)/1000.f, - (t5.tv_sec-t4.tv_sec)*1000000 + (t5.tv_usec-t4.tv_usec)/1000.f); + (t1.tv_sec-t0.tv_sec)*1000.0f + (t1.tv_usec-t0.tv_usec)/1000.f, + (t2.tv_sec-t1.tv_sec)*1000.0f + (t2.tv_usec-t1.tv_usec)/1000.f, + (t3.tv_sec-t2.tv_sec)*1000.0f + (t3.tv_usec-t2.tv_usec)/1000.f, + (t4.tv_sec-t3.tv_sec)*1000.0f + (t4.tv_usec-t3.tv_usec)/1000.f, + (t5.tv_sec-t4.tv_sec)*1000.0f + (t5.tv_usec-t4.tv_usec)/1000.f); free(json_data); free(params_data); diff --git a/src/runtime/crt/module.h b/include/tvm/runtime/crt/memory.h similarity index 53% copy from src/runtime/crt/module.h copy to include/tvm/runtime/crt/memory.h index 8ff979b..3e47060 100644 --- a/src/runtime/crt/module.h +++ b/include/tvm/runtime/crt/memory.h @@ -18,31 +18,35 @@ */ /*! 
- * \file src/runtime/crt/module.h - * \brief Runtime container of the functions + * \file tvm/runtime/crt/memory.h + * \brief The virtual memory manager for micro-controllers */ -#ifndef TVM_RUNTIME_CRT_MODULE_H_ -#define TVM_RUNTIME_CRT_MODULE_H_ -#include <string.h> -#include <tvm/runtime/c_runtime_api.h> +#ifndef TVM_RUNTIME_CRT_MEMORY_H_ +#define TVM_RUNTIME_CRT_MEMORY_H_ -struct TVMPackedFunc; -typedef struct TVMPackedFunc TVMPackedFunc; +static int vleak_size = 0; /*! - * \brief Module container of TVM. + * \brief Allocate memory from manager + * \param size The size of memory + * \return The virtual address */ -typedef struct TVMModule { - /*! - * \brief Get packed function from current module by name. - * - * \param name The name of the function. - * \param pf The result function. - * - * This function will return PackedFunc(nullptr) if function do not exist. - */ - void (*GetFunction)(const char * name, TVMPackedFunc * pf); -} TVMModule; +void * vmalloc(size_t size); -#endif // TVM_RUNTIME_CRT_MODULE_H_ +/*! + * \brief Reallocate memory from manager + * \param ptr The pointer to the memory area to be reallocated + * \param size The size of memory + * \return The virtual address + */ +void * vrealloc(void * ptr, size_t size); + +/*! + * \brief Free the memory. 
+ * \param ptr The pointer to the memory to deallocate + * \return The virtual address + */ +void vfree(void * ptr); + +#endif // TVM_RUNTIME_CRT_MEMORY_H_ diff --git a/src/runtime/crt/crt_backend_api.c b/src/runtime/crt/crt_backend_api.c index e011e47..52cefaf 100644 --- a/src/runtime/crt/crt_backend_api.c +++ b/src/runtime/crt/crt_backend_api.c @@ -18,6 +18,7 @@ */ #include <tvm/runtime/c_backend_api.h> +#include <tvm/runtime/crt/memory.h> #include <stdio.h> #include <stdlib.h> @@ -29,18 +30,12 @@ void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t nbytes, void* ptr = 0; assert(nbytes > 0); unsigned int dtype_bytes = dtype_bits_hint / 8; -#ifdef __ANDROID__ - ptr = memalign(64, nbytes * dtype_bytes); -#else - const int ret = posix_memalign(&ptr, 64, nbytes * dtype_bytes); - (void)ret; - assert(ret == 0); -#endif + ptr = vmalloc(nbytes * dtype_bytes); return ptr; } int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) { - free(ptr); + vfree(ptr); return 0; } @@ -52,6 +47,7 @@ int TVMBackendParallelLaunch(FTVMParallelLambda flambda, void* cdata, int num_ta } int TVMBackendRegisterSystemLibSymbol(const char* name, void* ptr) { + g_fexecs = vrealloc(g_fexecs, sizeof(TVMPackedFunc) * (g_fexecs_count + 1)); snprintf(g_fexecs[g_fexecs_count].name, sizeof(g_fexecs[g_fexecs_count].name), name); g_fexecs[g_fexecs_count].fexec = ptr; g_fexecs_count++; diff --git a/src/runtime/crt/graph_runtime.c b/src/runtime/crt/graph_runtime.c index 89c325a..b5ed3b7 100644 --- a/src/runtime/crt/graph_runtime.c +++ b/src/runtime/crt/graph_runtime.c @@ -21,6 +21,10 @@ * \file graph_runtime.c * \brief implement graph runtime in pure C */ + +#include <tvm/runtime/crt/memory.h> + +#include "logging.h" #include "graph_runtime.h" #ifndef MAX @@ -105,13 +109,9 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode * node, JSONReader *reader) { bitmask |= 2; } else if (!strcmp(key, "inputs")) { size_t count = node->inputs_count; - if (count >= 
GRAPH_RUNTIME_NODE_MAX_INPUTS) { - fprintf(stderr, "The number of inputs in graph runtime node is greater than expected.\n"); - status = -1; - break; - } reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + node->inputs = vrealloc(node->inputs, sizeof(TVMGraphRuntimeNodeEntry)*(count+1)); TVMGraphRuntimeNodeEntry * inputs = node->inputs + count; reader->BeginArray(reader); if (!reader->NextArrayItem(reader)) { @@ -169,6 +169,14 @@ TVMGraphRuntimeNode TVMGraphRuntimeNodeCreate() { return node; } +void TVMGraphRuntimeNodeRelease(TVMGraphRuntimeNode * node) { + if (!node) { return; } + if (node->inputs) { + vfree(node->inputs); + node->inputs = 0; + } +} + int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *reader) { int status = 0; int bitmask = 0; @@ -199,7 +207,8 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r } reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { - reader->ReadString(reader, attr->dltype[dltype_count]); + attr->dltype = vrealloc(attr->dltype, TVM_CRT_STRLEN_DLTYPE * (dltype_count + 1)); + reader->ReadString(reader, attr->dltype + dltype_count * TVM_CRT_STRLEN_DLTYPE); dltype_count++; } attr->dltype_count = dltype_count;; @@ -229,6 +238,7 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r } reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + attr->storage_id = vrealloc(attr->storage_id, sizeof(uint32_t)*(storage_id_count+1)); reader->ReadUnsignedInteger(reader, &(attr->storage_id[storage_id_count])); storage_id_count++; } @@ -258,26 +268,24 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r } reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + attr->shape = + vrealloc(attr->shape, sizeof(attr->shape[0])*(shape_count+1)*TVM_CRT_MAX_NDIM); + attr->ndim = vrealloc(attr->ndim, sizeof(attr->ndim[0])*(shape_count+1)); reader->BeginArray(reader); - 
reader->ReadInteger(reader, &(attr->shape[shape_count][0])); + int64_t * attr_shape_ptr = attr->shape + shape_count*TVM_CRT_MAX_NDIM; + reader->ReadInteger(reader, attr_shape_ptr + 0); uint32_t ndim = 1; if (reader->NextArrayItem(reader)) { - if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count][1])); ndim++; + for (ndim = 1; ndim < TVM_CRT_MAX_NDIM; ndim++) { if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count][2])); ndim++; - if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count][3])); ndim++; - if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count][4])); ndim++; - if (reader->NextArrayItem(reader)) { - reader->ReadInteger(reader, &(attr->shape[shape_count][5])); ndim++; - reader->NextArrayItem(reader); - } - } - } + reader->ReadInteger(reader, attr_shape_ptr + ndim); + } else { + break; } } + if (ndim == TVM_CRT_MAX_NDIM) { + reader->NextArrayItem(reader); + } } attr->ndim[shape_count] = ndim; shape_count++; @@ -308,6 +316,7 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r break; } while (reader->NextArrayItem(reader)) { + attr->device_index = vrealloc(attr->device_index, sizeof(uint32_t)*(device_index_count+1)); reader->ReadUnsignedInteger(reader, &(attr->device_index[device_index_count])); device_index_count++; } @@ -330,13 +339,18 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r status = -1; break; } - uint32_t temp[GRAPH_RUNTIME_MAX_NODES]; + uint32_t * temp = 0; uint32_t temp_count = 0; reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + temp = vrealloc(temp, sizeof(uint32_t) * (temp_count + 1)); reader->ReadUnsignedInteger(reader, &(temp[temp_count])); temp_count++; } + if (temp) { + vfree(temp); + temp = 0; + } } else if (!strcmp(type, "size_t")) { if (!(reader->NextArrayItem(reader))) { fprintf(stderr, "Invalid 
json format\n"); @@ -364,6 +378,30 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r return status; } +void TVMGraphRuntimeGraphAttr_Release(TVMGraphRuntimeGraphAttr * attr) { + if (!attr) { return; } + if (attr->storage_id) { + vfree(attr->storage_id); + attr->storage_id = 0; + } + if (attr->device_index) { + vfree(attr->device_index); + attr->device_index = 0; + } + if (attr->dltype) { + vfree(attr->dltype); + attr->dltype = 0; + } + if (attr->shape) { + vfree(attr->shape); + attr->shape = 0; + } + if (attr->ndim) { + vfree(attr->ndim); + attr->ndim = 0; + } +} + int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { int status = 0; reader->BeginObject(reader); @@ -373,6 +411,8 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { if (!strcmp(key, "nodes")) { reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + runtime->nodes = + vrealloc(runtime->nodes, sizeof(TVMGraphRuntimeNode) * (runtime->nodes_count + 1)); TVMGraphRuntimeNode * node = runtime->nodes + runtime->nodes_count; status = TVMGraphRuntimeNode_Load(node, reader); if (status != 0) { @@ -380,7 +420,7 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { break; #if TVM_CRT_DEBUG } else { - printf("layer %u: `%s` loaded.\n", runtime->nodes_count, node->name); + printf("loading: node (%u) %s loaded.\n", runtime->nodes_count, node->name); #endif // TVM_CRT_DEBUG } runtime->nodes_count++; @@ -389,6 +429,8 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { } else if (!strcmp(key, "arg_nodes")) { reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + runtime->input_nodes = + vrealloc(runtime->input_nodes, sizeof(uint32_t) * (runtime->input_nodes_count + 1)); uint32_t * node = runtime->input_nodes + runtime->input_nodes_count; reader->ReadUnsignedInteger(reader, node); runtime->input_nodes_count++; @@ -397,6 +439,8 @@ int 
TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { } else if (!strcmp(key, "node_row_ptr")) { reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + runtime->node_row_ptr = + vrealloc(runtime->node_row_ptr, sizeof(uint32_t) * (runtime->node_row_ptr_count + 1)); uint32_t count = runtime->node_row_ptr_count; uint32_t * node = runtime->node_row_ptr + count; reader->ReadUnsignedInteger(reader, node); @@ -406,6 +450,9 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) { } else if (!strcmp(key, "heads")) { reader->BeginArray(reader); while (reader->NextArrayItem(reader)) { + runtime->outputs = + vrealloc(runtime->outputs, + sizeof(TVMGraphRuntimeNodeEntry) * (runtime->outputs_count + 1)); TVMGraphRuntimeNodeEntry * entry = runtime->outputs + runtime->outputs_count; status = NodeEntry_Load(entry, reader); if (status != 0) { @@ -458,9 +505,7 @@ int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime * runtime, const char * name) break; } } - if (rv < 0) { - fprintf(stderr, "cannot find \"%s\" among input\n", name); - } + CHECK_GE(rv, 0, "cannot find '%s' among input.", name); return rv; } @@ -476,7 +521,7 @@ void TVMGraphRuntime_SetInput(TVMGraphRuntime * runtime, const char * name, DLTe fprintf(stderr, "given index is greater than num of input nodes.\n"); } uint32_t eid = runtime->GetEntryId(runtime, runtime->input_nodes[index], 0); - runtime->data_entry[eid].dl_tensor = *data_in; + runtime->data_entry[eid].dl_tensor.data = data_in->data; } /*! 
@@ -501,8 +546,8 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo bptr += sizeof(reserved); // read names - char names[GRAPH_RUNTIME_MAX_NODES][80]; - memset(names, 0, sizeof(names)); + char * names = vmalloc(TVM_CRT_STRLEN_NAME * runtime->nodes_count); + memset(names, 0, TVM_CRT_STRLEN_NAME * runtime->nodes_count); uint64_t names_count; int idx; names_count = ((uint64_t*)bptr)[0]; // NOLINT(*) @@ -515,7 +560,7 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo fprintf(stderr, "Error: function name longer than expected.\n"); status = -1; } - memcpy(names[idx], bptr, name_length); + memcpy(names + TVM_CRT_STRLEN_NAME * idx, bptr, name_length); bptr += name_length; } @@ -530,11 +575,9 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo } for (idx = 0; idx < size; idx++) { - int32_t in_idx = runtime->GetInputIndex(runtime, names[idx]); - if (!(in_idx >= 0)) { - fprintf(stderr, "Found param for non-existent input: %s\n", names[idx]); - status = -1; - } + int32_t in_idx = runtime->GetInputIndex(runtime, names + TVM_CRT_STRLEN_NAME * idx); + CHECK_GT(in_idx, 0, + "Found param for non-existent input: %s\n", names + TVM_CRT_STRLEN_NAME * idx); uint32_t eid = runtime->GetEntryId(runtime, runtime->input_nodes[in_idx], 0); if (!(eid < runtime->data_entry_count)) { fprintf(stderr, "`entry_id`=%d is greater than expected(%d).\n", @@ -542,15 +585,26 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo status = -1; } + if (runtime->data_entry[eid].dl_tensor.shape) { + vfree(runtime->data_entry[eid].dl_tensor.shape); + runtime->data_entry[eid].dl_tensor.shape = 0; + } + if (runtime->data_entry[eid].dl_tensor.data) { + vfree(runtime->data_entry[eid].dl_tensor.data); + runtime->data_entry[eid].dl_tensor.data = 0; + } status |= TVMNDArray_Load(&(runtime->data_entry[eid]), &bptr); #if TVM_CRT_DEBUG TVMNDArray * entry = &(runtime->data_entry[eid]); - 
printf("param %s loaded, in_idx=%d, eid=%d, ndim=%d, data[0]=%f\n", - names[idx], in_idx, eid, entry->dl_tensor.ndim, + printf("loading: param %s loaded, in_idx=%d, eid=%d, ndim=%d, data[0]=%f\n", + names + TVM_CRT_STRLEN_NAME * idx, in_idx, eid, entry->dl_tensor.ndim, ((float*)entry->dl_tensor.data)[0]); // NOLINT(*) #endif // TVM_CRT_DEBUG } + // Release memory + vfree(names); + return status; } @@ -564,7 +618,7 @@ void TVMGraphRuntime_Run(TVMGraphRuntime * runtime) { for (idx = 0; idx < runtime->op_execs_count; ++idx) { if (runtime->op_execs[idx].fexec) { #if TVM_CRT_DEBUG - printf("calling %s (%d)\n", runtime->op_execs[idx].name, idx); + printf("calling: %s (%d)\n", runtime->op_execs[idx].name, idx); #endif // TVM_CRT_DEBUG runtime->op_execs[idx].Call(&(runtime->op_execs[idx])); } @@ -581,33 +635,34 @@ int TVMGraphRuntime_GetOutput(TVMGraphRuntime * runtime, const int32_t idx, DLTe int32_t elem_bytes = out->dtype.bits / 8; int64_t size = Shape_Accumulate(out->shape, out->ndim); DLTensor * tensor = &(runtime->data_entry[eid].dl_tensor); - assert(out->ndim == tensor->ndim); - assert(out->dtype.bits == tensor->dtype.bits); - assert(Shape_Accumulate(out->shape, out->ndim) == Shape_Accumulate(tensor->shape, tensor->ndim)); + CHECK(out->ndim == tensor->ndim); + CHECK(out->dtype.bits == tensor->dtype.bits); + CHECK(Shape_Accumulate(out->shape, out->ndim) == Shape_Accumulate(tensor->shape, tensor->ndim)); memcpy(out->data, tensor->data, size * elem_bytes); return status; } void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { - uint32_t idx, dim; + uint32_t idx; // Grab saved optimization plan from graph. 
- DLDataType vtype[GRAPH_RUNTIME_MAX_NODES]; TVMGraphRuntimeGraphAttr * attrs = &(runtime->attrs); + DLDataType * vtype = vmalloc(sizeof(DLDataType) * attrs->dltype_count); for (idx = 0; idx < attrs->dltype_count; idx++) { - vtype[idx] = String2DLDataType(attrs->dltype[idx]); + vtype[idx] = String2DLDataType(attrs->dltype + idx * TVM_CRT_STRLEN_DLTYPE); } // Size and device type of each storage pool entry. - TVMGraphRuntimePoolEntry pool_entry[GRAPH_RUNTIME_MAX_NODES]; - memset(pool_entry, 0, sizeof(pool_entry)); + TVMGraphRuntimePoolEntry * pool_entry = + vmalloc(sizeof(TVMGraphRuntimePoolEntry) * runtime->nodes_count); + memset(pool_entry, 0, sizeof(TVMGraphRuntimePoolEntry) * runtime->nodes_count); uint32_t pool_entry_count = 0; // Find the maximum space size. for (idx = 0; idx < attrs->shape_count; idx++) { int storage_id = attrs->storage_id[idx]; // Use the fallback device if no device index is available. int device_type = runtime->ctxs[0].device_type; - uint32_t size = Shape_Accumulate(attrs->shape[idx], attrs->ndim[idx]); + uint32_t size = Shape_Accumulate(attrs->shape+idx*TVM_CRT_MAX_NDIM, attrs->ndim[idx]); DLDataType t = vtype[idx]; uint32_t bits = t.bits * t.lanes; size_t bytes = ((bits + 7U) / 8U) * size; @@ -622,15 +677,16 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { // Allocate the space. 
for (idx = 0; idx < pool_entry_count; idx++) { + runtime->storage_pool = + vrealloc(runtime->storage_pool, sizeof(TVMNDArray) * (runtime->storage_pool_count + 1)); TVMGraphRuntimePoolEntry pit = pool_entry[idx]; int64_t shape[TVM_CRT_MAX_NDIM] = {0, }; TVMContext ctx = runtime->ctxs[0]; DLDataType dtype = {kDLFloat, 32, 1}; shape[0] = (pit.size + 3) / 4; runtime->storage_pool[runtime->storage_pool_count] = TVMNDArray_Empty(1, shape, dtype, ctx); - if (runtime->storage_pool[runtime->storage_pool_count].dl_tensor.data == 0) { - fprintf(stderr, "fail to create storage_pool with idx=%d\n", idx); - } + CHECK_NE(runtime->storage_pool[runtime->storage_pool_count].dl_tensor.data, 0, + "fail to create storage_pool with idx=%d\n", idx); runtime->storage_pool_count++; } @@ -638,26 +694,31 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) { // memory assignment for each node entry. The allocated memory on each device // is mapped to this pool. runtime->data_entry_count = runtime->node_row_ptr[runtime->node_row_ptr_count - 1]; + runtime->data_entry = vmalloc(sizeof(TVMNDArray) * runtime->data_entry_count); for (idx = 0; idx < runtime->data_entry_count; ++idx) { size_t storage_id = attrs->storage_id[idx]; - assert(storage_id < runtime->storage_pool_count); + CHECK(storage_id < runtime->storage_pool_count); runtime->data_entry[idx] = TVMNDArray_CreateView(&(runtime->storage_pool[storage_id]), - attrs->shape[idx], attrs->ndim[idx], vtype[idx]); - if (runtime->data_entry[idx].dl_tensor.data == 0) { - fprintf(stderr, "fail to create for node with idx=%d, storage_id=%d\n", idx, storage_id); - } + attrs->shape+idx*TVM_CRT_MAX_NDIM, attrs->ndim[idx], vtype[idx]); + CHECK_NE(runtime->data_entry[idx].dl_tensor.data, 0, + "fail to create for node with idx=%d, storage_id=%lu\n", idx, storage_id); } + + // Release memory + vfree(vtype); + vfree(pool_entry); } int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime * runtime) { int status = 0; uint32_t nid, idx; 
runtime->op_execs_count = runtime->nodes_count; + runtime->op_execs = vmalloc(sizeof(TVMPackedFunc) * runtime->op_execs_count); for (nid = 0; nid < runtime->nodes_count; nid++) { const TVMGraphRuntimeNode * inode = runtime->nodes + nid; if (strcmp(inode->op_type, "null")) { - DLTensorPtr args[GRAPH_RUNTIME_MAX_NODES]; + DLTensorPtr args[TVM_CRT_MAX_ARGS]; uint32_t args_count = 0; for (idx = 0; idx < inode->inputs_count; idx++) { const TVMGraphRuntimeNodeEntry * entry = inode->inputs + idx; @@ -671,7 +732,7 @@ int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime * runtime) { args_count++; } if (strcmp(inode->op_type, "tvm_op")) { - fprintf(stderr, "Can only take tvm_op as op\n"); + fprintf(stderr, "Can only take tvm_op as op, but \"%s\" is found.\n", inode->op_type); status = -1; break; } @@ -682,7 +743,7 @@ int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime * runtime) { break; } #if TVM_CRT_DEBUG - printf("creating tvm_op: %s with node_id=%d\n", inode->param.func_name, nid); + printf("tvm_op: creating %s with node_id=%d\n", inode->param.func_name, nid); #endif // TVM_CRT_DEBUG TVMPackedFunc pf; runtime->CreateTVMOp(runtime, &(inode->param), args, args_count, inode->inputs_count, &pf); @@ -735,7 +796,7 @@ int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime * runtime, const TVMOpParam status = -1; } - runtime->module.GetFunction(param->func_name, pf); + runtime->module.GetFunction(&(runtime->module), param->func_name, pf); TVMArgs targs = TVMArgs_Create(arg_ptr.arg_values, arg_ptr.arg_tcodes, arg_ptr.arg_values_count); pf->SetArgs(pf, &targs); @@ -762,7 +823,7 @@ void TVMGraphRuntime_Init(TVMGraphRuntime * runtime, const char * graph_json, TVMGraphRuntime * TVMGraphRuntimeCreate(const char * sym_json, const TVMModule * m, const TVMContext * ctxs) { - TVMGraphRuntime * runtime = (TVMGraphRuntime*)malloc(sizeof(TVMGraphRuntime)); // NOLINT(*) + TVMGraphRuntime * runtime = (TVMGraphRuntime*)vmalloc(sizeof(TVMGraphRuntime)); // NOLINT(*) memset(runtime, 0, 
sizeof(TVMGraphRuntime)); runtime->GetEntryId = TVMGraphRuntime_GetEntryId; runtime->GetInputIndex = TVMGraphRuntime_GetInputIndex; @@ -784,8 +845,29 @@ TVMGraphRuntime * TVMGraphRuntimeCreate(const char * sym_json, void TVMGraphRuntimeRelease(TVMGraphRuntime ** pptr) { int32_t idx; TVMGraphRuntime * runtime = *pptr; + for (idx = 0; idx < runtime->nodes_count; ++idx) { + TVMGraphRuntimeNodeRelease(&(runtime->nodes[idx])); + } + vfree(runtime->nodes); + TVMGraphRuntimeGraphAttr_Release(&(runtime->attrs)); for (idx = 0; idx < runtime->storage_pool_count; ++idx) { TVMNDArray_Release(&(runtime->storage_pool[idx])); } - free(*pptr); + for (idx = 0; idx < runtime->data_entry_count; ++idx) { + vfree(runtime->data_entry[idx].dl_tensor.shape); + } + vfree(runtime->input_nodes); + vfree(runtime->node_row_ptr); + vfree(runtime->outputs); + vfree(runtime->storage_pool); + vfree(runtime->data_entry); + vfree(runtime->op_execs); + vfree(*pptr); + + if (g_fexecs) { + vfree(g_fexecs); + g_fexecs = 0; + } + + CHECK_EQ(vleak_size, 0, "found memory leak, leak size=%d", vleak_size); } diff --git a/src/runtime/crt/graph_runtime.h b/src/runtime/crt/graph_runtime.h index 5b6e905..3cb8ba9 100644 --- a/src/runtime/crt/graph_runtime.h +++ b/src/runtime/crt/graph_runtime.h @@ -63,10 +63,11 @@ typedef struct TVMGraphRuntimeNode { // parameters TVMOpParam param; // inputs - TVMGraphRuntimeNodeEntry inputs[GRAPH_RUNTIME_NODE_MAX_INPUTS]; - size_t inputs_count; + TVMGraphRuntimeNodeEntry * inputs; + // number of inputs + size_t inputs_count; // control deps - uint32_t control_deps[200]; + uint32_t control_deps[20]; // JSON Loader void (*LoadAttrs)(struct TVMGraphRuntimeNode * node, JSONReader *reader, TVMOpParam* param); // JSON Loader @@ -76,12 +77,12 @@ typedef struct TVMGraphRuntimeNode { // Graph attribute typedef struct TVMGraphRuntimeGraphAttr { uint32_t storage_num_not_alloctaed; - uint32_t storage_id[GRAPH_RUNTIME_MAX_NODES]; - uint32_t device_index[GRAPH_RUNTIME_MAX_NODES]; - char 
dltype[GRAPH_RUNTIME_MAX_NODES][10]; // "int8", "int16", "float32" + uint32_t * storage_id; + uint32_t * device_index; + char * dltype; // "int8", "int16", "float32" uint32_t dltype_count; - int64_t shape[GRAPH_RUNTIME_MAX_NODES][TVM_CRT_MAX_NDIM]; - uint32_t ndim[GRAPH_RUNTIME_MAX_NODES]; + int64_t * shape; + uint32_t * ndim; uint32_t shape_count; } TVMGraphRuntimeGraphAttr; @@ -169,33 +170,35 @@ typedef struct TVMGraphRuntime { // Get node entry index. uint32_t (*GetEntryId)(struct TVMGraphRuntime * runtime, uint32_t nid, uint32_t index); - // /*! \brief The graph nodes. */ - TVMGraphRuntimeNode nodes[GRAPH_RUNTIME_MAX_NODES]; - uint32_t nodes_count; + /*! \brief The graph nodes. */ + TVMGraphRuntimeNode * nodes; + /*! \brief The graph nodes counter. */ + uint32_t nodes_count; /*! \brief The argument nodes. */ - uint32_t input_nodes[GRAPH_RUNTIME_MAX_INPUT_NODES]; - uint32_t input_nodes_count; + uint32_t * input_nodes; + uint32_t input_nodes_count; /*! \brief Used for quick entry indexing. */ - uint32_t node_row_ptr[GRAPH_RUNTIME_MAX_NODE_ROW_PTR]; + uint32_t * node_row_ptr; uint32_t node_row_ptr_count; /*! \brief Output entries. */ - TVMGraphRuntimeNodeEntry outputs[GRAPH_RUNTIME_MAX_OUTPUTS]; - uint32_t outputs_count; + TVMGraphRuntimeNodeEntry * outputs; + /*! \brief Output entries counter. */ + uint32_t outputs_count; /*! \brief Additional graph attributes. */ TVMGraphRuntimeGraphAttr attrs; /*! \brief The code module that contains both host and device code. */ TVMModule module; /*! \brief Execution context of all devices including the host. */ - TVMContext ctxs[GRAPH_RUNTIME_MAX_CONTEXTS]; + TVMContext ctxs[1]; uint32_t ctxs_count; /*! \brief Common storage pool for all devices. */ - TVMNDArray storage_pool[GRAPH_RUNTIME_MAX_NODES]; + TVMNDArray * storage_pool; uint32_t storage_pool_count; /*! \brief Data entry of each node. */ - TVMNDArray data_entry[GRAPH_RUNTIME_MAX_NODES]; + TVMNDArray * data_entry; uint32_t data_entry_count; /*! 
\brief Operator on each node. */ - TVMPackedFunc op_execs[GRAPH_RUNTIME_MAX_NODES]; + TVMPackedFunc * op_execs; uint32_t op_execs_count; } TVMGraphRuntime; diff --git a/src/runtime/crt/load_json.c b/src/runtime/crt/load_json.c index 894ab89..cf9492b 100644 --- a/src/runtime/crt/load_json.c +++ b/src/runtime/crt/load_json.c @@ -21,6 +21,8 @@ * \file load_json.c * \brief Load graph from JSON file. */ +#include <tvm/runtime/crt/memory.h> + #include "load_json.h" // the node entry structure in serialized format @@ -74,10 +76,10 @@ void SeqPop(Seq * seq) { } Seq * SeqCreate(uint64_t len) { - Seq * seq = (Seq*)malloc(sizeof(Seq)); // NOLINT(*) + Seq * seq = (Seq*)vmalloc(sizeof(Seq)); // NOLINT(*) memset(seq, 0, sizeof(Seq)); seq->allocated = len; - seq->data = (uint32_t*)malloc(sizeof(uint32_t)*len); // NOLINT(*) + seq->data = (uint32_t*)vmalloc(sizeof(uint32_t)*len); // NOLINT(*) seq->push_back = SeqPush; seq->back = SeqBack; seq->pop_back = SeqPop; @@ -85,8 +87,8 @@ Seq * SeqCreate(uint64_t len) { } void SeqRelease(Seq ** seq) { - free((*seq)->data); - free(*seq); + vfree((*seq)->data); + vfree(*seq); } @@ -156,11 +158,11 @@ int JSONReader_ReadString(JSONReader * reader, char * out_str) { if (ch == '\\') { char sch = reader->NextChar(reader); switch (sch) { - case 'r': snprintf(output, sizeof(output), "%s\r", output); break; - case 'n': snprintf(output, sizeof(output), "%s\n", output); break; - case '\\': snprintf(output, sizeof(output), "%s\\", output); break; - case 't': snprintf(output, sizeof(output), "%s\t", output); break; - case '\"': snprintf(output, sizeof(output), "%s\"", output); break; + case 'r': snprintf(output + strlen(output), sizeof(output), "\r"); break; + case 'n': snprintf(output + strlen(output), sizeof(output), "\n"); break; + case '\\': snprintf(output + strlen(output), sizeof(output), "\\"); break; + case 't': snprintf(output + strlen(output), sizeof(output), "\t"); break; + case '\"': snprintf(output + strlen(output), sizeof(output), "\""); 
break; default: fprintf(stderr, "unknown string escape %c\n", sch); } } else { @@ -346,7 +348,7 @@ JSONReader JSONReader_Create(const char * is) { reader.BeginObject = JSONReader_BeginObject; reader.NextArrayItem = JSONReader_NextArrayItem; reader.NextObjectItem = JSONReader_NextObjectItem; - reader.is_ = (char*)malloc(strlen(is)+1); // NOLINT(*) + reader.is_ = (char*)vmalloc(strlen(is)+1); // NOLINT(*) memset(reader.is_, 0, strlen(is)+1); snprintf(reader.is_, strlen(is)+1, "%s", is); reader.isptr = reader.is_; @@ -355,5 +357,5 @@ JSONReader JSONReader_Create(const char * is) { void JSONReader_Release(JSONReader * reader) { SeqRelease(&(reader->scope_counter_)); - free(reader->is_); + vfree(reader->is_); } diff --git a/src/runtime/crt/logging.h b/src/runtime/crt/logging.h new file mode 100644 index 0000000..2c58834 --- /dev/null +++ b/src/runtime/crt/logging.h @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file runtime/crt/logging.h + * \brief A replacement of the dmlc logging system that avoids + * the usage of GLOG and C++ headers + */ + +#ifndef TVM_RUNTIME_CRT_LOGGING_H_ +#define TVM_RUNTIME_CRT_LOGGING_H_ + +#ifndef CHECK +#define CHECK(x) \ + do { \ + if (!(x)) { \ + fprintf(stderr, "Check failed: %s\n", #x); \ + exit(-1); \ + } \ + }while(0) +#endif + +#ifndef CHECK_BINARY_OP +#define CHECK_BINARY_OP(op, x, y, fmt, ...) \ + do { \ + if (!(x op y)) { \ + fprintf(stderr, "Check failed: %s %s %s: " fmt "\n", #x, #op, #y, ##__VA_ARGS__); \ + exit(-1); \ + } \ + }while(0) +#endif + +#ifndef CHECK_LT +#define CHECK_LT(x, y, fmt, ...) CHECK_BINARY_OP(<, x, y, fmt, ##__VA_ARGS__) +#endif + +#ifndef CHECK_GT +#define CHECK_GT(x, y, fmt, ...) CHECK_BINARY_OP(>, x, y, fmt, ##__VA_ARGS__) +#endif + +#ifndef CHECK_LE +#define CHECK_LE(x, y, fmt, ...) CHECK_BINARY_OP(<=, x, y, fmt, ##__VA_ARGS__) +#endif + +#ifndef CHECK_GE +#define CHECK_GE(x, y, fmt, ...) CHECK_BINARY_OP(>=, x, y, fmt, ##__VA_ARGS__) +#endif + +#ifndef CHECK_EQ +#define CHECK_EQ(x, y, fmt, ...) CHECK_BINARY_OP(==, x, y, fmt, ##__VA_ARGS__) +#endif + +#ifndef CHECK_NE +#define CHECK_NE(x, y, fmt, ...) CHECK_BINARY_OP(!=, x, y, fmt, ##__VA_ARGS__) +#endif + +#endif // TVM_RUNTIME_CRT_LOGGING_H_ diff --git a/src/runtime/crt/memory.c b/src/runtime/crt/memory.c new file mode 100644 index 0000000..24175f6 --- /dev/null +++ b/src/runtime/crt/memory.c @@ -0,0 +1,393 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file memory.c + * \brief Virtual memory manager + * + * To maximize portability, thread-safe feature has been dropped for now. + */ + +#include <tvm/runtime/c_runtime_api.h> +#include <tvm/runtime/crt/memory.h> + +#include <inttypes.h> + +#include "logging.h" + +/*! Number of bits in a page */ +#define TVM_CRT_PAGE_BITS (TVM_CRT_PAGE_BYTES << 3) + +/*! \brief Translate log memory size into bytes */ +#define TVM_CRT_VIRT_MEM_SIZE (1 << TVM_CRT_LOG_VIRT_MEM_SIZE) + +/*! \brief Number of possible page entries in total */ +#define TVM_CRT_MAX_PAGES (TVM_CRT_VIRT_MEM_SIZE / TVM_CRT_PAGE_BYTES) + +/*! \brief Physical address type */ +typedef uint32_t tvm_phy_addr_t; + +/*! \brief The bits in page table */ +static const tvm_phy_addr_t kPageBits = TVM_CRT_PAGE_BITS; + +/*! \brief Page size, also the maximum allocable size */ +static const tvm_phy_addr_t kPageSize = TVM_CRT_PAGE_BYTES; + +/** + * \brief Memory pool for virtual dynamic memory allocation + */ +static char g_memory_pool[TVM_CRT_VIRT_MEM_SIZE]; + +/*! \brief A page in the DRAM */ +typedef struct Page { + /*! \brief Start location in page table */ + tvm_index_t ptable_begin; + /*! \brief The total number of pages */ + tvm_index_t num_pages; + /*! 
\brief Data */ + char * data; +} Page; + +// construct a new page +Page PageCreate(tvm_index_t ptable_begin, tvm_index_t num_pages) { + Page page; + page.ptable_begin = ptable_begin; + page.num_pages = num_pages; + page.data = g_memory_pool + ptable_begin * kPageSize; + return page; +} + +typedef struct PageTable { + Page page[TVM_CRT_MAX_PAGES]; + uint32_t count; + void (*resize)(struct PageTable * ptable, uint32_t size, Page * page); +} PageTable; + +void PageTable_Resize(struct PageTable * ptable, uint32_t new_size, Page * page) { + CHECK_LE(ptable->count, new_size, + "size value (%d) is smaller than expected (%d).", new_size, ptable->count); + for (uint32_t idx = ptable->count; idx < new_size; idx++) { + ptable->page[idx] = *page; + } + ptable->count = new_size; +} + +typedef struct PageEntry { + char * addr; + Page page; +} PageEntry; + +typedef struct TLB { + PageEntry entries[TVM_CRT_MAX_PAGES]; + uint32_t count; + void (*set)(struct TLB * tlb, char * data, Page * page); + PageEntry * (*find)(struct TLB * tlb, char * data); +} TLB; + +void TLB_Set(TLB * tlb, char * data, Page * page) { + PageEntry * entry = tlb->find(tlb, data); + if (entry == 0) { + tlb->entries[tlb->count].addr = data; + tlb->entries[tlb->count].page = *page; + tlb->count++; + } else { + entry->addr = data; + entry->page = *page; + } +} + +PageEntry * TLB_Find(TLB * tlb, char * data) { + PageEntry * entry = 0; + for (uint32_t idx = 0; idx < tlb->count; idx++) { + if (tlb->entries[idx].addr == data) { + entry = tlb->entries + idx; + break; + } + } + return entry; +} + +typedef struct IndexedEntry { + tvm_index_t index; + Page page; +} IndexedEntry; + +typedef struct MultiMap { + IndexedEntry entries[TVM_CRT_MAX_PAGES]; + uint32_t count; + IndexedEntry * (*lower_bound)(struct MultiMap * map, uint32_t npage); + IndexedEntry * (*end)(struct MultiMap * map); + void (*erase)(struct MultiMap * map, IndexedEntry * entry); + void (*insert)(struct MultiMap * map, uint32_t npage, Page * p); +} 
MultiMap; + +IndexedEntry * MultiMap_LowerBound(struct MultiMap * map, uint32_t npage) { + IndexedEntry * entry = 0; + for (uint32_t idx = 0; idx < map->count; idx++) { + if (map->entries[idx].index >= npage) { + entry = map->entries + idx; + break; + } + } + return entry; +} + +IndexedEntry * MultiMap_End(struct MultiMap * map) { + IndexedEntry * entry = 0; + return entry; +} + +void MultiMap_Erase(struct MultiMap * map, IndexedEntry * entry) { + for (uint32_t idx = 0; idx < map->count; idx++) { + if ((map->entries + idx) == entry) { + memcpy(map->entries + idx, map->entries + (idx + 1), + sizeof(IndexedEntry) * (map->count - idx)); + map->count--; + break; + } + } +} + +void MultiMap_Insert(struct MultiMap * map, uint32_t npage, Page * p) { + CHECK_LE(map->count + 1, TVM_CRT_MAX_PAGES, "invalid number of free pages."); + for (uint32_t idx = map->count; idx < (map->count + npage); idx++) { + map->entries[map->count].index = npage; + map->entries[map->count].page = *p; + } + map->count++; +} + +/*! + * \brief DRAM memory manager + * Implements simple paging to allow physical address translation. + */ +typedef struct MemoryManager { + /*! + * \brief Allocate memory from manager + * \param size The size of memory + * \return The virtual address + */ + void* (*Alloc)(struct MemoryManager * mgr, tvm_index_t size); + /*! + * \brief Allocate memory from manager + * \param ptr The pointer to the memory area to be reallocated + * \param size The size of memory + * \return The virtual address + */ + void* (*Realloc)(struct MemoryManager * mgr, void * ptr, tvm_index_t size); + /*! + * \brief Free the memory. + * \param ptr The pointer to the memory to deallocate + * \return The virtual address + */ + void (*Free)(struct MemoryManager * mgr, void* data); + + // Physical address -> page + PageTable ptable; + // Virtual address -> page + TLB pmap; + // Free map + MultiMap free_map; +} MemoryManager; + +/*! 
+ * \brief Allocate memory from manager + * \param size The size of memory + * \return The virtual address + */ +void* MemoryManager_Alloc(MemoryManager * mgr, tvm_index_t size) { + char * data = 0; + tvm_index_t npage = (size + kPageSize - 1) / kPageSize; + MultiMap * free_map = &(mgr->free_map); + IndexedEntry * it = free_map->lower_bound(free_map, npage); + tvm_index_t start = 0; + if (it != free_map->end(free_map)) { + Page p = it->page; + free_map->erase(free_map, it); + data = p.data; + start = p.ptable_begin; + npage = p.num_pages; + } else { + PageTable * ptable = &(mgr->ptable); + start = ptable->count; + CHECK_LE((unsigned)(start + npage), (sizeof(g_memory_pool) / kPageSize), + "insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "", + start, npage, start + npage); + /* insert page entry */ + Page p = PageCreate(start, npage); + ptable->resize(ptable, start + npage, &p); + data = p.data; + TLB * pmap = &(mgr->pmap); + pmap->set(pmap, data, &p); + } + vleak_size++; +#if TVM_CRT_DEBUG > 1 + printf("allocate: addr=%p, start=%d/%d, npage=%d, vleak=%d\n", + data, start, TVM_CRT_MAX_PAGES, npage, vleak_size); +#endif // TVM_CRT_DEBUG + return data; +} + +/*! 
+ * \brief Reallocate memory from manager + * \param ptr The pointer to the memory area to be reallocated + * \param size The size of memory + * \return The virtual address + */ +void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) { + char * data = (char*)ptr; // NOLINT(*) + PageTable * ptable = &(mgr->ptable); + TLB * pmap = &(mgr->pmap); + MultiMap * free_map = &(mgr->free_map); + tvm_index_t start = 0; + tvm_index_t npage = (size + kPageSize - 1) / kPageSize; + if (ptr) { + // get page size for given pointer + CHECK_NE(pmap->count, 0, "invalid translation look-aside buffer."); + PageEntry * entry = pmap->find(pmap, (char*)ptr); // NOLINT(*) + CHECK_NE(entry, 0, "no valid page entry found."); + Page * pptr = &(entry->page); + // if the page size is smaller than target page size, + // try allocate new space + if (pptr->num_pages < npage) { + // TODO(liangfu): found out whether we can extend current entry + // + // insert new page entry + IndexedEntry * it = free_map->lower_bound(free_map, npage); + if (it != free_map->end(free_map)) { + data = it->page.data; + start = it->page.ptable_begin; + npage = it->page.num_pages; + free_map->erase(free_map, it); + } else { + start = ptable->count; + CHECK_LE((unsigned)(start + npage), (sizeof(g_memory_pool) / kPageSize), + "insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "", + start, npage, start + npage); + Page p = PageCreate(start, npage); + ptable->resize(ptable, start + npage, &p); + data = p.data; + pmap->set(pmap, data, &p); + } + // copy previous data to the new entry + memcpy(data, ptr, kPageSize * pptr->num_pages); + // release memory + free_map->insert(free_map, pptr->num_pages, pptr); + } else { + start = pptr->ptable_begin; + } + } else { + IndexedEntry * it = free_map->lower_bound(free_map, npage); + if (it != free_map->end(free_map)) { + Page p = it->page; + free_map->erase(free_map, it); + data = p.data; + start = p.ptable_begin; + npage = p.num_pages; + 
} else { + PageTable * ptable = &(mgr->ptable); + start = ptable->count; + CHECK_LE((unsigned)(start + npage), (sizeof(g_memory_pool) / kPageSize), + "insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "", + start, npage, start + npage); + /* insert page entry */ + Page p = PageCreate(start, npage); + ptable->resize(ptable, start + npage, &p); + data = p.data; + TLB * pmap = &(mgr->pmap); + pmap->set(pmap, data, &p); + } + vleak_size++; + } +#if TVM_CRT_DEBUG > 1 + printf("reallocate: addr=%p, start=%d/%d, npage=%d, vleak=%d, size=%d\n", + data, start, TVM_CRT_MAX_PAGES, npage, vleak_size, size); +#endif // TVM_CRT_DEBUG + return data; +} + +/*! + * \brief Free the memory. + * \param ptr The pointer to the memory to deallocate + * \return The virtual address + */ +void MemoryManager_Free(MemoryManager * mgr, void* ptr) { + TLB * pmap = &(mgr->pmap); + CHECK_NE(pmap->count, 0, "invalid translation look-aside buffer."); + PageEntry * entry = pmap->find(pmap, (char*)ptr); // NOLINT(*) + CHECK_NE(entry, 0, "no valid page entry found."); + Page * p = &(entry->page); + MultiMap * free_map = &(mgr->free_map); + free_map->insert(free_map, p->num_pages, p); + vleak_size--; +#if TVM_CRT_DEBUG > 1 + printf("release: addr=%p, start=%d/%d, npage=%d, vleak=%d\n", + ptr, entry->page.ptable_begin, TVM_CRT_MAX_PAGES, entry->page.num_pages, vleak_size); +#endif // TVM_CRT_DEBUG +} + +MemoryManager * MemoryManagerCreate() { + static MemoryManager mgr; + memset(&mgr, 0, sizeof(MemoryManager)); + /* handle MemoryManager member functions */ + mgr.Alloc = MemoryManager_Alloc; + mgr.Realloc = MemoryManager_Realloc; + mgr.Free = MemoryManager_Free; + /* handle PageTable member functions */ + mgr.ptable.resize = PageTable_Resize; + /* handle TLB member functions */ + mgr.pmap.set = TLB_Set; + mgr.pmap.find = TLB_Find; + /* handle free_map member functions */ + mgr.free_map.lower_bound = MultiMap_LowerBound; + mgr.free_map.end = MultiMap_End; + mgr.free_map.erase = 
MultiMap_Erase; + mgr.free_map.insert = MultiMap_Insert; + return &mgr; +} + +MemoryManager * TVMGetGlobalMemoryManager() { + /* initialize once */ + static uint32_t initialized = 0; + static MemoryManager * mgr; + if (!initialized) { + mgr = MemoryManagerCreate(); + memset(g_memory_pool, 0, sizeof(g_memory_pool)); + initialized = 1; + } + return mgr; +} + +/** \brief Allocate memory from manager */ +void * vmalloc(size_t size) { + MemoryManager * mgr = TVMGetGlobalMemoryManager(); + return mgr->Alloc(mgr, size); +} + +/** \brief Reallocate memory from manager */ +void * vrealloc(void * ptr, size_t size) { + MemoryManager * mgr = TVMGetGlobalMemoryManager(); + return mgr->Realloc(mgr, ptr, size); +} + +/** \brief Release memory from manager */ +void vfree(void * ptr) { + MemoryManager * mgr = TVMGetGlobalMemoryManager(); + mgr->Free(mgr, ptr); +} diff --git a/src/runtime/crt/module.h b/src/runtime/crt/module.h index 8ff979b..9ef287d 100644 --- a/src/runtime/crt/module.h +++ b/src/runtime/crt/module.h @@ -24,11 +24,10 @@ #ifndef TVM_RUNTIME_CRT_MODULE_H_ #define TVM_RUNTIME_CRT_MODULE_H_ -#include <string.h> #include <tvm/runtime/c_runtime_api.h> +#include <string.h> struct TVMPackedFunc; -typedef struct TVMPackedFunc TVMPackedFunc; /*! * \brief Module container of TVM. @@ -42,7 +41,7 @@ typedef struct TVMModule { * * This function will return PackedFunc(nullptr) if function do not exist. */ - void (*GetFunction)(const char * name, TVMPackedFunc * pf); + void (*GetFunction)(struct TVMModule * mod, const char * name, struct TVMPackedFunc * pf); } TVMModule; #endif // TVM_RUNTIME_CRT_MODULE_H_ diff --git a/src/runtime/crt/ndarray.c b/src/runtime/crt/ndarray.c index 016fdd5..4b4ab68 100644 --- a/src/runtime/crt/ndarray.c +++ b/src/runtime/crt/ndarray.c @@ -22,6 +22,8 @@ * \brief NDArray container infratructure. 
*/ +#include <tvm/runtime/crt/memory.h> + #include "ndarray.h" TVMNDArray TVMNDArray_Create(uint32_t ndim, const tvm_index_t * shape, @@ -29,7 +31,7 @@ TVMNDArray TVMNDArray_Create(uint32_t ndim, const tvm_index_t * shape, TVMNDArray ret; memset(&ret, 0, sizeof(TVMNDArray)); ret.dl_tensor.ndim = ndim; - ret.dl_tensor.shape = (int64_t*)malloc(sizeof(int64_t)*ndim); // NOLINT(*) + ret.dl_tensor.shape = (int64_t*)vmalloc(sizeof(int64_t)*ndim); // NOLINT(*) memcpy(ret.dl_tensor.shape, shape, sizeof(int64_t)*ndim); ret.dl_tensor.dtype = dtype; ret.dl_tensor.ctx = ctx; @@ -109,7 +111,9 @@ TVMNDArray TVMNDArray_CreateView(TVMNDArray * arr, const tvm_index_t * shape, } int TVMNDArray_Release(TVMNDArray * arr) { - free(arr->dl_tensor.data); - free(arr->dl_tensor.shape); + vfree(arr->dl_tensor.data); + arr->dl_tensor.data = 0; + vfree(arr->dl_tensor.shape); + arr->dl_tensor.shape = 0; return 0; } diff --git a/src/runtime/crt/packed_func.h b/src/runtime/crt/packed_func.h index 21370b6..93898a4 100644 --- a/src/runtime/crt/packed_func.h +++ b/src/runtime/crt/packed_func.h @@ -112,14 +112,12 @@ static inline void TVMPackedFunc_SetArgs(TVMPackedFunc * pf, const TVMArgs * arg memcpy(&(pf->args), args, sizeof(TVMArgs)); } -TVMPackedFunc g_fexecs[GRAPH_RUNTIME_MAX_NODES]; +TVMPackedFunc * g_fexecs = 0; uint32_t g_fexecs_count = 0; -void TVMPackedFunc_SetupExecs(); - // Implement TVMModule::GetFunction // Put implementation in this file so we have seen the TVMPackedFunc -static inline void TVMModule_GetFunction(const char * name, TVMPackedFunc * pf) { +static inline void TVMModule_GetFunction(TVMModule * mod, const char * name, TVMPackedFunc * pf) { int idx; memset(pf, 0, sizeof(TVMPackedFunc)); assert(strlen(name) <= sizeof(pf->name)); @@ -127,13 +125,13 @@ static inline void TVMModule_GetFunction(const char * name, TVMPackedFunc * pf) pf->Call = TVMPackedFunc_Call; pf->SetArgs = TVMPackedFunc_SetArgs; pf->fexec = &TVMNoOperation; - for (idx = 0; idx < GRAPH_RUNTIME_MAX_NODES; 
idx++) { + for (idx = 0; idx < g_fexecs_count; idx++) { if (!strcmp(g_fexecs[idx].name, name)) { pf->fexec = g_fexecs[idx].fexec; break; } } - if (idx == GRAPH_RUNTIME_MAX_NODES) { + if (idx == g_fexecs_count) { fprintf(stderr, "function handle for %s not found\n", name); } } diff --git a/src/runtime/crt/module.h b/tests/cpp/crt_memory_test.cc similarity index 51% copy from src/runtime/crt/module.h copy to tests/cpp/crt_memory_test.cc index 8ff979b..1c12916 100644 --- a/src/runtime/crt/module.h +++ b/tests/cpp/crt_memory_test.cc @@ -17,32 +17,37 @@ * under the License. */ -/*! - * \file src/runtime/crt/module.h - * \brief Runtime container of the functions - */ -#ifndef TVM_RUNTIME_CRT_MODULE_H_ -#define TVM_RUNTIME_CRT_MODULE_H_ +#define TVM_CRT_LOG_VIRT_MEM_SIZE 16 +#define TVM_CRT_PAGE_BYTES 4096 -#include <string.h> -#include <tvm/runtime/c_runtime_api.h> +#include <gtest/gtest.h> +#include <tvm/runtime/crt/memory.h> -struct TVMPackedFunc; -typedef struct TVMPackedFunc TVMPackedFunc; +#include "../../src/runtime/crt/memory.c" -/*! - * \brief Module container of TVM. - */ -typedef struct TVMModule { - /*! - * \brief Get packed function from current module by name. - * - * \param name The name of the function. - * \param pf The result function. - * - * This function will return PackedFunc(nullptr) if function do not exist. 
- */ - void (*GetFunction)(const char * name, TVMPackedFunc * pf); -} TVMModule; +TEST(CRTMemory, Alloc) { + for (int idx = 0; idx < 65536; idx++) { + void * a = vmalloc(1); + EXPECT_EQ(vleak_size, 1); + vfree(a); + EXPECT_EQ(vleak_size, 0); + } +} + +TEST(CRTMemory, Realloc) { + for (int idx = 0; idx < 65536; idx++) { + void * a = vrealloc(0, 1); + EXPECT_EQ(vleak_size, 1); + void * b = vrealloc(a, 1); + EXPECT_EQ(a, b); + EXPECT_EQ(vleak_size, 1); + vfree(a); + EXPECT_EQ(vleak_size, 0); + } +} -#endif // TVM_RUNTIME_CRT_MODULE_H_ +int main(int argc, char ** argv) { + testing::InitGoogleTest(&argc, argv); + testing::FLAGS_gtest_death_test_style = "threadsafe"; + return RUN_ALL_TESTS(); +}