This is an automated email from the ASF dual-hosted git repository. driazati pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push: new b18f6425f9 [Hexagon] [runtime] Manage RPC and runtime buffers separately (#13028) b18f6425f9 is described below commit b18f6425f951f5e9e039f497898be7d3227ddc93 Author: Janet Schneider <21978033+jane...@users.noreply.github.com> AuthorDate: Wed Oct 12 14:57:28 2022 -0400 [Hexagon] [runtime] Manage RPC and runtime buffers separately (#13028) Creates HexagonPageAllocator to manage allocations for the RPC server Adds new Device APIs for RPC buffer management Allocations are tracked by two separate buffer managers: - rpc_hexbuffs is used exclusively for RPC buffers - runtime_hexbuffs is used exclusively for runtime buffers This will fix the throw on shutdown in the simulator. --- src/runtime/hexagon/hexagon_buffer_manager.h | 13 ++++++ src/runtime/hexagon/hexagon_device_api.cc | 48 ++++++++++++++++++---- src/runtime/hexagon/hexagon_device_api.h | 36 ++++++++++------ src/runtime/hexagon/rpc/hexagon/rpc_server.cc | 32 ++++++++++++++- .../hexagon/hexagon_device_api_tests.cc | 27 ++++++++---- 5 files changed, 127 insertions(+), 29 deletions(-) diff --git a/src/runtime/hexagon/hexagon_buffer_manager.h b/src/runtime/hexagon/hexagon_buffer_manager.h index a698b0ecb1..eecf96a6db 100644 --- a/src/runtime/hexagon/hexagon_buffer_manager.h +++ b/src/runtime/hexagon/hexagon_buffer_manager.h @@ -25,6 +25,7 @@ #include <memory> #include <unordered_map> #include <utility> +#include <vector> #include "hexagon_buffer.h" @@ -85,6 +86,18 @@ class HexagonBufferManager { return hexagon_buffer_map_.empty(); } + //! \brief Returns a vector of currently allocated pointers, owned by the manager. + // Note - this should only be used by the device API to keep track of what + // was in the manager when HexagonDeviceAPI::ReleaseResources is called. + std::vector<void*> current_allocations() { + std::vector<void*> allocated; + std::lock_guard<std::mutex> lock(map_mutex_); + for (const auto& [data_ptr, buffer] : hexagon_buffer_map_) { + allocated.push_back(data_ptr); + } + return allocated; + } + private: //! \brief Contains the HexagonBuffer objects managed by this class. std::unordered_map<void*, std::unique_ptr<HexagonBuffer>> hexagon_buffer_map_; diff --git a/src/runtime/hexagon/hexagon_device_api.cc b/src/runtime/hexagon/hexagon_device_api.cc index 7c251721b7..3574ab5018 100644 --- a/src/runtime/hexagon/hexagon_device_api.cc +++ b/src/runtime/hexagon/hexagon_device_api.cc @@ -90,18 +90,23 @@ void* HexagonDeviceAPI::AllocDataSpace(Device dev, int ndim, const int64_t* shap const size_t typesize = (dtype.bits / 8) * dtype.lanes; + CHECK(runtime_hexbuffs) << "Attempted to allocate Hexagon data with " + << "HexagonDeviceAPI::AllocDataSpace before initializing resources. " + << "Please call HexagonDeviceAPI::AcquireResources"; + if (ndim == 0) { // Allocate storage for a single scalar value. - return mgr->AllocateHexagonBuffer(typesize, kHexagonAllocAlignment, mem_scope); + return runtime_hexbuffs->AllocateHexagonBuffer(typesize, kHexagonAllocAlignment, mem_scope); } else if (ndim == 1) { // Allocate a single, contiguous memory region. size_t nbytes = shape[0] * typesize; - return mgr->AllocateHexagonBuffer(nbytes, kHexagonAllocAlignment, mem_scope); + return runtime_hexbuffs->AllocateHexagonBuffer(nbytes, kHexagonAllocAlignment, mem_scope); } else if (ndim == 2) { // Allocate the region(s) needed for Hexagon's indirect-tensor format. size_t nallocs = shape[0]; size_t nbytes = shape[1] * typesize; - return mgr->AllocateHexagonBuffer(nallocs, nbytes, kHexagonAllocAlignment, mem_scope); + return runtime_hexbuffs->AllocateHexagonBuffer(nallocs, nbytes, kHexagonAllocAlignment, + mem_scope); } else { return nullptr; // unreachable } @@ -115,13 +120,34 @@ void* HexagonDeviceAPI::AllocDataSpace(Device dev, size_t nbytes, size_t alignme if (alignment < kHexagonAllocAlignment) { alignment = kHexagonAllocAlignment; } - return mgr->AllocateHexagonBuffer(nbytes, alignment, String("global")); + CHECK(runtime_hexbuffs) << "Attempted to allocate Hexagon data with " + << "HexagonDeviceAPI::AllocDataSpace before initializing resources. " + << "Please call HexagonDeviceAPI::AcquireResources"; + return runtime_hexbuffs->AllocateHexagonBuffer(nbytes, alignment, String("global")); } void HexagonDeviceAPI::FreeDataSpace(Device dev, void* ptr) { CHECK(ptr) << "buffer pointer is null"; CHECK(IsValidDevice(dev)) << "dev.device_type: " << dev.device_type; - mgr->FreeHexagonBuffer(ptr); + if (runtime_hexbuffs) { + runtime_hexbuffs->FreeHexagonBuffer(ptr); + } else { + auto it = std::find(released_runtime_buffers.begin(), released_runtime_buffers.end(), ptr); + CHECK(it != released_runtime_buffers.end()) << "Attempted to free Hexagon data with " + << "HexagonDeviceAPI::FreeDataSpace that was not " + << "allocated during the session."; + } +} + +void* HexagonDeviceAPI::AllocRpcBuffer(size_t nbytes, size_t alignment) { + CHECK(nbytes) << "number of bytes is zero"; + CHECK(alignment) << "alignment is zero"; + return rpc_hexbuffs.AllocateHexagonBuffer(nbytes, alignment, String("global")); +} + +void HexagonDeviceAPI::FreeRpcBuffer(void* ptr) { + CHECK(ptr) << "buffer pointer is null"; + rpc_hexbuffs.FreeHexagonBuffer(ptr); } // WorkSpace: runtime allocations for Hexagon @@ -137,7 +163,10 @@ void* HexagonDeviceAPI::AllocWorkspace(Device dev, size_t size, DLDataType type_ void HexagonDeviceAPI::FreeWorkspace(Device dev, void* data) { CHECK(IsValidDevice(dev)) << "dev.device_type: " << dev.device_type; - CHECK(mgr->count(data) != 0) + CHECK(runtime_hexbuffs) << "Attempted to free Hexagon workspace with " + << "HexagonDeviceAPI::FreeWorkspace outside of a session. " + << "Please call HexagonDeviceAPI::AcquireResources"; + CHECK(runtime_hexbuffs->count(data) != 0) << "Attempt made to free unknown or already freed workspace allocation"; dmlc::ThreadLocalStore<HexagonWorkspacePool>::Get()->FreeWorkspace(dev, data); } @@ -160,8 +189,13 @@ void HexagonDeviceAPI::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHan CHECK_EQ(from->byte_offset, 0); CHECK_EQ(to->byte_offset, 0); CHECK_EQ(GetDataSize(*from), GetDataSize(*to)); + CHECK(runtime_hexbuffs) << "Attempted to copy Hexagon data with " + << "HexagonDeviceAPI::CopyDataFromTo before initializing resources. " + << "Please call HexagonDeviceAPI::AcquireResources"; - auto lookup_hexagon_buffer = [this](void* ptr) -> HexagonBuffer* { return mgr->find(ptr); }; + auto lookup_hexagon_buffer = [this](void* ptr) -> HexagonBuffer* { + return runtime_hexbuffs->find(ptr); + }; HexagonBuffer* hex_from_buf = lookup_hexagon_buffer(from->data); HexagonBuffer* hex_to_buf = lookup_hexagon_buffer(to->data); diff --git a/src/runtime/hexagon/hexagon_device_api.h b/src/runtime/hexagon/hexagon_device_api.h index 1c802f3530..8d2795e7a0 100644 --- a/src/runtime/hexagon/hexagon_device_api.h +++ b/src/runtime/hexagon/hexagon_device_api.h @@ -48,7 +48,7 @@ class HexagonDeviceAPI final : public DeviceAPI { static HexagonDeviceAPI* Global(); //! \brief Constructor - HexagonDeviceAPI() { mgr = &hexbuffs; } + HexagonDeviceAPI() {} //! \brief Destructor ~HexagonDeviceAPI() {} @@ -60,7 +60,7 @@ class HexagonDeviceAPI final : public DeviceAPI { CHECK_EQ(runtime_hexbuffs, nullptr); runtime_hexbuffs = std::make_unique<HexagonBufferManager>(); - mgr = runtime_hexbuffs.get(); + released_runtime_buffers.clear(); CHECK_EQ(runtime_threads, nullptr); runtime_threads = std::make_unique<HexagonThreadManager>(threads, stack_size, pipe_size); @@ -78,10 +78,10 @@ class HexagonDeviceAPI final : public DeviceAPI { runtime_threads.reset(); CHECK(runtime_hexbuffs) << "runtime_hexbuffs was not created in AcquireResources"; - if (runtime_hexbuffs && !runtime_hexbuffs->empty()) { - LOG(INFO) << "runtime_hexbuffs was not empty in ReleaseResources"; + if (!runtime_hexbuffs->empty()) { + DLOG(INFO) << "runtime_hexbuffs was not empty in ReleaseResources"; + released_runtime_buffers = runtime_hexbuffs->current_allocations(); } - mgr = &hexbuffs; runtime_hexbuffs.reset(); CHECK(runtime_vtcm) << "runtime_vtcm was not created in AcquireResources"; @@ -106,6 +106,12 @@ class HexagonDeviceAPI final : public DeviceAPI { //! \brief Free the allocated HexagonBuffer. void FreeDataSpace(Device dev, void* ptr) final; + //! \brief Hexagon-only interface to allocate buffers used for the RPC server + void* AllocRpcBuffer(size_t nbytes, size_t alignment); + + //! \brief Hexagon-only interface to free buffers used for the RPC server + void FreeRpcBuffer(void* ptr); + /*! \brief Request a dynamically allocated HexagonBuffer from a workspace pool. * \returns The underlying allocation pointer. */ @@ -190,15 +196,21 @@ class HexagonDeviceAPI final : public DeviceAPI { (DLDeviceType(dev.device_type) == kDLCPU); } - //! \brief Manages underlying HexagonBuffer allocations - // runtime_hexbuffs is used for runtime allocations. It is created - // with a call to AcquireResources, and destroyed on ReleaseResources. - // hexbuffs is used for all allocations outside of the session lifetime. - HexagonBufferManager hexbuffs; + //! \brief Manages RPC HexagonBuffer allocations + // rpc_hexbuffs is used only in Alloc/FreeRpcBuffer. It is static because it lives for the + // lifetime of the static Device API. + HexagonBufferManager rpc_hexbuffs; + + //! \brief Manages runtime HexagonBuffer allocations + // runtime_hexbuffs is used for runtime allocations, separate from rpc_hexbuffs. It is created + // with a call to AcquireResources, and destroyed on ReleaseResources. The buffers in this + // manager are scoped to the lifetime of a user application session. std::unique_ptr<HexagonBufferManager> runtime_hexbuffs; - //! \brief Current buffer manager - HexagonBufferManager* mgr; + //! \brief Keeps a list of released runtime HexagonBuffer allocations + // ReleaseResources can be called when there are still buffers in runtime_hexbuffs. This list + // stores the buffers that were released. + std::vector<void*> released_runtime_buffers; //! \brief Thread manager std::unique_ptr<HexagonThreadManager> runtime_threads; diff --git a/src/runtime/hexagon/rpc/hexagon/rpc_server.cc b/src/runtime/hexagon/rpc/hexagon/rpc_server.cc index 22a54043cd..29c3a1bdfe 100644 --- a/src/runtime/hexagon/rpc/hexagon/rpc_server.cc +++ b/src/runtime/hexagon/rpc/hexagon/rpc_server.cc @@ -38,6 +38,7 @@ extern "C" { #include "../../../library_module.h" #include "../../../minrpc/minrpc_server.h" #include "../../hexagon/hexagon_common.h" +#include "../../hexagon/hexagon_device_api.h" #include "hexagon_rpc.h" namespace tvm { @@ -145,6 +146,35 @@ class HexagonIOHandler { uint32_t write_buffer_available_length_; }; +// Internal allocator that redirects alloc to TVM's C API. +template <typename TIOHandler> +class HexagonPageAllocator { + public: + using ArenaPageHeader = tvm::support::ArenaPageHeader; + + explicit HexagonPageAllocator(TIOHandler* io) : io_(io) {} + + ArenaPageHeader* allocate(size_t min_size) { + size_t npages = ((min_size + kPageSize - 1) / kPageSize); + void* data; + + data = HexagonDeviceAPI::Global()->AllocRpcBuffer(npages * kPageSize, kPageAlign); + + ArenaPageHeader* header = static_cast<ArenaPageHeader*>(data); + header->size = npages * kPageSize; + header->offset = sizeof(ArenaPageHeader); + return header; + } + + void deallocate(ArenaPageHeader* page) { HexagonDeviceAPI::Global()->FreeRpcBuffer(page); } + + static const constexpr int kPageSize = 2 << 10; + static const constexpr int kPageAlign = 8; + + private: + TIOHandler* io_; +}; + class HexagonRPCServer { public: explicit HexagonRPCServer(uint8_t* receive_buffer, size_t receive_buffer_size_bytes) @@ -185,7 +215,7 @@ class HexagonRPCServer { private: HexagonIOHandler io_; - MinRPCServer<HexagonIOHandler> rpc_server_; + MinRPCServer<HexagonIOHandler, HexagonPageAllocator> rpc_server_; }; } // namespace hexagon diff --git a/tests/cpp-runtime/hexagon/hexagon_device_api_tests.cc b/tests/cpp-runtime/hexagon/hexagon_device_api_tests.cc index 2139aa78f7..e262a16ada 100644 --- a/tests/cpp-runtime/hexagon/hexagon_device_api_tests.cc +++ b/tests/cpp-runtime/hexagon/hexagon_device_api_tests.cc @@ -147,22 +147,31 @@ TEST_F(HexagonDeviceAPITest, DISABLED_alloc_free_diff_dev) { EXPECT_THROW(hexapi->FreeDataSpace(cpu_dev, buf), InternalError); } -// Alloc a non-runtime buffer -// Alloc a runtime buffer -// "Release" resources for runtime -// Verify the runtime buffer cannot be freed, but the non-runtime buffer can -// This test should be run last -TEST_F(HexagonDeviceAPITest, leak_resources) { +// Ensure runtime buffer manager is properly configured and destroyed +// in Acquire/Release +TEST_F(HexagonDeviceAPITest, runtime_buffer_manager) { hexapi->ReleaseResources(); - void* pre_runtime_buf = hexapi->AllocDataSpace(hex_dev, nbytes, alignment, int8); - CHECK(pre_runtime_buf != nullptr); + EXPECT_THROW(hexapi->AllocDataSpace(hex_dev, nbytes, alignment, int8), InternalError); hexapi->AcquireResources(); void* runtime_buf = hexapi->AllocDataSpace(hex_dev, nbytes, alignment, int8); CHECK(runtime_buf != nullptr); hexapi->ReleaseResources(); + hexapi->FreeDataSpace(hex_dev, runtime_buf); + hexapi->AcquireResources(); EXPECT_THROW(hexapi->FreeDataSpace(hex_dev, runtime_buf), InternalError); - hexapi->FreeDataSpace(hex_dev, pre_runtime_buf); +} + +// Ensure RPC buffer manager is always available +TEST_F(HexagonDeviceAPITest, rpc_buffer_manager) { + void* rpc_buf; + rpc_buf = hexapi->AllocRpcBuffer(nbytes, alignment); + CHECK(rpc_buf != nullptr); + hexapi->ReleaseResources(); + hexapi->FreeRpcBuffer(rpc_buf); + rpc_buf = hexapi->AllocRpcBuffer(nbytes, alignment); + CHECK(rpc_buf != nullptr); hexapi->AcquireResources(); + hexapi->FreeRpcBuffer(rpc_buf); } // Ensure thread manager is properly configured and destroyed