This is an automated email from the ASF dual-hosted git repository.

jwfromm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new a52b575259 [OpenCL Textures] Fix memory management in texture pool 
(#10938)
a52b575259 is described below

commit a52b5752594edb62468bcddd713156bec2fa21dd
Author: Egor Churaev <egor.chur...@gmail.com>
AuthorDate: Thu Apr 21 18:28:49 2022 +0300

    [OpenCL Textures] Fix memory management in texture pool (#10938)
    
    Previously, the size of the memory to allocate was calculated as the
    product of width and height. This does not work well when one
    texture is large in height and the next one is large in width: we
    tried to reuse the allocated memory, and every time the next large
    texture was requested we reallocated the previous one. This had a
    huge impact on performance.
    Now we check the two dimensions independently. In the case described
    above, both dimensions are compared separately, which helps us avoid
    the cyclic memory reallocation.
---
 src/runtime/opencl/texture_pool.cc | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/runtime/opencl/texture_pool.cc b/src/runtime/opencl/texture_pool.cc
index bf52894da3..e7f6655c41 100644
--- a/src/runtime/opencl/texture_pool.cc
+++ b/src/runtime/opencl/texture_pool.cc
@@ -36,35 +36,41 @@ class TexturePool::Pool {
     Entry e;
     e.data = nullptr;
     if (free_list_.size() != 0) {
-      int64_t req_size = height * width;
       Entry new_mem;
-      int64_t min_added_size = std::numeric_limits<int64_t>::max();
-      int64_t min_wasted_size = std::numeric_limits<int64_t>::max();
+      int64_t min_added_size_x = std::numeric_limits<int64_t>::max();
+      int64_t min_added_size_y = std::numeric_limits<int64_t>::max();
+      int64_t min_wasted_size_x = std::numeric_limits<int64_t>::max();
+      int64_t min_wasted_size_y = std::numeric_limits<int64_t>::max();
       std::vector<Entry>::iterator best_mem;
       for (auto it = free_list_.begin(); it != free_list_.end(); ++it) {
         if (it->type.code != type_hint.code) {
           continue;
         }
-        int64_t old_size = it->x * it->y;
         new_mem.x = std::max(it->x, width);
         new_mem.y = std::max(it->y, height);
-        int64_t new_size = new_mem.x * new_mem.y;
-        int64_t added_size = new_size - old_size;
-        int64_t wasted_size = new_size - req_size;
+        int64_t added_size_x = new_mem.x - it->x;
+        int64_t added_size_y = new_mem.y - it->y;
+        int64_t wasted_size_x = new_mem.x - width;
+        int64_t wasted_size_y = new_mem.y - height;
         // Minimize added size first and wasted size thereafter
-        if ((min_added_size > 0 && added_size < min_added_size) ||
-            (min_added_size == 0 && wasted_size < min_wasted_size)) {
-          min_added_size = added_size;
-          min_wasted_size = wasted_size;
+        if ((min_added_size_x > 0 && added_size_x < min_added_size_x) ||
+            (min_added_size_y > 0 && added_size_y < min_added_size_y) ||
+            (min_added_size_x == added_size_x && wasted_size_x < 
min_wasted_size_x) ||
+            (min_added_size_y == added_size_y && wasted_size_y < 
min_wasted_size_y)) {
+          min_added_size_x = added_size_x;
+          min_added_size_y = added_size_y;
+          min_wasted_size_x = wasted_size_x;
+          min_wasted_size_y = wasted_size_y;
           best_mem = it;
         }
       }
 
-      if (min_added_size == 0) {
+      if (min_added_size_x == 0 && min_added_size_y == 0) {
         // use existing block
         e = *best_mem;
         free_list_.erase(best_mem);
-      } else if (min_added_size <= req_size) {
+      } else if (static_cast<size_t>(min_added_size_x) <= width ||
+                 static_cast<size_t>(min_added_size_y) <= height) {
         // if added size is less or equal to
         // what is needed by alloc, then grow entry
         device->FreeDataSpace(dev, best_mem->data);

Reply via email to