This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm-ffi.git
The following commit(s) were added to refs/heads/main by this push:
     new 7092774  [CYTHON] Fix memory leak when tensor alloc returns to python (#87)
7092774 is described below
commit 70927743bd9f9e24eba65a06eb7a695137c49522
Author: Tianqi Chen <[email protected]>
AuthorDate: Fri Oct 3 16:41:38 2025 -0400
[CYTHON] Fix memory leak when tensor alloc returns to python (#87)
This PR fixes a memory leak that occurs when a tensor is allocated inside
a function and returned to python. We need to delete the chandle to
prevent the leak, as the ownership count is transferred to the
DLManagedTensor.
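
For intuition, here is a minimal sketch in plain Python (hypothetical
names, not the actual Cython code) of the reference-count flow the fix
restores: converting the chandle into a DLManagedTensor adds a second
reference, so the original reference must be released or the count can
never drop back to zero.

    class Handle:
        def __init__(self) -> None:
            self.refcount = 1  # reference owned by the incoming chandle

    def dlpack_from_handle(handle: Handle) -> Handle:
        handle.refcount += 1  # the DLManagedTensor takes its own reference
        return handle

    def make_tensor_fixed(chandle: Handle) -> Handle:
        dlpack = dlpack_from_handle(chandle)  # refcount is now 2
        # ... wrap dlpack into a Python Tensor (elided) ...
        chandle.refcount -= 1  # the fix: release the original reference
        return dlpack  # refcount back to 1, owned solely by the Tensor

    h = Handle()
    make_tensor_fixed(h)
    assert h.refcount == 1  # without the decref this stays at 2 and leaks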
---
pyproject.toml | 2 +-
python/tvm_ffi/__init__.py | 2 +-
python/tvm_ffi/cython/tensor.pxi | 6 ++++++
tests/python/test_load_inline.py | 31 +++++++++++++++++++++++++++++++
4 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index f63e89e..acf62e2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@
[project]
name = "apache-tvm-ffi"
-version = "0.1.0b14"
+version = "0.1.0b15"
description = "tvm ffi"
authors = [{ name = "TVM FFI team" }]
diff --git a/python/tvm_ffi/__init__.py b/python/tvm_ffi/__init__.py
index c9b6da5..caf994f 100644
--- a/python/tvm_ffi/__init__.py
+++ b/python/tvm_ffi/__init__.py
@@ -17,7 +17,7 @@
"""TVM FFI Python package."""
# version
-__version__ = "0.1.0b14"
+__version__ = "0.1.0b15"
# order matters here so we need to skip isort here
# isort: skip_file
diff --git a/python/tvm_ffi/cython/tensor.pxi b/python/tvm_ffi/cython/tensor.pxi
index dc8b75e..74b065b 100644
--- a/python/tvm_ffi/cython/tensor.pxi
+++ b/python/tvm_ffi/cython/tensor.pxi
@@ -349,8 +349,14 @@ cdef inline object make_tensor_from_chandle(TVMFFIObjectHandle chandle, DLPackTo
             c_dlpack_to_pyobject(dlpack, &py_obj)
             tensor = <Tensor>(<PyObject*>py_obj)
             Py_DECREF(tensor)
+            # decref the original handle to prevent a leak.
+            # note that the DLManagedTensor also holds a reference to the tensor,
+            # so we need to decref the original handle if the conversion is successful
+            TVMFFIObjectDecRef(chandle)
             return tensor
         except Exception:
+            # call the deleter to free the memory since we will continue to use the chandle
+            dlpack.deleter(dlpack)
             pass
     # default return the tensor
     tensor = _CLASS_TENSOR.__new__(_CLASS_TENSOR)
diff --git a/tests/python/test_load_inline.py b/tests/python/test_load_inline.py
index cd61c57..b30471b 100644
--- a/tests/python/test_load_inline.py
+++ b/tests/python/test_load_inline.py
@@ -318,3 +318,34 @@ def test_load_inline_both() -> None:
     y_cuda = torch.empty_like(x_cuda)
     mod.add_one_cuda(x_cuda, y_cuda)
     torch.testing.assert_close(x_cuda + 1, y_cuda)
+
+
[email protected](
+    torch is None or not torch.cuda.is_available(), reason="Requires torch and CUDA"
+)
+def test_cuda_memory_alloc_noleak() -> None:
+    assert torch is not None
+    mod = tvm_ffi.cpp.load_inline(
+        name="hello",
+        cuda_sources=r"""
+        #include <tvm/ffi/function.h>
+        #include <tvm/ffi/container/tensor.h>
+
+        namespace ffi = tvm::ffi;
+
+        ffi::Tensor return_tensor(tvm::ffi::TensorView x) {
+            ffi::Tensor y = ffi::Tensor::FromDLPackAlloc(
+                TVMFFIEnvGetTensorAllocator(), x.shape(), x.dtype(), x->device);
+            return y;
+        }
+        """,
+        functions=["return_tensor"],
+    )
+    x = torch.arange(1024 * 1024, dtype=torch.float32, device="cuda")
+    current_allocated = torch.cuda.memory_allocated()
+    repeat = 8
+    for i in range(repeat):
+        mod.return_tensor(x)
+    diff = torch.cuda.memory_allocated() - current_allocated
+    # memory should not grow as we loop over
+    assert diff <= 1024**2 * 8
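
A note on the bound in the new test: each allocated tensor holds
1024 * 1024 float32 values, i.e. 4 MiB, so with the leak the 8 iterations
would grow allocated memory by roughly 32 MiB; the 1024**2 * 8 (8 MiB)
threshold therefore catches the leak while leaving slack for the caching
allocator.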