Rhys-Q opened a new issue, #422:
URL: https://github.com/apache/tvm-ffi/issues/422

   When attempting to use CuteDSL with the `--enable-tvm-ffi` option to export 
a vector add kernel and integrate it with TVM Relax, I encounter a 
`RuntimeError: Module[library] does not support SaveToBytes` error during the 
VirtualMachine initialization phase.
   
   Here is my code:
   ``` python
   """Test CuteDSL vector add dispatch and correctness."""
   
   import os
   
   import subprocess
   
   import shutil
   import numpy as np
   import tvm
   from tvm import relax
   from tvm.script import ir as I
   from tvm.script import relax as R
   from cutlass.cute.runtime import make_fake_compact_tensor
   import cutlass.cute as cute  # type: ignore
   
   import logging
   
   
   import tvm
   from tvm.script import ir as I
   
   
   # Module-level logger; nothing in this snippet actually logs through it.
   logger = logging.getLogger(__name__)
   
   # Not referenced elsewhere in this snippet; the literal 256 passed to the
   # kernel below presumably mirrors this value -- TODO confirm.
   DEFAULT_BLOCK_SIZE = 256
   
   
   @cute.kernel  # type: ignore
   def _vector_add(  # type: ignore
       A: cute.Tensor,
       B: cute.Tensor,
       C: cute.Tensor,
   ):
       # Element-wise addition kernel: each thread writes C[i] = A[i] + B[i]
       # for a single index i.
       #
       # Only the first component of the thread index, block index and block
       # dimension is used; the other two components are discarded.
       tidx, _, _ = cute.arch.thread_idx()
       bidx, _, _ = cute.arch.block_idx()
       bdim, _, _ = cute.arch.block_dim()
       # Flat element index handled by this thread.
       i = bidx * bdim + tidx
       # Bounds guard: the launch grid is rounded up with ceil_div, so threads
       # whose index falls past the end of A must do nothing.
       if i < A.shape[0]:
           C[i] = A[i] + B[i]
   
   
   # JIT entry point that launches `_vector_add` over the whole input.
   #
   # Parameters:
   #   A, B        input tensors; the element count is read from A.shape[0].
   #   block_size  number of threads per block (first block dimension).
   #   C           output tensor. NOTE(review): C is declared after block_size,
   #               presumably so the output comes last as R.call_dps_packed
   #               appends it -- confirm.
   @cute.jit  # type: ignore
   def _vector_add_kernel(  # type: ignore
       A: cute.Tensor,
       B: cute.Tensor,
       block_size: cute.Uint32,
       C: cute.Tensor,
   ):
       N = A.shape[0]
       # Enough blocks along the first grid dimension to cover all N elements.
       grid_dim = [cute.ceil_div(N, block_size), 1, 1]
       _vector_add(A, B, C).launch(grid=grid_dim, block=[block_size, 1, 1], 
smem=0)
   
   
   # Relax IRModule whose `main` forwards its two inputs to the externally
   # provided packed function named "vector_add".
   @I.ir_module
   class Module:
       @R.function
       def main(
           x: R.Tensor((1024,), "float32"),
           y: R.Tensor((1024,), "float32"),
       ) -> R.Tensor((1024,), "float32"):
           with R.dataflow():
               # Arguments are x, y and the literal 256; the output struct info
               # is taken from x. NOTE(review): 256 presumably maps to the
               # kernel's block_size parameter -- confirm.
               z = R.call_dps_packed("vector_add", [x, y, 256], x.struct_info)
               R.output(z)
           return z
   
   
   def test_cutedsl_vector_add():
       """Build the CuteDSL vector-add kernel, attach it to the Relax module, and run it.

       Steps:
         1. Compile ``_vector_add_kernel`` with ``--enable-tvm-ffi`` and export it
            to an object file under ``./cutedsl_build``.
         2. Link the object file into ``libtvm_cutedsl.so`` with ``gcc`` and load
            it as a TVM runtime module.
         3. Attach the loaded module to ``Module`` via the ``external_mods``
            attribute, build for CUDA and create a Relax ``VirtualMachine``.
         4. Run ``main`` on random inputs and compare against a PyTorch reference.

       In the reported environment step 3 fails while constructing
       ``relax.VirtualMachine`` with
       ``RuntimeError: Module[library] does not support SaveToBytes``.

       Raises:
           subprocess.CalledProcessError: if the ``gcc`` link step fails.
           AssertionError: if the kernel output differs from the reference.
       """
       # Local import: torch is only needed for the reference result and is not
       # imported at module level in this script.
       import torch

       # Vector length; matches the (1024,) shapes declared in Module.main.
       N = 1024

       build_dir = "./cutedsl_build"
       lib_path = os.path.join(build_dir, "libtvm_cutedsl.so")
       # Start from a clean build directory on every run.
       if os.path.exists(build_dir):
           shutil.rmtree(build_dir)
       os.makedirs(build_dir, exist_ok=True)
       object_file_path = os.path.join(build_dir, "vector_add.o")

       # Placeholder arguments used only for compilation, given in the kernel's
       # parameter order: A, B, block_size, C.
       cute_args = [
           make_fake_compact_tensor(dtype=cute.Float32, shape=[N]),
           make_fake_compact_tensor(dtype=cute.Float32, shape=[N]),
           cute.Int32(256),
           make_fake_compact_tensor(dtype=cute.Float32, shape=[N]),
       ]
       compiled = cute.compile(  # type: ignore
           _vector_add_kernel, *cute_args, options="--enable-tvm-ffi"
       )
       compiled.export_to_c(object_file_path, function_name="vector_add")

       shared_libs = cute.runtime.find_runtime_libraries(enable_tvm_ffi=True)
       # Link the object file (plus the runtime libraries) into a shared library.
       cmd = ["gcc", "-shared", "-o", lib_path, object_file_path, *shared_libs]
       subprocess.run(cmd, check=True)
       extern_mod = tvm.runtime.load_module(lib_path)
       mod = Module

       # Append the loaded library to any external modules already attached.
       mod_attrs = dict(mod.attrs) if mod.attrs else {}
       mod = mod.with_attr(
           "external_mods",
           list(mod_attrs.get("external_mods", [])) + [extern_mod],
       )

       ex = relax.build(mod, target="cuda")
       dev = tvm.cuda(0)
       # The reported SaveToBytes error is raised by this constructor.
       vm = relax.VirtualMachine(ex, dev)

       np.random.seed(0)
       x_np = np.random.rand(N).astype("float32")
       y_np = np.random.rand(N).astype("float32")
       x_tvm = tvm.runtime.tensor(x_np, device=dev)
       y_tvm = tvm.runtime.tensor(y_np, device=dev)

       out = vm["main"](x_tvm, y_tvm)
       out_np = out.numpy()
       # Reference result computed on the GPU with PyTorch.
       ref = (
           torch.from_numpy(x_np).cuda() + torch.from_numpy(y_np).cuda()
       ).cpu().numpy()
       np.testing.assert_array_equal(out_np, ref)
       print("SUCCESS: CuteDSL vector add dispatched and matched PyTorch")
   
   
   # Allow running the reproduction directly as a script, without pytest.
   if __name__ == "__main__":
       test_cutedsl_vector_add()
   
   ```
   
   Error output:
   ``` bash
    p tests/python/test_cutedsl_vector_add_reproduce.py 
   
/root/miniconda3/lib/python3.12/site-packages/nvidia_cutlass_dsl/python_packages/cutlass/base_dsl/dsl.py:420:
 UserWarning: Dynamic variable in block size ?, 1, 1, cannot auto-generate 
`nvvm.reqntid`
     warnings.warn(message, UserWarning)
   /usr/bin/ld: ./cutedsl_build/vector_add.o: warning: relocation against 
`TVMFFIErrorSetRaisedFromCStr' in read-only section `.text'
   /usr/bin/ld: warning: creating DT_TEXTREL in a shared object
   Traceback (most recent call last):
     File 
"/root/autodl-tmp/LiteSynth/tests/python/test_cutedsl_vector_add_reproduce.py", 
line 108, in <module>
       test_cutedsl_vector_add()
     File 
"/root/autodl-tmp/LiteSynth/tests/python/test_cutedsl_vector_add_reproduce.py", 
line 92, in test_cutedsl_vector_add
       vm = relax.VirtualMachine(ex, dev)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
     File "/root/autodl-tmp/tvm-dg/python/tvm/runtime/vm.py", line 76, in 
__init__
       rt_mod = rt_mod.jit()
                ^^^^^^^^^^^^
     File "/root/autodl-tmp/tvm-dg/python/tvm/runtime/executable.py", line 109, 
in jit
       self.export_library(dso_path, fcompile=fcompile, addons=addons, **kwargs)
     File "/root/autodl-tmp/tvm-dg/python/tvm/runtime/executable.py", line 165, 
in export_library
       return self.mod.export_library(
              ^^^^^^^^^^^^^^^^^^^^^^^^
     File "/root/autodl-tmp/tvm-dg/python/tvm/runtime/module.py", line 282, in 
export_library
       m = _ffi_api.ModulePackImportsToLLVM(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
     File "python/tvm_ffi/cython/function.pxi", line 904, in 
tvm_ffi.core.Function.__call__
     File "<unknown>", line 0, in 
tvm::codegen::PackImportsToLLVM(tvm::ffi::Module const&, bool, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&, std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> > const&)
     File "<unknown>", line 0, in 
tvm::codegen::PackImportsToBytes[abi:cxx11](tvm::ffi::Module const&)
     File "<unknown>", line 0, in 
tvm::codegen::SerializeModuleToBytes[abi:cxx11](tvm::ffi::Module const&, bool)
     File "<unknown>", line 0, in 
tvm::codegen::ModuleSerializer::SerializeModuleToBytes(dmlc::Stream*, bool)
     File "include/tvm/ffi/extra/module.h", line 110, in virtual 
tvm::ffi::Bytes tvm::ffi::ModuleObj::SaveToBytes() const
   RuntimeError: Module[library] does not support SaveToBytes
   ```
   
   Environment:
   ``` bash
   pip freeze
   absl-py==2.3.1
   accelerate==1.12.0
   alabaster==1.0.0
   anaconda-anon-usage @ file:///croot/anaconda-anon-usage_1710965072196/work
   annotated-types==0.7.0
   anyio==4.10.0
   apache-tvm-ffi @ file:///root/autodl-tmp/tvm-dg/3rdparty/tvm-ffi
   archspec @ file:///croot/archspec_1709217642129/work
   argon2-cffi==25.1.0
   argon2-cffi-bindings==25.1.0
   arrow==1.3.0
   asttokens==3.0.0
   async-lru==2.0.5
   attrs==25.3.0
   Authlib==1.6.6
   babel==2.17.0
   bandit==1.9.2
   beautifulsoup4==4.13.4
   black==25.12.0
   bleach==6.2.0
   boltons @ 
file:///work/perseverance-python-buildout/croot/boltons_1698851177130/work
   Brotli @ file:///croot/brotli-split_1714483155106/work
   cachetools==6.2.4
   certifi @ 
file:///home/task_176765899978398/croot/certifi_1767659142032/work/certifi
   cffi @ file:///croot/cffi_1714483155441/work
   cfgv==3.5.0
   chardet==5.2.0
   charset-normalizer @ 
file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
   click==8.3.1
   colorama==0.4.6
   comm==0.2.3
   conda @ file:///croot/conda_1736277811838/work
   conda-content-trust @ file:///croot/conda-content-trust_1714483159009/work
   conda-libmamba-solver @ 
file:///croot/conda-libmamba-solver_1706733287605/work/src
   conda-package-handling @ 
file:///croot/conda-package-handling_1714483155348/work
   conda_package_streaming @ 
file:///work/perseverance-python-buildout/croot/conda-package-streaming_1698847176583/work
   contourpy==1.3.3
   coverage==7.13.1
   cryptography @ file:///croot/cryptography_1714660666131/work
   cuda-bindings==13.1.1
   cuda-pathfinder==1.3.3
   cuda-python==13.1.1
   cycler==0.12.1
   debugpy==1.8.16
   decorator==5.2.1
   defusedxml==0.7.1
   distlib==0.4.0
   distro @ file:///croot/distro_1714488253808/work
   docutils==0.21.2
   dparse==0.6.4
   execnet==2.1.2
   executing==2.2.0
   fastjsonschema==2.21.1
   filelock==3.20.3
   flake8==7.3.0
   fonttools==4.59.0
   fqdn==1.5.1
   frozendict @ 
file:///home/task_176175050888873/conda-bld/frozendict_1761750665347/work
   fsspec==2025.7.0
   grpcio==1.74.0
   h11==0.16.0
   hf-xet==1.2.0
   httpcore==1.0.9
   httpx==0.28.1
   huggingface-hub==0.36.0
   identify==2.6.15
   idna @ file:///croot/idna_1714398848350/work
   imagesize==1.4.1
   iniconfig==2.3.0
   ipykernel==6.30.1
   ipython==9.4.0
   ipython_pygments_lexers==1.1.1
   ipywidgets==8.1.7
   isoduration==20.11.0
   isort==7.0.0
   jedi==0.19.2
   Jinja2==3.1.6
   joblib==1.5.3
   json5==0.12.0
   jsonpatch @ file:///croot/jsonpatch_1714483231291/work
   jsonpointer==2.1
   jsonschema==4.25.0
   jsonschema-specifications==2025.4.1
   jupyter-events==0.12.0
   jupyter-lsp==2.2.6
   jupyter_client==8.6.3
   jupyter_core==5.8.1
   jupyter_server==2.16.0
   jupyter_server_terminals==0.5.3
   jupyterlab==4.4.5
   jupyterlab-language-pack-zh-CN==4.4.post0
   jupyterlab_pygments==0.3.0
   jupyterlab_server==2.27.3
   jupyterlab_widgets==3.0.15
   kiwisolver==1.4.8
   lark==1.2.2
   libmambapy @ file:///croot/mamba-split_1714483352891/work/libmambapy
   librt==0.7.7
   Markdown==3.8.2
   markdown-it-py==3.0.0
   MarkupSafe==3.0.2
   marshmallow==4.2.0
   matplotlib==3.10.5
   matplotlib-inline==0.1.7
   mccabe==0.7.0
   mdit-py-plugins==0.5.0
   mdurl==0.1.2
   menuinst @ file:///croot/menuinst_1714510563922/work
   mistune==3.1.3
   ml_dtypes==0.5.4
   mpmath==1.3.0
   mypy==1.19.1
   mypy_extensions==1.1.0
   myst-parser==4.0.1
   nbclient==0.10.2
   nbconvert==7.16.6
   nbformat==5.10.4
   nest-asyncio==1.6.0
   networkx==3.5
   ninja==1.13.0
   nltk==3.9.2
   nodeenv==1.10.0
   notebook_shim==0.2.4
   numpy==2.3.2
   nvidia-cublas-cu12==12.4.5.8
   nvidia-cuda-cupti-cu12==12.4.127
   nvidia-cuda-nvrtc-cu12==12.4.127
   nvidia-cuda-runtime-cu12==12.4.127
   nvidia-cudnn-cu12==9.1.0.70
   nvidia-cufft-cu12==11.2.1.3
   nvidia-cufile-cu12==1.13.1.3
   nvidia-curand-cu12==10.3.5.147
   nvidia-cusolver-cu12==11.6.1.9
   nvidia-cusparse-cu12==12.3.1.170
   nvidia-cusparselt-cu12==0.7.1
   nvidia-cutlass-dsl==4.3.4
   nvidia-nccl-cu12==2.21.5
   nvidia-nvjitlink-cu12==12.4.127
   nvidia-nvtx-cu12==12.4.127
   overrides==7.7.0
   packaging==25.0
   pandocfilters==1.5.1
   parso==0.8.4
   pathspec==1.0.3
   pexpect==4.9.0
   pillow==11.3.0
   platformdirs==4.5.1
   pluggy==1.6.0
   pre_commit==4.5.1
   prometheus_client==0.22.1
   prompt_toolkit==3.0.51
   protobuf==6.31.1
   psutil==7.0.0
   ptyprocess==0.7.0
   pure_eval==0.2.3
   py-cpuinfo==9.0.0
   pycodestyle==2.14.0
   pycosat @ file:///croot/pycosat_1714510623388/work
   pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work
   pydantic==2.12.5
   pydantic_core==2.41.5
   pyflakes==3.4.0
   Pygments==2.19.2
   pyparsing==3.2.3
   pyproject-api==1.10.0
   PySocks @ 
file:///work/perseverance-python-buildout/croot/pysocks_1698845478203/work
   pytest==9.0.2
   pytest-asyncio==1.3.0
   pytest-benchmark==5.2.3
   pytest-cov==7.0.0
   pytest-mock==3.15.1
   pytest-timeout==2.4.0
   pytest-xdist==3.8.0
   python-dateutil==2.9.0.post0
   python-json-logger==3.3.0
   pytokens==0.3.0
   PyYAML==6.0.2
   pyzmq==27.0.1
   referencing==0.36.2
   regex==2025.11.3
   requests @ file:///croot/requests_1707355572290/work
   rfc3339-validator==0.1.4
   rfc3986-validator==0.1.1
   rfc3987-syntax==1.1.0
   rich==14.2.0
   roman-numerals==4.1.0
   roman-numerals-py==4.1.0
   rpds-py==0.26.0
   ruamel.yaml @ 
file:///work/perseverance-python-buildout/croot/ruamel.yaml_1698863605521/work
   safetensors==0.7.0
   safety==3.7.0
   safety-schemas==0.0.16
   Send2Trash==1.8.3
   setuptools==69.5.1
   shellingham==1.5.4
   six==1.17.0
   sniffio==1.3.1
   snowballstemmer==3.0.1
   soupsieve==2.7
   Sphinx==8.2.3
   sphinx-rtd-theme==3.0.2
   sphinxcontrib-applehelp==2.0.0
   sphinxcontrib-devhelp==2.0.0
   sphinxcontrib-htmlhelp==2.1.0
   sphinxcontrib-jquery==4.1
   sphinxcontrib-jsmath==1.0.1
   sphinxcontrib-qthelp==2.0.0
   sphinxcontrib-serializinghtml==2.0.0
   stack-data==0.6.3
   stevedore==5.6.0
   supervisor==4.2.5
   sympy==1.13.1
   tenacity==9.1.2
   tensorboard==2.20.0
   tensorboard-data-server==0.7.2
   terminado==0.18.1
   tinycss2==1.4.0
   tokenizers==0.20.3
   tomlkit==0.13.3
   torch==2.5.1
   torchvision==0.20.1
   tornado==6.5.1
   tox==4.34.1
   tqdm @ file:///croot/tqdm_1714567712644/work
   traitlets==5.14.3
   transformers==4.46.3
   triton==3.5.1
   truststore @ 
file:///work/perseverance-python-buildout/croot/truststore_1701735771625/work
   typer==0.21.1
   types-python-dateutil==2.9.0.20250708
   types-requests==2.32.4.20260107
   typing-inspection==0.4.2
   typing_extensions==4.14.1
   uri-template==1.3.0
   urllib3 @ file:///croot/urllib3_1707770551213/work
   uv==0.9.21
   virtualenv==20.36.1
   vulture==2.14
   wcwidth==0.2.13
   webcolors==24.11.1
   webencodings==0.5.1
   websocket-client==1.8.0
   Werkzeug==3.1.3
   wheel==0.43.0
   widgetsnbextension==4.0.14
   zstandard @ file:///croot/zstandard_1714677652653/work
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to