This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from aee96e64b5 [DTYPE] Fix dtype functions after dtype refactor (#18041)
add fd9c091097 [CUTLASS] Add GeMM kernels for Blackwell GPUs (#18033)
add a00f8c50ef [Backend] JIT compile FlashInfer kernel with FFI header (#18047)
add 74eae2f59d [3rdparty] Phasing out FlashInfer AOT from 3rdparty (#18046)
add 61e7c8fb63 [Refactor] Rename `relax_vm` to `vm` (#18049)
No new revisions were added by this update.
Summary of changes:
.gitmodules | 3 -
3rdparty/cutlass | 2 +-
3rdparty/flashinfer | 1 -
CMakeLists.txt | 25 +--
cmake/modules/CUDA.cmake | 4 +-
cmake/modules/Hexagon.cmake | 4 +-
cmake/modules/LibInfo.cmake | 1 -
cmake/modules/contrib/CUTLASS.cmake | 16 +-
docs/install/from_source.rst | 3 +-
docs/reference/api/python/index.rst | 2 +-
.../api/python/runtime/{relax_vm.rst => vm.rst} | 4 +-
include/tvm/relax/exec_builder.h | 6 +-
include/tvm/runtime/{relax_vm => vm}/builtin.h | 12 +-
include/tvm/runtime/{relax_vm => vm}/bytecode.h | 12 +-
include/tvm/runtime/{relax_vm => vm}/executable.h | 16 +-
.../{relax_vm => vm}/ndarray_cache_support.h | 10 +-
include/tvm/runtime/{relax_vm => vm}/vm.h | 16 +-
python/tvm/contrib/hexagon/session.py | 4 +-
python/tvm/relax/__init__.py | 4 +-
python/tvm/relax/backend/cuda/flashinfer.py | 5 +-
python/tvm/relax/frontend/nn/core.py | 2 +-
python/tvm/relax/frontend/nn/torch.py | 2 +-
python/tvm/runtime/{relax_vm.py => vm.py} | 0
src/relax/backend/vm/codegen_vm.cc | 8 +-
src/relax/backend/vm/codegen_vm_tir.cc | 6 +-
src/relax/backend/vm/vm_shape_lower.cc | 18 +-
.../{fp16_group_gemm.cu => fp16_group_gemm.cuh} | 51 ++---
...runner.cuh => fp16_group_gemm_runner_sm100.cuh} | 79 +++++---
..._runner.cuh => fp16_group_gemm_runner_sm90.cuh} | 10 +-
.../contrib/cutlass/fp16_group_gemm_sm100.cu | 54 +++++
...{fp16_group_gemm.cu => fp16_group_gemm_sm90.cu} | 53 +++--
.../contrib/cutlass/fp8_blockwise_scaled_gemm.cu | 164 ---------------
.../{fp8_group_gemm.cu => fp8_group_gemm_sm90.cu} | 2 +-
.../contrib/cutlass/fp8_groupwise_scaled_gemm.cuh | 172 ++++++++++++++++
.../fp8_groupwise_scaled_gemm_runner_sm100.cuh | 155 +++++++++++++++
...h => fp8_groupwise_scaled_gemm_runner_sm90.cuh} | 53 +----
.../cutlass/fp8_groupwise_scaled_gemm_sm100.cu | 77 ++++++++
.../cutlass/fp8_groupwise_scaled_gemm_sm90.cu | 77 ++++++++
...p8_groupwise_scaled_group_gemm_runner_sm100.cuh | 220 +++++++++++++++++++++
.../fp8_groupwise_scaled_group_gemm_sm100.cu | 93 +++++++++
src/runtime/disco/builtin.cc | 2 +-
src/runtime/disco/loader.cc | 4 +-
src/runtime/{relax_vm => vm}/attn_backend.cc | 6 +-
src/runtime/{relax_vm => vm}/attn_backend.h | 12 +-
src/runtime/{relax_vm => vm}/attn_utils.h | 12 +-
src/runtime/{relax_vm => vm}/builtin.cc | 12 +-
src/runtime/{relax_vm => vm}/bytecode.cc | 8 +-
.../{relax_vm => vm}/cuda/cuda_graph_builtin.cc | 10 +-
src/runtime/{relax_vm => vm}/executable.cc | 14 +-
src/runtime/{relax_vm => vm}/hexagon/builtin.cc | 8 +-
src/runtime/{relax_vm => vm}/kv_state.cc | 4 +-
src/runtime/{relax_vm => vm}/kv_state.h | 10 +-
src/runtime/{relax_vm => vm}/lm_support.cc | 8 +-
.../{relax_vm => vm}/ndarray_cache_support.cc | 8 +-
src/runtime/{relax_vm => vm}/paged_kv_cache.cc | 8 +-
src/runtime/{relax_vm => vm}/rnn_state.cc | 6 +-
src/runtime/{relax_vm => vm}/vm.cc | 12 +-
src/support/libinfo.cc | 1 -
src/target/tag.cc | 2 +
tests/python/contrib/test_cutlass_gemm.py | 32 ++-
.../python/contrib/test_tir_triton_integration.py | 15 +-
tests/python/disco/test_ccl.py | 2 +-
.../python/relax/test_frontend_nn_extern_module.py | 4 +-
tests/python/relax/test_tir_call_source_kernel.py | 6 +-
.../relax/test_training_optimizer_numeric.py | 8 +-
web/emcc/wasm_runtime.cc | 20 +-
web/tests/node/{test_relax_vm.js => test_vm.js} | 0
67 files changed, 1172 insertions(+), 508 deletions(-)
delete mode 160000 3rdparty/flashinfer
rename docs/reference/api/python/runtime/{relax_vm.rst => vm.rst} (93%)
rename include/tvm/runtime/{relax_vm => vm}/builtin.h (92%)
rename include/tvm/runtime/{relax_vm => vm}/bytecode.h (97%)
rename include/tvm/runtime/{relax_vm => vm}/executable.h (95%)
rename include/tvm/runtime/{relax_vm => vm}/ndarray_cache_support.h (93%)
rename include/tvm/runtime/{relax_vm => vm}/vm.h (96%)
rename python/tvm/runtime/{relax_vm.py => vm.py} (100%)
copy src/runtime/contrib/cutlass/{fp16_group_gemm.cu => fp16_group_gemm.cuh} (56%)
copy src/runtime/contrib/cutlass/{group_gemm_runner.cuh => fp16_group_gemm_runner_sm100.cuh} (73%)
rename src/runtime/contrib/cutlass/{group_gemm_runner.cuh => fp16_group_gemm_runner_sm90.cuh} (96%)
create mode 100644 src/runtime/contrib/cutlass/fp16_group_gemm_sm100.cu
rename src/runtime/contrib/cutlass/{fp16_group_gemm.cu => fp16_group_gemm_sm90.cu} (60%)
delete mode 100644 src/runtime/contrib/cutlass/fp8_blockwise_scaled_gemm.cu
rename src/runtime/contrib/cutlass/{fp8_group_gemm.cu => fp8_group_gemm_sm90.cu} (98%)
create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm.cuh
create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm_runner_sm100.cuh
rename src/runtime/contrib/cutlass/{blockwise_scaled_gemm_runner.cuh => fp8_groupwise_scaled_gemm_runner_sm90.cuh} (75%)
create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm_sm100.cu
create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm_sm90.cu
create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_group_gemm_runner_sm100.cuh
create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_group_gemm_sm100.cu
rename src/runtime/{relax_vm => vm}/attn_backend.cc (97%)
rename src/runtime/{relax_vm => vm}/attn_backend.h (99%)
rename src/runtime/{relax_vm => vm}/attn_utils.h (99%)
rename src/runtime/{relax_vm => vm}/builtin.cc (99%)
rename src/runtime/{relax_vm => vm}/bytecode.cc (93%)
rename src/runtime/{relax_vm => vm}/cuda/cuda_graph_builtin.cc (97%)
rename src/runtime/{relax_vm => vm}/executable.cc (98%)
rename src/runtime/{relax_vm => vm}/hexagon/builtin.cc (95%)
rename src/runtime/{relax_vm => vm}/kv_state.cc (99%)
rename src/runtime/{relax_vm => vm}/kv_state.h (98%)
rename src/runtime/{relax_vm => vm}/lm_support.cc (99%)
rename src/runtime/{relax_vm => vm}/ndarray_cache_support.cc (98%)
rename src/runtime/{relax_vm => vm}/paged_kv_cache.cc (99%)
rename src/runtime/{relax_vm => vm}/rnn_state.cc (99%)
rename src/runtime/{relax_vm => vm}/vm.cc (99%)
rename web/tests/node/{test_relax_vm.js => test_vm.js} (100%)