This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git


    from aee96e64b5 [DTYPE] Fix dtype functions after dtype refactor (#18041)
     add fd9c091097 [CUTLASS] Add GeMM kernels for Blackwell GPUs (#18033)
     add a00f8c50ef [Backend] JIT compile FlashInfer kernel with FFI header (#18047)
     add 74eae2f59d [3rdparty] Phasing out FlashInfer AOT from 3rdparty (#18046)
     add 61e7c8fb63 [Refactor] Rename `relax_vm` to `vm` (#18049)

No new revisions were added by this update.

Summary of changes:
 .gitmodules                                        |   3 -
 3rdparty/cutlass                                   |   2 +-
 3rdparty/flashinfer                                |   1 -
 CMakeLists.txt                                     |  25 +--
 cmake/modules/CUDA.cmake                           |   4 +-
 cmake/modules/Hexagon.cmake                        |   4 +-
 cmake/modules/LibInfo.cmake                        |   1 -
 cmake/modules/contrib/CUTLASS.cmake                |  16 +-
 docs/install/from_source.rst                       |   3 +-
 docs/reference/api/python/index.rst                |   2 +-
 .../api/python/runtime/{relax_vm.rst => vm.rst}    |   4 +-
 include/tvm/relax/exec_builder.h                   |   6 +-
 include/tvm/runtime/{relax_vm => vm}/builtin.h     |  12 +-
 include/tvm/runtime/{relax_vm => vm}/bytecode.h    |  12 +-
 include/tvm/runtime/{relax_vm => vm}/executable.h  |  16 +-
 .../{relax_vm => vm}/ndarray_cache_support.h       |  10 +-
 include/tvm/runtime/{relax_vm => vm}/vm.h          |  16 +-
 python/tvm/contrib/hexagon/session.py              |   4 +-
 python/tvm/relax/__init__.py                       |   4 +-
 python/tvm/relax/backend/cuda/flashinfer.py        |   5 +-
 python/tvm/relax/frontend/nn/core.py               |   2 +-
 python/tvm/relax/frontend/nn/torch.py              |   2 +-
 python/tvm/runtime/{relax_vm.py => vm.py}          |   0
 src/relax/backend/vm/codegen_vm.cc                 |   8 +-
 src/relax/backend/vm/codegen_vm_tir.cc             |   6 +-
 src/relax/backend/vm/vm_shape_lower.cc             |  18 +-
 .../{fp16_group_gemm.cu => fp16_group_gemm.cuh}    |  51 ++---
 ...runner.cuh => fp16_group_gemm_runner_sm100.cuh} |  79 +++++---
 ..._runner.cuh => fp16_group_gemm_runner_sm90.cuh} |  10 +-
 .../contrib/cutlass/fp16_group_gemm_sm100.cu       |  54 +++++
 ...{fp16_group_gemm.cu => fp16_group_gemm_sm90.cu} |  53 +++--
 .../contrib/cutlass/fp8_blockwise_scaled_gemm.cu   | 164 ---------------
 .../{fp8_group_gemm.cu => fp8_group_gemm_sm90.cu}  |   2 +-
 .../contrib/cutlass/fp8_groupwise_scaled_gemm.cuh  | 172 ++++++++++++++++
 .../fp8_groupwise_scaled_gemm_runner_sm100.cuh     | 155 +++++++++++++++
 ...h => fp8_groupwise_scaled_gemm_runner_sm90.cuh} |  53 +----
 .../cutlass/fp8_groupwise_scaled_gemm_sm100.cu     |  77 ++++++++
 .../cutlass/fp8_groupwise_scaled_gemm_sm90.cu      |  77 ++++++++
 ...p8_groupwise_scaled_group_gemm_runner_sm100.cuh | 220 +++++++++++++++++++++
 .../fp8_groupwise_scaled_group_gemm_sm100.cu       |  93 +++++++++
 src/runtime/disco/builtin.cc                       |   2 +-
 src/runtime/disco/loader.cc                        |   4 +-
 src/runtime/{relax_vm => vm}/attn_backend.cc       |   6 +-
 src/runtime/{relax_vm => vm}/attn_backend.h        |  12 +-
 src/runtime/{relax_vm => vm}/attn_utils.h          |  12 +-
 src/runtime/{relax_vm => vm}/builtin.cc            |  12 +-
 src/runtime/{relax_vm => vm}/bytecode.cc           |   8 +-
 .../{relax_vm => vm}/cuda/cuda_graph_builtin.cc    |  10 +-
 src/runtime/{relax_vm => vm}/executable.cc         |  14 +-
 src/runtime/{relax_vm => vm}/hexagon/builtin.cc    |   8 +-
 src/runtime/{relax_vm => vm}/kv_state.cc           |   4 +-
 src/runtime/{relax_vm => vm}/kv_state.h            |  10 +-
 src/runtime/{relax_vm => vm}/lm_support.cc         |   8 +-
 .../{relax_vm => vm}/ndarray_cache_support.cc      |   8 +-
 src/runtime/{relax_vm => vm}/paged_kv_cache.cc     |   8 +-
 src/runtime/{relax_vm => vm}/rnn_state.cc          |   6 +-
 src/runtime/{relax_vm => vm}/vm.cc                 |  12 +-
 src/support/libinfo.cc                             |   1 -
 src/target/tag.cc                                  |   2 +
 tests/python/contrib/test_cutlass_gemm.py          |  32 ++-
 .../python/contrib/test_tir_triton_integration.py  |  15 +-
 tests/python/disco/test_ccl.py                     |   2 +-
 .../python/relax/test_frontend_nn_extern_module.py |   4 +-
 tests/python/relax/test_tir_call_source_kernel.py  |   6 +-
 .../relax/test_training_optimizer_numeric.py       |   8 +-
 web/emcc/wasm_runtime.cc                           |  20 +-
 web/tests/node/{test_relax_vm.js => test_vm.js}    |   0
 67 files changed, 1172 insertions(+), 508 deletions(-)
 delete mode 160000 3rdparty/flashinfer
 rename docs/reference/api/python/runtime/{relax_vm.rst => vm.rst} (93%)
 rename include/tvm/runtime/{relax_vm => vm}/builtin.h (92%)
 rename include/tvm/runtime/{relax_vm => vm}/bytecode.h (97%)
 rename include/tvm/runtime/{relax_vm => vm}/executable.h (95%)
 rename include/tvm/runtime/{relax_vm => vm}/ndarray_cache_support.h (93%)
 rename include/tvm/runtime/{relax_vm => vm}/vm.h (96%)
 rename python/tvm/runtime/{relax_vm.py => vm.py} (100%)
 copy src/runtime/contrib/cutlass/{fp16_group_gemm.cu => fp16_group_gemm.cuh} (56%)
 copy src/runtime/contrib/cutlass/{group_gemm_runner.cuh => fp16_group_gemm_runner_sm100.cuh} (73%)
 rename src/runtime/contrib/cutlass/{group_gemm_runner.cuh => fp16_group_gemm_runner_sm90.cuh} (96%)
 create mode 100644 src/runtime/contrib/cutlass/fp16_group_gemm_sm100.cu
 rename src/runtime/contrib/cutlass/{fp16_group_gemm.cu => fp16_group_gemm_sm90.cu} (60%)
 delete mode 100644 src/runtime/contrib/cutlass/fp8_blockwise_scaled_gemm.cu
 rename src/runtime/contrib/cutlass/{fp8_group_gemm.cu => fp8_group_gemm_sm90.cu} (98%)
 create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm.cuh
 create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm_runner_sm100.cuh
 rename src/runtime/contrib/cutlass/{blockwise_scaled_gemm_runner.cuh => fp8_groupwise_scaled_gemm_runner_sm90.cuh} (75%)
 create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm_sm100.cu
 create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_gemm_sm90.cu
 create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_group_gemm_runner_sm100.cuh
 create mode 100644 src/runtime/contrib/cutlass/fp8_groupwise_scaled_group_gemm_sm100.cu
 rename src/runtime/{relax_vm => vm}/attn_backend.cc (97%)
 rename src/runtime/{relax_vm => vm}/attn_backend.h (99%)
 rename src/runtime/{relax_vm => vm}/attn_utils.h (99%)
 rename src/runtime/{relax_vm => vm}/builtin.cc (99%)
 rename src/runtime/{relax_vm => vm}/bytecode.cc (93%)
 rename src/runtime/{relax_vm => vm}/cuda/cuda_graph_builtin.cc (97%)
 rename src/runtime/{relax_vm => vm}/executable.cc (98%)
 rename src/runtime/{relax_vm => vm}/hexagon/builtin.cc (95%)
 rename src/runtime/{relax_vm => vm}/kv_state.cc (99%)
 rename src/runtime/{relax_vm => vm}/kv_state.h (98%)
 rename src/runtime/{relax_vm => vm}/lm_support.cc (99%)
 rename src/runtime/{relax_vm => vm}/ndarray_cache_support.cc (98%)
 rename src/runtime/{relax_vm => vm}/paged_kv_cache.cc (99%)
 rename src/runtime/{relax_vm => vm}/rnn_state.cc (99%)
 rename src/runtime/{relax_vm => vm}/vm.cc (99%)
 rename web/tests/node/{test_relax_vm.js => test_vm.js} (100%)

Reply via email to