This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from ab25b49225 [TIR] Fix InjectPTXLDG32 segfaults and skip non-CUDA targets (#18671)
add 877b448b02 [REFACTOR][TIR] Rename tir.Block to SBlock (#18689)
add 95f41d66e7 [Web][Version] Fix WebLLM vision model issues (#18680)
add c043e4fc8d [Relax] Add NN operator attributes include to TensorRT codegen (#18690)
add 71ac3e821b [Relax][NN] Add batch_flatten operator (#18677)
No new revisions were added by this update.
Summary of changes:
apps/android_rpc/tests/android_rpc_test.py | 2 +-
apps/ios_rpc/tests/ios_rpc_test.py | 2 +-
docs/deep_dive/relax/learning.rst | 6 +-
docs/deep_dive/relax/tutorials/relax_creation.py | 6 +-
docs/deep_dive/tensor_ir/abstraction.rst | 4 +-
docs/deep_dive/tensor_ir/learning.rst | 12 +-
docs/deep_dive/tensor_ir/tutorials/tir_creation.py | 16 +-
.../tensor_ir/tutorials/tir_transformation.py | 8 +-
docs/how_to/tutorials/cross_compilation_and_rpc.py | 2 +-
docs/reference/api/python/tir/tir.rst | 2 +-
.../tvm/meta_schedule/schedule/cuda/thread_bind.h | 2 +-
.../tvm/meta_schedule/schedule/generic/winograd.h | 2 +-
include/tvm/meta_schedule/schedule_rule.h | 8 +-
include/tvm/relax/analysis.h | 2 +-
include/tvm/relax/distributed/axis_group_graph.h | 2 +-
include/tvm/script/ir_builder/base.h | 2 +-
include/tvm/script/ir_builder/relax/frame.h | 21 +-
include/tvm/script/ir_builder/relax/ir.h | 4 +-
include/tvm/script/ir_builder/tir/frame.h | 44 +-
include/tvm/script/ir_builder/tir/ir.h | 6 +-
include/tvm/tir/analysis.h | 12 +-
include/tvm/tir/block_dependence_info.h | 20 +-
include/tvm/tir/block_scope.h | 34 +-
include/tvm/tir/data_type_rewriter.h | 8 +-
include/tvm/tir/function.h | 4 +-
include/tvm/tir/schedule/instruction.h | 8 +-
include/tvm/tir/schedule/schedule.h | 156 +--
include/tvm/tir/schedule/state.h | 48 +-
include/tvm/tir/stmt.h | 64 +-
include/tvm/tir/stmt_functor.h | 16 +-
include/tvm/tir/transform.h | 28 +-
include/tvm/tir/utils.h | 14 +-
jvm/core/src/test/scripts/prepare_test_libs.py | 2 +-
python/tvm/dlight/__init__.py | 2 +-
python/tvm/dlight/adreno/convolution.py | 6 +-
python/tvm/dlight/adreno/fallback.py | 16 +-
python/tvm/dlight/adreno/layout_transform.py | 2 +-
python/tvm/dlight/adreno/pool.py | 10 +-
python/tvm/dlight/adreno/utils.py | 8 +-
python/tvm/dlight/analysis/__init__.py | 4 +-
python/tvm/dlight/analysis/common_analysis.py | 61 +-
python/tvm/dlight/analysis/gemv.py | 14 +-
python/tvm/dlight/base/common_schedules.py | 20 +-
python/tvm/dlight/cpu/gemv.py | 6 +-
python/tvm/dlight/gpu/fallback.py | 2 +-
python/tvm/dlight/gpu/gemv.py | 14 +-
python/tvm/dlight/gpu/low_batch_gemv.py | 32 +-
python/tvm/dlight/gpu/matmul.py | 42 +-
python/tvm/dlight/gpu/reduction.py | 16 +-
python/tvm/dlight/gpu/rmsnorm.py | 8 +-
python/tvm/dlight/gpu/transpose.py | 4 +-
python/tvm/exec/gpu_memory_bandwidth.py | 2 +-
.../schedule/cuda/layout_transform.py | 18 +-
.../schedule_rule/multi_level_tiling.py | 6 +-
.../meta_schedule/schedule_rule/schedule_rule.py | 10 +-
python/tvm/relax/analysis/analysis.py | 6 +-
python/tvm/relax/backend/gpu_generic/cumsum.py | 2 +-
python/tvm/relax/backend/gpu_generic/sampling.py | 8 +-
python/tvm/relax/backend/metal/coreml.py | 5 +-
python/tvm/relax/block_builder.py | 8 +-
python/tvm/relax/frontend/nn/llm/kv_cache.py | 174 ++--
.../relax/frontend/nn/llm/position_embedding.py | 12 +-
python/tvm/relax/frontend/nn/llm/tree_attn.py | 98 +-
python/tvm/relax/frontend/nn/op.py | 6 +-
python/tvm/relax/op/nn/__init__.py | 1 +
python/tvm/relax/op/nn/nn.py | 19 +
python/tvm/relax/transform/legalize_ops/nn.py | 7 +
python/tvm/relax/transform/transform.py | 4 +-
python/tvm/script/ir_builder/base.py | 2 +-
python/tvm/script/ir_builder/relax/frame.py | 4 +-
python/tvm/script/ir_builder/relax/ir.py | 4 +-
python/tvm/script/ir_builder/tir/frame.py | 6 +-
python/tvm/script/ir_builder/tir/ir.py | 22 +-
python/tvm/script/parser/relax/parser.py | 4 +-
python/tvm/te/operation.py | 4 +-
python/tvm/testing/tir.py | 8 +-
python/tvm/tir/__init__.py | 6 +-
python/tvm/tir/analysis/analysis.py | 24 +-
python/tvm/tir/block_dependence_info.py | 30 +-
python/tvm/tir/block_scope.py | 26 +-
python/tvm/tir/function.py | 4 +-
python/tvm/tir/functor.py | 58 +-
python/tvm/tir/schedule/__init__.py | 4 +-
python/tvm/tir/schedule/analysis.py | 14 +-
python/tvm/tir/schedule/instruction.py | 14 +-
python/tvm/tir/schedule/schedule.py | 626 +++++------
python/tvm/tir/schedule/state.py | 18 +-
python/tvm/tir/schedule/transform.py | 6 +-
python/tvm/tir/stmt.py | 22 +-
python/tvm/tir/tensor_intrin/arm_cpu.py | 36 +-
python/tvm/tir/tensor_intrin/cuda.py | 72 +-
python/tvm/tir/tensor_intrin/dot_product_common.py | 6 +-
python/tvm/tir/tensor_intrin/hexagon.py | 24 +-
python/tvm/tir/tensor_intrin/metal.py | 24 +-
python/tvm/tir/tensor_intrin/riscv_cpu.py | 8 +-
python/tvm/tir/tensor_intrin/rocm.py | 28 +-
python/tvm/tir/tensor_intrin/x86.py | 8 +-
python/tvm/tir/transform/transform.py | 4 +-
src/arith/ir_mutator_with_analyzer.cc | 2 +-
src/arith/ir_mutator_with_analyzer.h | 2 +-
src/arith/ir_visitor_with_analyzer.cc | 2 +-
src/arith/ir_visitor_with_analyzer.h | 2 +-
.../feature_extractor/per_store_feature.cc | 4 +-
src/meta_schedule/module_equality.cc | 6 +-
.../mutator/mutate_compute_location.cc | 2 +-
src/meta_schedule/mutator/mutate_parallel.cc | 36 +-
.../postproc/rewrite_cooperative_fetch.cc | 10 +-
src/meta_schedule/postproc/rewrite_layout.cc | 30 +-
.../postproc/rewrite_parallel_vectorize_unroll.cc | 23 +-
.../postproc/rewrite_reduction_block.cc | 14 +-
src/meta_schedule/postproc/rewrite_tensorize.cc | 18 +-
.../postproc/rewrite_unbound_block.cc | 8 +-
src/meta_schedule/postproc/verify_gpu_code.cc | 2 +-
src/meta_schedule/schedule/cpu/winograd.cc | 18 +-
src/meta_schedule/schedule/cuda/thread_bind.cc | 2 +-
src/meta_schedule/schedule/cuda/winograd.cc | 24 +-
src/meta_schedule/schedule/generic/winograd.cc | 8 +-
src/meta_schedule/schedule_rule/add_rfactor.cc | 6 +-
.../schedule_rule/apply_custom_rule.cc | 2 +-
src/meta_schedule/schedule_rule/auto_bind.cc | 4 +-
src/meta_schedule/schedule_rule/auto_inline.cc | 14 +-
.../schedule_rule/cross_thread_reduction.cc | 43 +-
.../schedule_rule/multi_level_tiling.cc | 26 +-
.../schedule_rule/multi_level_tiling.h | 14 +-
.../multi_level_tiling_tensor_core.cc | 60 +-
.../multi_level_tiling_wide_vector.cc | 10 +-
.../multi_level_tiling_with_intrin.cc | 8 +-
.../schedule_rule/parallel_vectorize_unroll.cc | 6 +-
.../schedule_rule/random_compute_location.cc | 10 +-
src/meta_schedule/schedule_rule/schedule_rule.cc | 2 +-
.../space_generator/post_order_apply.cc | 6 +-
src/meta_schedule/trace_apply.cc | 54 +-
src/meta_schedule/utils.h | 32 +-
src/relax/analysis/layout_transformation.cc | 25 +-
src/relax/analysis/tir_op_pattern_kind.cc | 14 +-
src/relax/backend/contrib/tensorrt/codegen.cc | 3 +-
src/relax/backend/task_extraction.cc | 6 +-
.../transform/lower_global_view_to_local_view.cc | 33 +-
src/relax/ir/block_builder.cc | 30 +-
src/relax/op/nn/nn.cc | 50 +
src/relax/op/nn/nn.h | 3 +
src/relax/transform/dataflow_inplace.cc | 4 +-
src/relax/transform/fuse_tir.cc | 26 +-
src/relax/transform/rewrite_cuda_graph.cc | 6 +-
src/relax/transform/split_call_tir_by_pattern.cc | 42 +-
.../transform/split_layout_rewrite_preproc.cc | 33 +-
src/script/ir_builder/relax/frame.cc | 26 +-
src/script/ir_builder/relax/ir.cc | 23 +-
src/script/ir_builder/relax/utils.h | 5 +-
src/script/ir_builder/tir/frame.cc | 16 +-
src/script/ir_builder/tir/ir.cc | 28 +-
src/script/ir_builder/tir/utils.h | 18 +-
src/script/printer/tir/block.cc | 20 +-
src/script/printer/tir/function.cc | 16 +-
src/te/operation/create_primfunc.cc | 104 +-
src/tir/analysis/block_access_region_detector.cc | 18 +-
src/tir/analysis/buffer_access_lca_detector.cc | 10 +-
src/tir/analysis/control_flow_graph.cc | 4 +-
src/tir/analysis/estimate_flops.cc | 4 +-
src/tir/analysis/stmt_finding.cc | 26 +-
src/tir/analysis/verify_well_formed.cc | 4 +-
src/tir/ir/block_dependence_info.cc | 21 +-
src/tir/ir/block_scope.cc | 24 +-
src/tir/ir/data_type_rewriter.cc | 20 +-
src/tir/ir/py_functor.cc | 36 +-
src/tir/ir/script/script_complete.cc | 18 +-
src/tir/ir/specialize.cc | 10 +-
src/tir/ir/stmt.cc | 34 +-
src/tir/ir/stmt_functor.cc | 12 +-
src/tir/ir/tir_visitor_with_path.cc | 4 +-
src/tir/ir/tir_visitor_with_path.h | 4 +-
src/tir/schedule/analysis.h | 50 +-
src/tir/schedule/analysis/analysis.cc | 258 ++---
src/tir/schedule/analysis/reducer.cc | 16 +-
src/tir/schedule/analysis/verify.cc | 18 +-
src/tir/schedule/concrete_schedule.cc | 200 ++--
src/tir/schedule/concrete_schedule.h | 133 +--
src/tir/schedule/error.h | 4 +-
src/tir/schedule/instruction.cc | 2 +-
src/tir/schedule/ir_comparator.cc | 18 +-
src/tir/schedule/ir_comparator.h | 10 +-
src/tir/schedule/primitive.h | 6 +-
src/tir/schedule/primitive/annotate.cc | 30 +-
.../schedule/primitive/annotate_buffer_access.cc | 16 +-
src/tir/schedule/primitive/block_annotate.cc | 65 +-
src/tir/schedule/primitive/blockize_tensorize.cc | 191 ++--
src/tir/schedule/primitive/cache_index.cc | 62 +-
src/tir/schedule/primitive/cache_read_write.cc | 276 ++---
src/tir/schedule/primitive/compute_at.cc | 61 +-
src/tir/schedule/primitive/compute_inline.cc | 220 ++--
src/tir/schedule/primitive/decompose_padding.cc | 91 +-
src/tir/schedule/primitive/for_kind.cc | 12 +-
src/tir/schedule/primitive/get_block_loop.cc | 45 +-
src/tir/schedule/primitive/hide_buffer_access.cc | 12 +-
.../schedule/primitive/layout_transformation.cc | 148 +--
src/tir/schedule/primitive/loop_transformation.cc | 92 +-
src/tir/schedule/primitive/pad_einsum.cc | 65 +-
src/tir/schedule/primitive/read_write_at.cc | 96 +-
src/tir/schedule/primitive/reduction.cc | 117 +--
.../schedule/primitive/reorder_block_iter_var.cc | 34 +-
src/tir/schedule/primitive/rolling_buffer.cc | 54 +-
src/tir/schedule/primitive/sampling.cc | 4 +-
src/tir/schedule/schedule.cc | 36 +-
src/tir/schedule/state.cc | 136 +--
src/tir/schedule/trace.cc | 20 +-
src/tir/schedule/traced_schedule.cc | 174 ++--
src/tir/schedule/traced_schedule.h | 105 +-
src/tir/schedule/transform.cc | 66 +-
src/tir/schedule/transform.h | 20 +-
src/tir/schedule/utils.h | 20 +-
src/tir/transforms/bind_params.cc | 6 +-
src/tir/transforms/compact_buffer_region.cc | 12 +-
src/tir/transforms/convert_blocks_to_opaque.cc | 12 +-
src/tir/transforms/default_gpu_schedule.cc | 6 +-
src/tir/transforms/flatten_buffer.cc | 4 +-
src/tir/transforms/force_narrow_index_to_i32.cc | 4 +-
src/tir/transforms/inject_permuted_layout.cc | 4 +-
src/tir/transforms/inject_software_pipeline.cc | 114 +-
src/tir/transforms/inline_private_functions.cc | 4 +-
src/tir/transforms/ir_utils.cc | 8 +-
src/tir/transforms/lower_cross_thread_reduction.cc | 138 +--
src/tir/transforms/lower_init_block.cc | 4 +-
src/tir/transforms/lower_match_buffer.cc | 4 +-
src/tir/transforms/lower_opaque_block.cc | 6 +-
.../manifest_shared_memory_local_stage.cc | 32 +-
src/tir/transforms/memhammer_lower_auto_copy.cc | 18 +-
src/tir/transforms/memhammer_tensorcore_rewrite.cc | 130 +--
src/tir/transforms/narrow_datatype.cc | 2 +-
.../plan_update_buffer_allocation_location.cc | 30 +-
.../remove_weight_layout_rewrite_block.cc | 8 +-
src/tir/transforms/renew_defs.cc | 4 +-
src/tir/transforms/transform_mma_buffer_layout.cc | 4 +-
tests/cpp/data_type_rewriter_test.cc | 12 +-
tests/cpp/ir_functor_test.cc | 20 +-
tests/python/codegen/test_gpu_codegen_allreduce.py | 10 +-
tests/python/codegen/test_inject_ptx_ldg32.py | 4 +-
.../python/codegen/test_target_codegen_cuda_fp4.py | 10 +-
.../python/codegen/test_target_codegen_cuda_fp8.py | 48 +-
tests/python/codegen/test_target_codegen_device.py | 6 +-
.../codegen/test_target_codegen_gpu_common.py | 2 +-
tests/python/codegen/test_target_codegen_llvm.py | 16 +-
tests/python/codegen/test_target_codegen_metal.py | 14 +-
tests/python/codegen/test_target_codegen_opencl.py | 14 +-
tests/python/codegen/test_target_codegen_riscv.py | 2 +-
tests/python/codegen/test_target_codegen_rocm.py | 4 +-
tests/python/codegen/test_target_codegen_vulkan.py | 6 +-
.../contrib/test_android/test_meta_schedule.py | 2 +-
.../python/contrib/test_hexagon/infrastructure.py | 6 +-
.../test_hexagon/test_async_dma_pipeline.py | 56 +-
.../contrib/test_hexagon/test_dma_builtin.py | 2 +-
.../contrib/test_hexagon/test_memory_alloc.py | 2 +-
.../contrib/test_hexagon/test_meta_schedule.py | 12 +-
.../contrib/test_hexagon/test_parallel_hvx.py | 8 +-
.../test_hexagon/test_parallel_hvx_load_vtcm.py | 34 +-
.../contrib/test_hexagon/test_parallel_scalar.py | 8 +-
.../test_relax_2d_buffer_allocation.py | 2 +-
tests/python/contrib/test_hexagon/test_sigmoid.py | 2 +-
.../test_hexagon/test_software_pipeline_async.py | 10 +-
.../contrib/test_hexagon/test_thread_pool.py | 4 +-
tests/python/contrib/test_hexagon/test_vtcm.py | 4 +-
.../contrib/test_hexagon/test_vtcm_bandwidth.py | 8 +-
.../python/contrib/test_tir_triton_integration.py | 4 +-
tests/python/disco/test_nvshmem.py | 4 +-
tests/python/disco/test_session.py | 6 +-
tests/python/dlight/test_benchmark.py | 28 +-
tests/python/dlight/test_cpu_gemv.py | 120 +--
tests/python/dlight/test_gpu_conv.py | 20 +-
tests/python/dlight/test_gpu_fallback.py | 30 +-
tests/python/dlight/test_gpu_gemv.py | 210 ++--
tests/python/dlight/test_gpu_general_reduction.py | 124 +--
tests/python/dlight/test_gpu_low_batch_gemv.py | 92 +-
tests/python/dlight/test_gpu_matmul.py | 166 +--
tests/python/dlight/test_gpu_matmul_tensorize.py | 198 ++--
tests/python/dlight/test_gpu_reduction.py | 222 ++--
tests/python/dlight/test_gpu_rmsnorm.py | 56 +-
tests/python/dlight/test_gpu_transpose.py | 32 +-
tests/python/dlight/test_primitives.py | 8 +-
tests/python/ir/test_datatype_nv_fp8.py | 2 +-
tests/python/ir/test_pass_instrument.py | 2 +-
.../meta_schedule/test_meta_schedule_arg_info.py | 2 +-
.../meta_schedule/test_meta_schedule_builder.py | 8 +-
.../meta_schedule/test_meta_schedule_cost_model.py | 4 +-
.../meta_schedule/test_meta_schedule_database.py | 8 +-
...schedule_feature_extractor_per_store_feature.py | 20 +-
.../test_meta_schedule_measure_callback.py | 2 +-
.../test_meta_schedule_mma_tensorize.py | 36 +-
...eta_schedule_mutator_mutate_compute_location.py | 6 +-
.../test_meta_schedule_mutator_mutate_parallel.py | 6 +-
..._meta_schedule_mutator_mutate_thread_binding.py | 4 +-
.../test_meta_schedule_mutator_mutate_tile_size.py | 4 +-
.../test_meta_schedule_mutator_mutate_unroll.py | 6 +-
.../test_meta_schedule_post_order_apply.py | 40 +-
...ule_postproc_disallow_async_strided_mem_copy.py | 6 +-
...meta_schedule_postproc_disallow_dynamic_loop.py | 4 +-
..._schedule_postproc_rewrite_cooperative_fetch.py | 26 +-
.../test_meta_schedule_postproc_rewrite_layout.py | 96 +-
...e_postproc_rewrite_parallel_vectorize_unroll.py | 48 +-
...ta_schedule_postproc_rewrite_reduction_block.py | 34 +-
...est_meta_schedule_postproc_rewrite_tensorize.py | 60 +-
...meta_schedule_postproc_rewrite_unbound_block.py | 28 +-
.../test_meta_schedule_postproc_verify_gpu_code.py | 58 +-
...est_meta_schedule_postproc_verify_vtcm_limit.py | 14 +-
.../meta_schedule/test_meta_schedule_runner.py | 12 +-
...test_meta_schedule_schedule_rule_add_rfactor.py | 32 +-
...eta_schedule_schedule_rule_apply_custom_rule.py | 6 +-
.../test_meta_schedule_schedule_rule_auto_bind.py | 14 +-
...test_meta_schedule_schedule_rule_auto_inline.py | 108 +-
...chedule_schedule_rule_cross_thread_reduction.py | 120 +--
.../test_meta_schedule_schedule_rule_mlt.py | 112 +-
.../test_meta_schedule_schedule_rule_mlt_intrin.py | 66 +-
.../test_meta_schedule_schedule_rule_mlt_tc.py | 380 +++----
...dule_schedule_rule_parallel_vectorize_unroll.py | 56 +-
...hedule_schedule_rule_random_compute_location.py | 10 +-
.../test_meta_schedule_search_strategy.py | 6 +-
.../meta_schedule/test_meta_schedule_space_cpu.py | 538 +++++-----
.../meta_schedule/test_meta_schedule_space_cuda.py | 262 ++---
.../test_meta_schedule_space_cuda_async.py | 72 +-
.../test_meta_schedule_space_generator.py | 4 +-
.../test_meta_schedule_space_post_opt.py | 2 +-
.../test_meta_schedule_task_scheduler.py | 12 +-
.../test_meta_schedule_trace_apply.py | 658 ++++++------
.../test_meta_schedule_tune_context.py | 2 +-
.../meta_schedule/test_meta_schedule_tune_tir.py | 10 +-
.../test_transform_fold_vdevice_scope_change.py | 28 +-
.../test_distributed_transform_lower_distir.py | 46 +-
...ributed_transform_lower_global_to_local_view.py | 212 ++--
...est_distributed_transform_propagate_sharding.py | 134 +--
.../test_distributed_tvmscript_parser.py | 6 +-
.../test_distributed_tvmscript_printer.py | 6 +-
tests/python/relax/test_analysis.py | 34 +-
.../python/relax/test_analysis_detect_recursion.py | 4 +-
.../test_analysis_suggest_layout_transforms.py | 106 +-
tests/python/relax/test_analysis_well_formed.py | 36 +-
tests/python/relax/test_ast_printer.py | 2 +-
.../python/relax/test_backend_dispatch_sampling.py | 14 +-
tests/python/relax/test_blockbuilder_emit_te.py | 4 +-
tests/python/relax/test_codegen_coreml.py | 1 -
tests/python/relax/test_codegen_cutlass.py | 58 +-
tests/python/relax/test_dataflow_inplace.py | 20 +-
tests/python/relax/test_dataflow_pattern.py | 6 +-
...eliminate_pad_branch_using_buffer_assumption.py | 48 +-
tests/python/relax/test_frontend_common.py | 6 +-
tests/python/relax/test_frontend_dynamo.py | 14 +-
tests/python/relax/test_frontend_nn_op.py | 24 +-
.../relax/test_meta_schedule_relax_integration.py | 20 +-
tests/python/relax/test_op_index.py | 6 +-
tests/python/relax/test_op_misc.py | 2 +-
tests/python/relax/test_op_nn.py | 54 +
.../python/relax/test_optimize_layout_transform.py | 32 +-
tests/python/relax/test_pytorch_integration.py | 2 +-
.../python/relax/test_runtime_builtin_rnn_state.py | 4 +-
tests/python/relax/test_tir_call_source_kernel.py | 4 +-
tests/python/relax/test_transform.py | 12 +-
tests/python/relax/test_transform_alter_op_impl.py | 56 +-
.../test_transform_annotate_tir_op_pattern.py | 46 +-
...st_transform_attach_attr_layout_free_buffers.py | 18 +-
.../relax/test_transform_attach_global_symbol.py | 4 +-
tests/python/relax/test_transform_bind_params.py | 2 +-
tests/python/relax/test_transform_cse.py | 4 +-
.../relax/test_transform_dead_code_elimination.py | 14 +-
.../python/relax/test_transform_few_shot_tuning.py | 78 +-
tests/python/relax/test_transform_fold_constant.py | 14 +-
tests/python/relax/test_transform_fuse_ops.py | 82 +-
.../relax/test_transform_fuse_ops_by_pattern.py | 6 +-
tests/python/relax/test_transform_fuse_tir.py | 144 +--
.../relax/test_transform_fuse_transpose_matmul.py | 8 +-
tests/python/relax/test_transform_gradient.py | 2 +-
.../relax/test_transform_gradient_te_register.py | 36 +-
tests/python/relax/test_transform_lambda_lift.py | 2 +-
.../relax/test_transform_lazy_transform_params.py | 36 +-
tests/python/relax/test_transform_legalize_ops.py | 24 +-
.../relax/test_transform_legalize_ops_binary.py | 120 +--
.../relax/test_transform_legalize_ops_ccl.py | 8 +-
.../test_transform_legalize_ops_create_datatype.py | 46 +-
.../test_transform_legalize_ops_distributed.py | 4 +-
.../relax/test_transform_legalize_ops_grad.py | 52 +-
.../relax/test_transform_legalize_ops_image.py | 4 +-
..._transform_legalize_ops_index_linear_algebra.py | 60 +-
.../test_transform_legalize_ops_manipulate.py | 126 +--
.../python/relax/test_transform_legalize_ops_nn.py | 639 ++++++------
.../relax/test_transform_legalize_ops_qdq.py | 44 +-
...st_transform_legalize_ops_search_statistical.py | 132 +--
.../relax/test_transform_lift_transform_params.py | 16 +-
.../test_transform_merge_composite_functions.py | 4 +-
.../test_transform_meta_schedule_apply_database.py | 8 +-
.../relax/test_transform_meta_schedule_tuning.py | 12 +-
.../relax/test_transform_rewrite_cuda_graph.py | 40 +-
.../test_transform_rewrite_dataflow_reshape.py | 56 +-
...nsform_specialize_primfunc_based_on_callsite.py | 14 +-
.../test_transform_split_layout_rewrite_preproc.py | 36 +-
.../relax/test_transform_to_mixed_precision.py | 2 +-
tests/python/relax/test_tvmscript_parser.py | 20 +-
tests/python/relax/test_tvmscript_pyfunc.py | 2 +-
.../relax/test_vm_alloc_storage_with_scope.py | 2 +-
tests/python/relax/test_vm_build.py | 14 +-
tests/python/relax/test_vm_codegen_only.py | 4 +-
tests/python/relax/test_vm_cuda_graph.py | 4 +-
tests/python/relax/texture/test_texture_nd.py | 4 +-
.../python/runtime/test_evaluator_with_preproc.py | 4 +-
tests/python/runtime/test_runtime_rpc.py | 2 +-
tests/python/te/test_te_create_primfunc.py | 76 +-
...test_tir_analysis_calculate_allocated_memory.py | 18 +-
.../test_tir_analysis_detect_buffer_access_lca.py | 14 +-
.../test_tir_analysis_get_block_access_region.py | 80 +-
.../test_tir_analysis_verify_well_formed.py | 10 +-
tests/python/tir-base/test_slice_tir.py | 10 +-
.../tir-base/test_tir_block_dependence_info.py | 36 +-
tests/python/tir-base/test_tir_host_func.py | 4 +-
tests/python/tir-base/test_tir_intrin.py | 2 +-
tests/python/tir-base/test_tir_ptx_cp_async.py | 6 +-
tests/python/tir-base/test_tir_ptx_ldmatrix.py | 2 +-
tests/python/tir-base/test_tir_renew_defs.py | 24 +-
tests/python/tir-base/test_tir_specialize.py | 30 +-
.../python/tir-base/test_tir_te_extern_primfunc.py | 24 +-
tests/python/tir-base/test_tir_texture_scope.py | 8 +-
.../tir-base/test_tir_unsafe_hide_buffer_access.py | 14 +-
.../tir-schedule/test_tir_schedule_analysis.py | 32 +-
.../test_tir_schedule_annotate_buffer_access.py | 78 +-
.../tir-schedule/test_tir_schedule_block_scope.py | 48 +-
.../tir-schedule/test_tir_schedule_blockize.py | 82 +-
.../tir-schedule/test_tir_schedule_cache_index.py | 22 +-
.../test_tir_schedule_cache_read_write.py | 424 ++++----
.../tir-schedule/test_tir_schedule_compute_at.py | 508 ++++-----
.../test_tir_schedule_compute_inline.py | 390 +++----
.../test_tir_schedule_decompose_padding.py | 78 +-
.../python/tir-schedule/test_tir_schedule_error.py | 12 +-
.../tir-schedule/test_tir_schedule_for_kind.py | 156 +--
.../test_tir_schedule_fuse_reduction_epilogue.py | 22 +-
...ir_schedule_fuse_reduction_epilogue_clipping.py | 26 +-
...st_tir_schedule_fuse_reduction_epilogue_relu.py | 22 +-
.../tir-schedule/test_tir_schedule_instruction.py | 6 +-
.../python/tir-schedule/test_tir_schedule_merge.py | 70 +-
.../tir-schedule/test_tir_schedule_pad_einsum.py | 56 +-
.../tir-schedule/test_tir_schedule_partition.py | 138 +--
.../test_tir_schedule_read_write_at.py | 38 +-
.../tir-schedule/test_tir_schedule_reduction.py | 92 +-
.../tir-schedule/test_tir_schedule_reindex.py | 56 +-
.../tir-schedule/test_tir_schedule_reorder.py | 84 +-
.../test_tir_schedule_reorder_block_iter_var.py | 12 +-
.../tir-schedule/test_tir_schedule_rfactor.py | 326 +++---
.../test_tir_schedule_rolling_buffer.py | 98 +-
.../tir-schedule/test_tir_schedule_sampling.py | 22 +-
.../test_tir_schedule_set_axis_separator.py | 40 +-
.../tir-schedule/test_tir_schedule_set_dtype.py | 26 +-
.../tir-schedule/test_tir_schedule_set_scope.py | 28 +-
.../tir-schedule/test_tir_schedule_split_fuse.py | 134 +--
.../python/tir-schedule/test_tir_schedule_state.py | 26 +-
.../test_tir_schedule_state_cached_flags.py | 242 ++---
.../test_tir_schedule_storage_align.py | 38 +-
.../tir-schedule/test_tir_schedule_tensorize.py | 106 +-
.../python/tir-schedule/test_tir_schedule_trace.py | 88 +-
.../tir-schedule/test_tir_schedule_transform.py | 18 +-
.../test_tir_schedule_transform_layout.py | 188 ++--
.../tir-schedule/test_tir_schedule_utilities.py | 94 +-
.../test_tir_inline_private_functions.py | 6 +-
.../test_tir_transform_compact_buffer_region.py | 280 ++---
.../test_tir_transform_convert_blocks_to_opaque.py | 14 +-
.../test_tir_transform_flatten_buffer.py | 4 +-
...test_tir_transform_force_narrow_index_to_i32.py | 18 +-
.../test_tir_transform_inject_permuted_layout.py | 116 +--
.../test_tir_transform_inject_ptx_async_copy.py | 38 +-
.../test_tir_transform_inject_software_pipeline.py | 530 +++++-----
.../test_tir_transform_lift_thread_binding.py | 38 +-
...t_tir_transform_lower_cross_thread_reduction.py | 256 ++---
.../test_tir_transform_lower_init_block.py | 8 +-
.../test_tir_transform_lower_match_buffer.py | 48 +-
.../test_tir_transform_lower_opaque_block.py | 46 +-
...transform_manifest_shared_memory_local_stage.py | 28 +-
...test_tir_transform_memhammer_lower_auto_copy.py | 198 ++--
.../test_tir_transform_narrow_datatype.py | 4 +-
...sform_plan_update_buffer_allocation_location.py | 76 +-
.../test_tir_transform_profiling_instr.py | 52 +-
...transform_remove_weight_layout_rewrite_block.py | 12 +-
.../test_tir_transform_unify_thread_binding.py | 40 +-
.../test_transform_default_gpu_schedule.py | 74 +-
tests/python/tvmscript/test_tvmscript_complete.py | 64 +-
.../tvmscript/test_tvmscript_error_report.py | 76 +-
.../tvmscript/test_tvmscript_ir_builder_tir.py | 20 +-
.../tvmscript/test_tvmscript_meta_programming.py | 10 +-
tests/python/tvmscript/test_tvmscript_ops.py | 6 +-
.../tvmscript/test_tvmscript_parser_source.py | 2 +-
.../python/tvmscript/test_tvmscript_parser_tir.py | 60 +-
.../tvmscript/test_tvmscript_printer_highlight.py | 2 +-
.../test_tvmscript_printer_structural_equal.py | 4 +-
.../python/tvmscript/test_tvmscript_printer_tir.py | 26 +-
.../python/tvmscript/test_tvmscript_regression.py | 4 +-
tests/python/tvmscript/test_tvmscript_roundtrip.py | 112 +-
.../tvmscript/test_tvmscript_syntax_sugar.py | 36 +-
tests/python/tvmscript/test_tvmscript_type.py | 22 +-
web/package-lock.json | 1085 ++++++++++++++++----
web/package.json | 2 +-
web/src/webgpu.ts | 13 +
web/tests/python/relax_rpc_test.py | 2 +-
web/tests/python/webgpu_rpc_test.py | 2 +-
494 files changed, 11694 insertions(+), 10841 deletions(-)