This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git


    from ab25b49225 [TIR] Fix InjectPTXLDG32 segfaults and skip non-CUDA targets (#18671)
     add 877b448b02 [REFACTOR][TIR] Rename tir.Block to SBlock (#18689)
     add 95f41d66e7 [Web][Version] Fix WebLLM vision model issues (#18680)
     add c043e4fc8d [Relax] Add NN operator attributes include to TensorRT codegen (#18690)
     add 71ac3e821b [Relax][NN] Add batch_flatten operator (#18677)

No new revisions were added by this update.

Summary of changes:
 apps/android_rpc/tests/android_rpc_test.py         |    2 +-
 apps/ios_rpc/tests/ios_rpc_test.py                 |    2 +-
 docs/deep_dive/relax/learning.rst                  |    6 +-
 docs/deep_dive/relax/tutorials/relax_creation.py   |    6 +-
 docs/deep_dive/tensor_ir/abstraction.rst           |    4 +-
 docs/deep_dive/tensor_ir/learning.rst              |   12 +-
 docs/deep_dive/tensor_ir/tutorials/tir_creation.py |   16 +-
 .../tensor_ir/tutorials/tir_transformation.py      |    8 +-
 docs/how_to/tutorials/cross_compilation_and_rpc.py |    2 +-
 docs/reference/api/python/tir/tir.rst              |    2 +-
 .../tvm/meta_schedule/schedule/cuda/thread_bind.h  |    2 +-
 .../tvm/meta_schedule/schedule/generic/winograd.h  |    2 +-
 include/tvm/meta_schedule/schedule_rule.h          |    8 +-
 include/tvm/relax/analysis.h                       |    2 +-
 include/tvm/relax/distributed/axis_group_graph.h   |    2 +-
 include/tvm/script/ir_builder/base.h               |    2 +-
 include/tvm/script/ir_builder/relax/frame.h        |   21 +-
 include/tvm/script/ir_builder/relax/ir.h           |    4 +-
 include/tvm/script/ir_builder/tir/frame.h          |   44 +-
 include/tvm/script/ir_builder/tir/ir.h             |    6 +-
 include/tvm/tir/analysis.h                         |   12 +-
 include/tvm/tir/block_dependence_info.h            |   20 +-
 include/tvm/tir/block_scope.h                      |   34 +-
 include/tvm/tir/data_type_rewriter.h               |    8 +-
 include/tvm/tir/function.h                         |    4 +-
 include/tvm/tir/schedule/instruction.h             |    8 +-
 include/tvm/tir/schedule/schedule.h                |  156 +--
 include/tvm/tir/schedule/state.h                   |   48 +-
 include/tvm/tir/stmt.h                             |   64 +-
 include/tvm/tir/stmt_functor.h                     |   16 +-
 include/tvm/tir/transform.h                        |   28 +-
 include/tvm/tir/utils.h                            |   14 +-
 jvm/core/src/test/scripts/prepare_test_libs.py     |    2 +-
 python/tvm/dlight/__init__.py                      |    2 +-
 python/tvm/dlight/adreno/convolution.py            |    6 +-
 python/tvm/dlight/adreno/fallback.py               |   16 +-
 python/tvm/dlight/adreno/layout_transform.py       |    2 +-
 python/tvm/dlight/adreno/pool.py                   |   10 +-
 python/tvm/dlight/adreno/utils.py                  |    8 +-
 python/tvm/dlight/analysis/__init__.py             |    4 +-
 python/tvm/dlight/analysis/common_analysis.py      |   61 +-
 python/tvm/dlight/analysis/gemv.py                 |   14 +-
 python/tvm/dlight/base/common_schedules.py         |   20 +-
 python/tvm/dlight/cpu/gemv.py                      |    6 +-
 python/tvm/dlight/gpu/fallback.py                  |    2 +-
 python/tvm/dlight/gpu/gemv.py                      |   14 +-
 python/tvm/dlight/gpu/low_batch_gemv.py            |   32 +-
 python/tvm/dlight/gpu/matmul.py                    |   42 +-
 python/tvm/dlight/gpu/reduction.py                 |   16 +-
 python/tvm/dlight/gpu/rmsnorm.py                   |    8 +-
 python/tvm/dlight/gpu/transpose.py                 |    4 +-
 python/tvm/exec/gpu_memory_bandwidth.py            |    2 +-
 .../schedule/cuda/layout_transform.py              |   18 +-
 .../schedule_rule/multi_level_tiling.py            |    6 +-
 .../meta_schedule/schedule_rule/schedule_rule.py   |   10 +-
 python/tvm/relax/analysis/analysis.py              |    6 +-
 python/tvm/relax/backend/gpu_generic/cumsum.py     |    2 +-
 python/tvm/relax/backend/gpu_generic/sampling.py   |    8 +-
 python/tvm/relax/backend/metal/coreml.py           |    5 +-
 python/tvm/relax/block_builder.py                  |    8 +-
 python/tvm/relax/frontend/nn/llm/kv_cache.py       |  174 ++--
 .../relax/frontend/nn/llm/position_embedding.py    |   12 +-
 python/tvm/relax/frontend/nn/llm/tree_attn.py      |   98 +-
 python/tvm/relax/frontend/nn/op.py                 |    6 +-
 python/tvm/relax/op/nn/__init__.py                 |    1 +
 python/tvm/relax/op/nn/nn.py                       |   19 +
 python/tvm/relax/transform/legalize_ops/nn.py      |    7 +
 python/tvm/relax/transform/transform.py            |    4 +-
 python/tvm/script/ir_builder/base.py               |    2 +-
 python/tvm/script/ir_builder/relax/frame.py        |    4 +-
 python/tvm/script/ir_builder/relax/ir.py           |    4 +-
 python/tvm/script/ir_builder/tir/frame.py          |    6 +-
 python/tvm/script/ir_builder/tir/ir.py             |   22 +-
 python/tvm/script/parser/relax/parser.py           |    4 +-
 python/tvm/te/operation.py                         |    4 +-
 python/tvm/testing/tir.py                          |    8 +-
 python/tvm/tir/__init__.py                         |    6 +-
 python/tvm/tir/analysis/analysis.py                |   24 +-
 python/tvm/tir/block_dependence_info.py            |   30 +-
 python/tvm/tir/block_scope.py                      |   26 +-
 python/tvm/tir/function.py                         |    4 +-
 python/tvm/tir/functor.py                          |   58 +-
 python/tvm/tir/schedule/__init__.py                |    4 +-
 python/tvm/tir/schedule/analysis.py                |   14 +-
 python/tvm/tir/schedule/instruction.py             |   14 +-
 python/tvm/tir/schedule/schedule.py                |  626 +++++------
 python/tvm/tir/schedule/state.py                   |   18 +-
 python/tvm/tir/schedule/transform.py               |    6 +-
 python/tvm/tir/stmt.py                             |   22 +-
 python/tvm/tir/tensor_intrin/arm_cpu.py            |   36 +-
 python/tvm/tir/tensor_intrin/cuda.py               |   72 +-
 python/tvm/tir/tensor_intrin/dot_product_common.py |    6 +-
 python/tvm/tir/tensor_intrin/hexagon.py            |   24 +-
 python/tvm/tir/tensor_intrin/metal.py              |   24 +-
 python/tvm/tir/tensor_intrin/riscv_cpu.py          |    8 +-
 python/tvm/tir/tensor_intrin/rocm.py               |   28 +-
 python/tvm/tir/tensor_intrin/x86.py                |    8 +-
 python/tvm/tir/transform/transform.py              |    4 +-
 src/arith/ir_mutator_with_analyzer.cc              |    2 +-
 src/arith/ir_mutator_with_analyzer.h               |    2 +-
 src/arith/ir_visitor_with_analyzer.cc              |    2 +-
 src/arith/ir_visitor_with_analyzer.h               |    2 +-
 .../feature_extractor/per_store_feature.cc         |    4 +-
 src/meta_schedule/module_equality.cc               |    6 +-
 .../mutator/mutate_compute_location.cc             |    2 +-
 src/meta_schedule/mutator/mutate_parallel.cc       |   36 +-
 .../postproc/rewrite_cooperative_fetch.cc          |   10 +-
 src/meta_schedule/postproc/rewrite_layout.cc       |   30 +-
 .../postproc/rewrite_parallel_vectorize_unroll.cc  |   23 +-
 .../postproc/rewrite_reduction_block.cc            |   14 +-
 src/meta_schedule/postproc/rewrite_tensorize.cc    |   18 +-
 .../postproc/rewrite_unbound_block.cc              |    8 +-
 src/meta_schedule/postproc/verify_gpu_code.cc      |    2 +-
 src/meta_schedule/schedule/cpu/winograd.cc         |   18 +-
 src/meta_schedule/schedule/cuda/thread_bind.cc     |    2 +-
 src/meta_schedule/schedule/cuda/winograd.cc        |   24 +-
 src/meta_schedule/schedule/generic/winograd.cc     |    8 +-
 src/meta_schedule/schedule_rule/add_rfactor.cc     |    6 +-
 .../schedule_rule/apply_custom_rule.cc             |    2 +-
 src/meta_schedule/schedule_rule/auto_bind.cc       |    4 +-
 src/meta_schedule/schedule_rule/auto_inline.cc     |   14 +-
 .../schedule_rule/cross_thread_reduction.cc        |   43 +-
 .../schedule_rule/multi_level_tiling.cc            |   26 +-
 .../schedule_rule/multi_level_tiling.h             |   14 +-
 .../multi_level_tiling_tensor_core.cc              |   60 +-
 .../multi_level_tiling_wide_vector.cc              |   10 +-
 .../multi_level_tiling_with_intrin.cc              |    8 +-
 .../schedule_rule/parallel_vectorize_unroll.cc     |    6 +-
 .../schedule_rule/random_compute_location.cc       |   10 +-
 src/meta_schedule/schedule_rule/schedule_rule.cc   |    2 +-
 .../space_generator/post_order_apply.cc            |    6 +-
 src/meta_schedule/trace_apply.cc                   |   54 +-
 src/meta_schedule/utils.h                          |   32 +-
 src/relax/analysis/layout_transformation.cc        |   25 +-
 src/relax/analysis/tir_op_pattern_kind.cc          |   14 +-
 src/relax/backend/contrib/tensorrt/codegen.cc      |    3 +-
 src/relax/backend/task_extraction.cc               |    6 +-
 .../transform/lower_global_view_to_local_view.cc   |   33 +-
 src/relax/ir/block_builder.cc                      |   30 +-
 src/relax/op/nn/nn.cc                              |   50 +
 src/relax/op/nn/nn.h                               |    3 +
 src/relax/transform/dataflow_inplace.cc            |    4 +-
 src/relax/transform/fuse_tir.cc                    |   26 +-
 src/relax/transform/rewrite_cuda_graph.cc          |    6 +-
 src/relax/transform/split_call_tir_by_pattern.cc   |   42 +-
 .../transform/split_layout_rewrite_preproc.cc      |   33 +-
 src/script/ir_builder/relax/frame.cc               |   26 +-
 src/script/ir_builder/relax/ir.cc                  |   23 +-
 src/script/ir_builder/relax/utils.h                |    5 +-
 src/script/ir_builder/tir/frame.cc                 |   16 +-
 src/script/ir_builder/tir/ir.cc                    |   28 +-
 src/script/ir_builder/tir/utils.h                  |   18 +-
 src/script/printer/tir/block.cc                    |   20 +-
 src/script/printer/tir/function.cc                 |   16 +-
 src/te/operation/create_primfunc.cc                |  104 +-
 src/tir/analysis/block_access_region_detector.cc   |   18 +-
 src/tir/analysis/buffer_access_lca_detector.cc     |   10 +-
 src/tir/analysis/control_flow_graph.cc             |    4 +-
 src/tir/analysis/estimate_flops.cc                 |    4 +-
 src/tir/analysis/stmt_finding.cc                   |   26 +-
 src/tir/analysis/verify_well_formed.cc             |    4 +-
 src/tir/ir/block_dependence_info.cc                |   21 +-
 src/tir/ir/block_scope.cc                          |   24 +-
 src/tir/ir/data_type_rewriter.cc                   |   20 +-
 src/tir/ir/py_functor.cc                           |   36 +-
 src/tir/ir/script/script_complete.cc               |   18 +-
 src/tir/ir/specialize.cc                           |   10 +-
 src/tir/ir/stmt.cc                                 |   34 +-
 src/tir/ir/stmt_functor.cc                         |   12 +-
 src/tir/ir/tir_visitor_with_path.cc                |    4 +-
 src/tir/ir/tir_visitor_with_path.h                 |    4 +-
 src/tir/schedule/analysis.h                        |   50 +-
 src/tir/schedule/analysis/analysis.cc              |  258 ++---
 src/tir/schedule/analysis/reducer.cc               |   16 +-
 src/tir/schedule/analysis/verify.cc                |   18 +-
 src/tir/schedule/concrete_schedule.cc              |  200 ++--
 src/tir/schedule/concrete_schedule.h               |  133 +--
 src/tir/schedule/error.h                           |    4 +-
 src/tir/schedule/instruction.cc                    |    2 +-
 src/tir/schedule/ir_comparator.cc                  |   18 +-
 src/tir/schedule/ir_comparator.h                   |   10 +-
 src/tir/schedule/primitive.h                       |    6 +-
 src/tir/schedule/primitive/annotate.cc             |   30 +-
 .../schedule/primitive/annotate_buffer_access.cc   |   16 +-
 src/tir/schedule/primitive/block_annotate.cc       |   65 +-
 src/tir/schedule/primitive/blockize_tensorize.cc   |  191 ++--
 src/tir/schedule/primitive/cache_index.cc          |   62 +-
 src/tir/schedule/primitive/cache_read_write.cc     |  276 ++---
 src/tir/schedule/primitive/compute_at.cc           |   61 +-
 src/tir/schedule/primitive/compute_inline.cc       |  220 ++--
 src/tir/schedule/primitive/decompose_padding.cc    |   91 +-
 src/tir/schedule/primitive/for_kind.cc             |   12 +-
 src/tir/schedule/primitive/get_block_loop.cc       |   45 +-
 src/tir/schedule/primitive/hide_buffer_access.cc   |   12 +-
 .../schedule/primitive/layout_transformation.cc    |  148 +--
 src/tir/schedule/primitive/loop_transformation.cc  |   92 +-
 src/tir/schedule/primitive/pad_einsum.cc           |   65 +-
 src/tir/schedule/primitive/read_write_at.cc        |   96 +-
 src/tir/schedule/primitive/reduction.cc            |  117 +--
 .../schedule/primitive/reorder_block_iter_var.cc   |   34 +-
 src/tir/schedule/primitive/rolling_buffer.cc       |   54 +-
 src/tir/schedule/primitive/sampling.cc             |    4 +-
 src/tir/schedule/schedule.cc                       |   36 +-
 src/tir/schedule/state.cc                          |  136 +--
 src/tir/schedule/trace.cc                          |   20 +-
 src/tir/schedule/traced_schedule.cc                |  174 ++--
 src/tir/schedule/traced_schedule.h                 |  105 +-
 src/tir/schedule/transform.cc                      |   66 +-
 src/tir/schedule/transform.h                       |   20 +-
 src/tir/schedule/utils.h                           |   20 +-
 src/tir/transforms/bind_params.cc                  |    6 +-
 src/tir/transforms/compact_buffer_region.cc        |   12 +-
 src/tir/transforms/convert_blocks_to_opaque.cc     |   12 +-
 src/tir/transforms/default_gpu_schedule.cc         |    6 +-
 src/tir/transforms/flatten_buffer.cc               |    4 +-
 src/tir/transforms/force_narrow_index_to_i32.cc    |    4 +-
 src/tir/transforms/inject_permuted_layout.cc       |    4 +-
 src/tir/transforms/inject_software_pipeline.cc     |  114 +-
 src/tir/transforms/inline_private_functions.cc     |    4 +-
 src/tir/transforms/ir_utils.cc                     |    8 +-
 src/tir/transforms/lower_cross_thread_reduction.cc |  138 +--
 src/tir/transforms/lower_init_block.cc             |    4 +-
 src/tir/transforms/lower_match_buffer.cc           |    4 +-
 src/tir/transforms/lower_opaque_block.cc           |    6 +-
 .../manifest_shared_memory_local_stage.cc          |   32 +-
 src/tir/transforms/memhammer_lower_auto_copy.cc    |   18 +-
 src/tir/transforms/memhammer_tensorcore_rewrite.cc |  130 +--
 src/tir/transforms/narrow_datatype.cc              |    2 +-
 .../plan_update_buffer_allocation_location.cc      |   30 +-
 .../remove_weight_layout_rewrite_block.cc          |    8 +-
 src/tir/transforms/renew_defs.cc                   |    4 +-
 src/tir/transforms/transform_mma_buffer_layout.cc  |    4 +-
 tests/cpp/data_type_rewriter_test.cc               |   12 +-
 tests/cpp/ir_functor_test.cc                       |   20 +-
 tests/python/codegen/test_gpu_codegen_allreduce.py |   10 +-
 tests/python/codegen/test_inject_ptx_ldg32.py      |    4 +-
 .../python/codegen/test_target_codegen_cuda_fp4.py |   10 +-
 .../python/codegen/test_target_codegen_cuda_fp8.py |   48 +-
 tests/python/codegen/test_target_codegen_device.py |    6 +-
 .../codegen/test_target_codegen_gpu_common.py      |    2 +-
 tests/python/codegen/test_target_codegen_llvm.py   |   16 +-
 tests/python/codegen/test_target_codegen_metal.py  |   14 +-
 tests/python/codegen/test_target_codegen_opencl.py |   14 +-
 tests/python/codegen/test_target_codegen_riscv.py  |    2 +-
 tests/python/codegen/test_target_codegen_rocm.py   |    4 +-
 tests/python/codegen/test_target_codegen_vulkan.py |    6 +-
 .../contrib/test_android/test_meta_schedule.py     |    2 +-
 .../python/contrib/test_hexagon/infrastructure.py  |    6 +-
 .../test_hexagon/test_async_dma_pipeline.py        |   56 +-
 .../contrib/test_hexagon/test_dma_builtin.py       |    2 +-
 .../contrib/test_hexagon/test_memory_alloc.py      |    2 +-
 .../contrib/test_hexagon/test_meta_schedule.py     |   12 +-
 .../contrib/test_hexagon/test_parallel_hvx.py      |    8 +-
 .../test_hexagon/test_parallel_hvx_load_vtcm.py    |   34 +-
 .../contrib/test_hexagon/test_parallel_scalar.py   |    8 +-
 .../test_relax_2d_buffer_allocation.py             |    2 +-
 tests/python/contrib/test_hexagon/test_sigmoid.py  |    2 +-
 .../test_hexagon/test_software_pipeline_async.py   |   10 +-
 .../contrib/test_hexagon/test_thread_pool.py       |    4 +-
 tests/python/contrib/test_hexagon/test_vtcm.py     |    4 +-
 .../contrib/test_hexagon/test_vtcm_bandwidth.py    |    8 +-
 .../python/contrib/test_tir_triton_integration.py  |    4 +-
 tests/python/disco/test_nvshmem.py                 |    4 +-
 tests/python/disco/test_session.py                 |    6 +-
 tests/python/dlight/test_benchmark.py              |   28 +-
 tests/python/dlight/test_cpu_gemv.py               |  120 +--
 tests/python/dlight/test_gpu_conv.py               |   20 +-
 tests/python/dlight/test_gpu_fallback.py           |   30 +-
 tests/python/dlight/test_gpu_gemv.py               |  210 ++--
 tests/python/dlight/test_gpu_general_reduction.py  |  124 +--
 tests/python/dlight/test_gpu_low_batch_gemv.py     |   92 +-
 tests/python/dlight/test_gpu_matmul.py             |  166 +--
 tests/python/dlight/test_gpu_matmul_tensorize.py   |  198 ++--
 tests/python/dlight/test_gpu_reduction.py          |  222 ++--
 tests/python/dlight/test_gpu_rmsnorm.py            |   56 +-
 tests/python/dlight/test_gpu_transpose.py          |   32 +-
 tests/python/dlight/test_primitives.py             |    8 +-
 tests/python/ir/test_datatype_nv_fp8.py            |    2 +-
 tests/python/ir/test_pass_instrument.py            |    2 +-
 .../meta_schedule/test_meta_schedule_arg_info.py   |    2 +-
 .../meta_schedule/test_meta_schedule_builder.py    |    8 +-
 .../meta_schedule/test_meta_schedule_cost_model.py |    4 +-
 .../meta_schedule/test_meta_schedule_database.py   |    8 +-
 ...schedule_feature_extractor_per_store_feature.py |   20 +-
 .../test_meta_schedule_measure_callback.py         |    2 +-
 .../test_meta_schedule_mma_tensorize.py            |   36 +-
 ...eta_schedule_mutator_mutate_compute_location.py |    6 +-
 .../test_meta_schedule_mutator_mutate_parallel.py  |    6 +-
 ..._meta_schedule_mutator_mutate_thread_binding.py |    4 +-
 .../test_meta_schedule_mutator_mutate_tile_size.py |    4 +-
 .../test_meta_schedule_mutator_mutate_unroll.py    |    6 +-
 .../test_meta_schedule_post_order_apply.py         |   40 +-
 ...ule_postproc_disallow_async_strided_mem_copy.py |    6 +-
 ...meta_schedule_postproc_disallow_dynamic_loop.py |    4 +-
 ..._schedule_postproc_rewrite_cooperative_fetch.py |   26 +-
 .../test_meta_schedule_postproc_rewrite_layout.py  |   96 +-
 ...e_postproc_rewrite_parallel_vectorize_unroll.py |   48 +-
 ...ta_schedule_postproc_rewrite_reduction_block.py |   34 +-
 ...est_meta_schedule_postproc_rewrite_tensorize.py |   60 +-
 ...meta_schedule_postproc_rewrite_unbound_block.py |   28 +-
 .../test_meta_schedule_postproc_verify_gpu_code.py |   58 +-
 ...est_meta_schedule_postproc_verify_vtcm_limit.py |   14 +-
 .../meta_schedule/test_meta_schedule_runner.py     |   12 +-
 ...test_meta_schedule_schedule_rule_add_rfactor.py |   32 +-
 ...eta_schedule_schedule_rule_apply_custom_rule.py |    6 +-
 .../test_meta_schedule_schedule_rule_auto_bind.py  |   14 +-
 ...test_meta_schedule_schedule_rule_auto_inline.py |  108 +-
 ...chedule_schedule_rule_cross_thread_reduction.py |  120 +--
 .../test_meta_schedule_schedule_rule_mlt.py        |  112 +-
 .../test_meta_schedule_schedule_rule_mlt_intrin.py |   66 +-
 .../test_meta_schedule_schedule_rule_mlt_tc.py     |  380 +++----
 ...dule_schedule_rule_parallel_vectorize_unroll.py |   56 +-
 ...hedule_schedule_rule_random_compute_location.py |   10 +-
 .../test_meta_schedule_search_strategy.py          |    6 +-
 .../meta_schedule/test_meta_schedule_space_cpu.py  |  538 +++++-----
 .../meta_schedule/test_meta_schedule_space_cuda.py |  262 ++---
 .../test_meta_schedule_space_cuda_async.py         |   72 +-
 .../test_meta_schedule_space_generator.py          |    4 +-
 .../test_meta_schedule_space_post_opt.py           |    2 +-
 .../test_meta_schedule_task_scheduler.py           |   12 +-
 .../test_meta_schedule_trace_apply.py              |  658 ++++++------
 .../test_meta_schedule_tune_context.py             |    2 +-
 .../meta_schedule/test_meta_schedule_tune_tir.py   |   10 +-
 .../test_transform_fold_vdevice_scope_change.py    |   28 +-
 .../test_distributed_transform_lower_distir.py     |   46 +-
 ...ributed_transform_lower_global_to_local_view.py |  212 ++--
 ...est_distributed_transform_propagate_sharding.py |  134 +--
 .../test_distributed_tvmscript_parser.py           |    6 +-
 .../test_distributed_tvmscript_printer.py          |    6 +-
 tests/python/relax/test_analysis.py                |   34 +-
 .../python/relax/test_analysis_detect_recursion.py |    4 +-
 .../test_analysis_suggest_layout_transforms.py     |  106 +-
 tests/python/relax/test_analysis_well_formed.py    |   36 +-
 tests/python/relax/test_ast_printer.py             |    2 +-
 .../python/relax/test_backend_dispatch_sampling.py |   14 +-
 tests/python/relax/test_blockbuilder_emit_te.py    |    4 +-
 tests/python/relax/test_codegen_coreml.py          |    1 -
 tests/python/relax/test_codegen_cutlass.py         |   58 +-
 tests/python/relax/test_dataflow_inplace.py        |   20 +-
 tests/python/relax/test_dataflow_pattern.py        |    6 +-
 ...eliminate_pad_branch_using_buffer_assumption.py |   48 +-
 tests/python/relax/test_frontend_common.py         |    6 +-
 tests/python/relax/test_frontend_dynamo.py         |   14 +-
 tests/python/relax/test_frontend_nn_op.py          |   24 +-
 .../relax/test_meta_schedule_relax_integration.py  |   20 +-
 tests/python/relax/test_op_index.py                |    6 +-
 tests/python/relax/test_op_misc.py                 |    2 +-
 tests/python/relax/test_op_nn.py                   |   54 +
 .../python/relax/test_optimize_layout_transform.py |   32 +-
 tests/python/relax/test_pytorch_integration.py     |    2 +-
 .../python/relax/test_runtime_builtin_rnn_state.py |    4 +-
 tests/python/relax/test_tir_call_source_kernel.py  |    4 +-
 tests/python/relax/test_transform.py               |   12 +-
 tests/python/relax/test_transform_alter_op_impl.py |   56 +-
 .../test_transform_annotate_tir_op_pattern.py      |   46 +-
 ...st_transform_attach_attr_layout_free_buffers.py |   18 +-
 .../relax/test_transform_attach_global_symbol.py   |    4 +-
 tests/python/relax/test_transform_bind_params.py   |    2 +-
 tests/python/relax/test_transform_cse.py           |    4 +-
 .../relax/test_transform_dead_code_elimination.py  |   14 +-
 .../python/relax/test_transform_few_shot_tuning.py |   78 +-
 tests/python/relax/test_transform_fold_constant.py |   14 +-
 tests/python/relax/test_transform_fuse_ops.py      |   82 +-
 .../relax/test_transform_fuse_ops_by_pattern.py    |    6 +-
 tests/python/relax/test_transform_fuse_tir.py      |  144 +--
 .../relax/test_transform_fuse_transpose_matmul.py  |    8 +-
 tests/python/relax/test_transform_gradient.py      |    2 +-
 .../relax/test_transform_gradient_te_register.py   |   36 +-
 tests/python/relax/test_transform_lambda_lift.py   |    2 +-
 .../relax/test_transform_lazy_transform_params.py  |   36 +-
 tests/python/relax/test_transform_legalize_ops.py  |   24 +-
 .../relax/test_transform_legalize_ops_binary.py    |  120 +--
 .../relax/test_transform_legalize_ops_ccl.py       |    8 +-
 .../test_transform_legalize_ops_create_datatype.py |   46 +-
 .../test_transform_legalize_ops_distributed.py     |    4 +-
 .../relax/test_transform_legalize_ops_grad.py      |   52 +-
 .../relax/test_transform_legalize_ops_image.py     |    4 +-
 ..._transform_legalize_ops_index_linear_algebra.py |   60 +-
 .../test_transform_legalize_ops_manipulate.py      |  126 +--
 .../python/relax/test_transform_legalize_ops_nn.py |  639 ++++++------
 .../relax/test_transform_legalize_ops_qdq.py       |   44 +-
 ...st_transform_legalize_ops_search_statistical.py |  132 +--
 .../relax/test_transform_lift_transform_params.py  |   16 +-
 .../test_transform_merge_composite_functions.py    |    4 +-
 .../test_transform_meta_schedule_apply_database.py |    8 +-
 .../relax/test_transform_meta_schedule_tuning.py   |   12 +-
 .../relax/test_transform_rewrite_cuda_graph.py     |   40 +-
 .../test_transform_rewrite_dataflow_reshape.py     |   56 +-
 ...nsform_specialize_primfunc_based_on_callsite.py |   14 +-
 .../test_transform_split_layout_rewrite_preproc.py |   36 +-
 .../relax/test_transform_to_mixed_precision.py     |    2 +-
 tests/python/relax/test_tvmscript_parser.py        |   20 +-
 tests/python/relax/test_tvmscript_pyfunc.py        |    2 +-
 .../relax/test_vm_alloc_storage_with_scope.py      |    2 +-
 tests/python/relax/test_vm_build.py                |   14 +-
 tests/python/relax/test_vm_codegen_only.py         |    4 +-
 tests/python/relax/test_vm_cuda_graph.py           |    4 +-
 tests/python/relax/texture/test_texture_nd.py      |    4 +-
 .../python/runtime/test_evaluator_with_preproc.py  |    4 +-
 tests/python/runtime/test_runtime_rpc.py           |    2 +-
 tests/python/te/test_te_create_primfunc.py         |   76 +-
 ...test_tir_analysis_calculate_allocated_memory.py |   18 +-
 .../test_tir_analysis_detect_buffer_access_lca.py  |   14 +-
 .../test_tir_analysis_get_block_access_region.py   |   80 +-
 .../test_tir_analysis_verify_well_formed.py        |   10 +-
 tests/python/tir-base/test_slice_tir.py            |   10 +-
 .../tir-base/test_tir_block_dependence_info.py     |   36 +-
 tests/python/tir-base/test_tir_host_func.py        |    4 +-
 tests/python/tir-base/test_tir_intrin.py           |    2 +-
 tests/python/tir-base/test_tir_ptx_cp_async.py     |    6 +-
 tests/python/tir-base/test_tir_ptx_ldmatrix.py     |    2 +-
 tests/python/tir-base/test_tir_renew_defs.py       |   24 +-
 tests/python/tir-base/test_tir_specialize.py       |   30 +-
 .../python/tir-base/test_tir_te_extern_primfunc.py |   24 +-
 tests/python/tir-base/test_tir_texture_scope.py    |    8 +-
 .../tir-base/test_tir_unsafe_hide_buffer_access.py |   14 +-
 .../tir-schedule/test_tir_schedule_analysis.py     |   32 +-
 .../test_tir_schedule_annotate_buffer_access.py    |   78 +-
 .../tir-schedule/test_tir_schedule_block_scope.py  |   48 +-
 .../tir-schedule/test_tir_schedule_blockize.py     |   82 +-
 .../tir-schedule/test_tir_schedule_cache_index.py  |   22 +-
 .../test_tir_schedule_cache_read_write.py          |  424 ++++----
 .../tir-schedule/test_tir_schedule_compute_at.py   |  508 ++++-----
 .../test_tir_schedule_compute_inline.py            |  390 +++----
 .../test_tir_schedule_decompose_padding.py         |   78 +-
 .../python/tir-schedule/test_tir_schedule_error.py |   12 +-
 .../tir-schedule/test_tir_schedule_for_kind.py     |  156 +--
 .../test_tir_schedule_fuse_reduction_epilogue.py   |   22 +-
 ...ir_schedule_fuse_reduction_epilogue_clipping.py |   26 +-
 ...st_tir_schedule_fuse_reduction_epilogue_relu.py |   22 +-
 .../tir-schedule/test_tir_schedule_instruction.py  |    6 +-
 .../python/tir-schedule/test_tir_schedule_merge.py |   70 +-
 .../tir-schedule/test_tir_schedule_pad_einsum.py   |   56 +-
 .../tir-schedule/test_tir_schedule_partition.py    |  138 +--
 .../test_tir_schedule_read_write_at.py             |   38 +-
 .../tir-schedule/test_tir_schedule_reduction.py    |   92 +-
 .../tir-schedule/test_tir_schedule_reindex.py      |   56 +-
 .../tir-schedule/test_tir_schedule_reorder.py      |   84 +-
 .../test_tir_schedule_reorder_block_iter_var.py    |   12 +-
 .../tir-schedule/test_tir_schedule_rfactor.py      |  326 +++---
 .../test_tir_schedule_rolling_buffer.py            |   98 +-
 .../tir-schedule/test_tir_schedule_sampling.py     |   22 +-
 .../test_tir_schedule_set_axis_separator.py        |   40 +-
 .../tir-schedule/test_tir_schedule_set_dtype.py    |   26 +-
 .../tir-schedule/test_tir_schedule_set_scope.py    |   28 +-
 .../tir-schedule/test_tir_schedule_split_fuse.py   |  134 +--
 .../python/tir-schedule/test_tir_schedule_state.py |   26 +-
 .../test_tir_schedule_state_cached_flags.py        |  242 ++---
 .../test_tir_schedule_storage_align.py             |   38 +-
 .../tir-schedule/test_tir_schedule_tensorize.py    |  106 +-
 .../python/tir-schedule/test_tir_schedule_trace.py |   88 +-
 .../tir-schedule/test_tir_schedule_transform.py    |   18 +-
 .../test_tir_schedule_transform_layout.py          |  188 ++--
 .../tir-schedule/test_tir_schedule_utilities.py    |   94 +-
 .../test_tir_inline_private_functions.py           |    6 +-
 .../test_tir_transform_compact_buffer_region.py    |  280 ++---
 .../test_tir_transform_convert_blocks_to_opaque.py |   14 +-
 .../test_tir_transform_flatten_buffer.py           |    4 +-
 ...test_tir_transform_force_narrow_index_to_i32.py |   18 +-
 .../test_tir_transform_inject_permuted_layout.py   |  116 +--
 .../test_tir_transform_inject_ptx_async_copy.py    |   38 +-
 .../test_tir_transform_inject_software_pipeline.py |  530 +++++-----
 .../test_tir_transform_lift_thread_binding.py      |   38 +-
 ...t_tir_transform_lower_cross_thread_reduction.py |  256 ++---
 .../test_tir_transform_lower_init_block.py         |    8 +-
 .../test_tir_transform_lower_match_buffer.py       |   48 +-
 .../test_tir_transform_lower_opaque_block.py       |   46 +-
 ...transform_manifest_shared_memory_local_stage.py |   28 +-
 ...test_tir_transform_memhammer_lower_auto_copy.py |  198 ++--
 .../test_tir_transform_narrow_datatype.py          |    4 +-
 ...sform_plan_update_buffer_allocation_location.py |   76 +-
 .../test_tir_transform_profiling_instr.py          |   52 +-
 ...transform_remove_weight_layout_rewrite_block.py |   12 +-
 .../test_tir_transform_unify_thread_binding.py     |   40 +-
 .../test_transform_default_gpu_schedule.py         |   74 +-
 tests/python/tvmscript/test_tvmscript_complete.py  |   64 +-
 .../tvmscript/test_tvmscript_error_report.py       |   76 +-
 .../tvmscript/test_tvmscript_ir_builder_tir.py     |   20 +-
 .../tvmscript/test_tvmscript_meta_programming.py   |   10 +-
 tests/python/tvmscript/test_tvmscript_ops.py       |    6 +-
 .../tvmscript/test_tvmscript_parser_source.py      |    2 +-
 .../python/tvmscript/test_tvmscript_parser_tir.py  |   60 +-
 .../tvmscript/test_tvmscript_printer_highlight.py  |    2 +-
 .../test_tvmscript_printer_structural_equal.py     |    4 +-
 .../python/tvmscript/test_tvmscript_printer_tir.py |   26 +-
 .../python/tvmscript/test_tvmscript_regression.py  |    4 +-
 tests/python/tvmscript/test_tvmscript_roundtrip.py |  112 +-
 .../tvmscript/test_tvmscript_syntax_sugar.py       |   36 +-
 tests/python/tvmscript/test_tvmscript_type.py      |   22 +-
 web/package-lock.json                              | 1085 ++++++++++++++++----
 web/package.json                                   |    2 +-
 web/src/webgpu.ts                                  |   13 +
 web/tests/python/relax_rpc_test.py                 |    2 +-
 web/tests/python/webgpu_rpc_test.py                |    2 +-
 494 files changed, 11694 insertions(+), 10841 deletions(-)

Reply via email to