This is an automated email from the ASF dual-hosted git repository. github-bot pushed a change to branch nightly in repository https://gitbox.apache.org/repos/asf/tvm.git
from e257fb8a41 [Runtime] CUDA IPC Memory support and custom allreduce kernels (#16750) add 62beb0251e [microNPU][ETHOSU] Add fixed point for tanh (#16266) add 858486fe8e [Relax][Pass] Lowering passes for GPU IPC memory and allreduce (#16759) add f9b38ab711 [SME][Docker] Add Fixed Virtual Platform (FVP) and toolchain install (#16755) add 6c701fe5b8 [Unity][Parser] Check well-formedness in the parser (#16569) add 89cd74c07d [CONTRIB] Add nm symbol dump (#16763) No new revisions were added by this update. Summary of changes: docker/Dockerfile.ci_cpu | 5 + docker/install/ubuntu_install_aprofile_aem.sh | 54 ++++++++ python/tvm/contrib/cc.py | 46 +++++++ python/tvm/contrib/ndk.py | 31 ++++- python/tvm/relax/block_builder.py | 11 +- python/tvm/relax/frontend/nn/modules.py | 44 +++--- python/tvm/relax/op/builtin/builtin.py | 20 ++- python/tvm/relax/transform/__init__.py | 2 + .../tvm/relax/transform/ipc_allreduce_rewrite.py | 150 ++++++++++++++++++++ .../relax/transform/lower_gpu_ipc_alloc_storage.py | 85 ++++++++++++ .../tvm/relay/backend/contrib/ethosu/legalize.py | 131 ++++++++++++++---- python/tvm/relay/op/contrib/ethosu.py | 61 ++++++++- python/tvm/script/ir_builder/ir/ir.py | 8 +- python/tvm/script/parser/core/entry.py | 40 +++++- python/tvm/script/parser/ir/entry.py | 30 +++- python/tvm/script/parser/relax/entry.py | 4 +- python/tvm/script/parser/tir/entry.py | 6 +- python/tvm/testing/utils.py | 9 +- python/tvm/tir/transform/transform.py | 18 ++- src/driver/driver_api.cc | 4 +- src/relax/op/op.cc | 9 +- src/relax/transform/call_tir_rewrite.cc | 24 ++-- src/relax/transform/lower_alloc_tensor.cc | 12 +- src/relax/transform/static_plan_block_memory.cc | 40 ++++-- src/tir/ir/data_type_rewriter.cc | 6 + src/tir/transforms/default_gpu_schedule.cc | 3 +- tests/python/arith/test_arith_domain_touched.py | 5 +- tests/python/codegen/test_inject_ptx_ldg32.py | 13 +- tests/python/contrib/test_ethosu/test_codegen.py | 45 ++++++ .../test_ethosu/test_copy_compute_reordering.py | 45 ++++-- .../contrib/test_ethosu/test_create_tiles.py | 8 -- .../contrib/test_ethosu/test_encode_constants.py | 41 +++--- tests/python/contrib/test_ethosu/test_legalize.py | 45 ++++++ .../contrib/test_ethosu/test_merge_constants.py | 48 ++++--- .../test_ethosu/test_remove_concatenates.py | 6 +- .../contrib/test_ethosu/test_replace_conv2d.py | 54 ++++---- .../contrib/test_ethosu/test_replace_copy.py | 12 +- tests/python/contrib/test_ethosu/test_scheduler.py | 4 +- .../test_ethosu/test_tir_to_cs_translator.py | 40 ++---- tests/python/contrib/test_ethosu/test_vela_api.py | 9 +- .../contrib/test_hexagon/test_dma_builtin.py | 2 +- .../test_relax_2d_buffer_allocation.py | 2 +- tests/python/dlight/test_benchmark.py | 4 +- tests/python/integration/test_lower.py | 3 +- .../python/micro/test_aot_legalize_packed_call.py | 5 +- .../test_distributed_transform_lower_distir.py | 4 +- ...est_distributed_transform_propagate_sharding.py | 4 +- tests/python/relax/test_analysis.py | 22 +-- .../relax/test_analysis_estimate_memory_usage.py | 2 +- tests/python/relax/test_ast_printer.py | 2 +- tests/python/relax/test_dataflow_pattern.py | 2 +- tests/python/relax/test_frontend_nn_modules.py | 31 +++-- ...runtime_builtin_paged_attention_kv_cache_tir.py | 23 ++-- .../relax/test_transform_ipc_allreduce_rewrite.py | 151 +++++++++++++++++++++ .../test_transform_lower_gpu_ipc_alloc_storage.py | 97 +++++++++++++ tests/python/relax/test_transform_normalize.py | 4 +- .../relax/test_transform_normalize_global_var.py | 4 +- ...st_transform_operator_specific_normalization.py | 33 ++--- .../relax/test_transform_rewrite_cuda_graph.py | 4 +- tests/python/relax/test_tvmscript_parser.py | 16 +-- .../relax/test_vm_alloc_storage_with_scope.py | 2 +- tests/python/relax/test_vm_codegen_only.py | 19 +-- tests/python/relax/test_vm_codegen_tir.py | 8 +- tests/python/relax/test_vm_cuda_graph.py | 4 +- .../test_tir_analysis_identify_memcpy.py | 1 + tests/python/tir-analysis/test_tir_analysis_oob.py | 3 +- .../test_tir_analysis_verify_well_formed.py | 12 +- tests/python/tir-base/test_tir_renew_defs.py | 4 +- tests/python/tir-base/test_tir_specialize.py | 8 +- .../tir-schedule/test_tir_schedule_rfactor.py | 3 +- .../test_tir_transform_common_subexpr_elim.py | 17 +-- .../test_tir_transform_convert_blocks_to_opaque.py | 2 + .../test_tir_transform_convert_ssa.py | 11 +- .../test_tir_transform_fp8_legalize.py | 9 +- .../test_tir_transform_inject_rolling_buffer.py | 144 ++++++++++++++++---- ...t_tir_transform_lower_cross_thread_reduction.py | 18 ++- .../test_tir_transform_lower_match_buffer.py | 6 +- ...form_merge_dynamic_shared_memory_allocations.py | 4 +- .../tir-transform/test_tir_transform_simplify.py | 17 ++- .../test_tir_transform_storage_flatten.py | 2 +- tests/python/tir-usmp/test_tir_usmp_algo.py | 2 - .../test_tir_usmp_analysis_extract_bufferinfo.py | 5 +- ...ransform_convert_pool_allocations_to_offsets.py | 1 - tests/python/tir-usmp/test_tir_usmp_utils.py | 1 - .../python/tvmscript/test_tvmscript_parser_tir.py | 4 +- tests/python/tvmscript/test_tvmscript_roundtrip.py | 50 ++++--- 86 files changed, 1544 insertions(+), 456 deletions(-) create mode 100755 docker/install/ubuntu_install_aprofile_aem.sh create mode 100644 python/tvm/relax/transform/ipc_allreduce_rewrite.py create mode 100644 python/tvm/relax/transform/lower_gpu_ipc_alloc_storage.py create mode 100644 tests/python/relax/test_transform_ipc_allreduce_rewrite.py create mode 100644 tests/python/relax/test_transform_lower_gpu_ipc_alloc_storage.py