This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 21e1380063 [Hotfix] Revert driver API pass ordering that breaks MLC, 
mark failing test (#16770)
21e1380063 is described below

commit 21e1380063c130203e3557d4a742c51d3ef593c6
Author: Steven S. Lyubomirsky <slyubomir...@octoml.ai>
AuthorDate: Sat Mar 23 10:03:09 2024 -0400

    [Hotfix] Revert driver API pass ordering that breaks MLC, mark failing test 
(#16770)
    
    * Revert changes that cause failures in MLC, mark and skip the failing tests
    
    
    
    * Restore changes unrelated to driver API reordering
---
 src/driver/driver_api.cc                                            | 4 +++-
 .../tir-transform/test_tir_transform_inject_ptx_async_copy.py       | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/driver/driver_api.cc b/src/driver/driver_api.cc
index e3b4a5a651..33b4514e6b 100644
--- a/src/driver/driver_api.cc
+++ b/src/driver/driver_api.cc
@@ -590,7 +590,6 @@ transform::Sequential MixedModulePassManager(IRModule 
mixed_mod, Target target)
 
   mixed_pass_list.push_back(tir::transform::ThreadSync("shared"));
   mixed_pass_list.push_back(tir::transform::ThreadSync("shared.dyn"));
-  mixed_pass_list.push_back(tir::transform::MergeSharedMemoryAllocations());
   mixed_pass_list.push_back(tir::transform::ThreadSync("warp"));
   mixed_pass_list.push_back(tir::transform::InferFragment());
   mixed_pass_list.push_back(tir::transform::LowerThreadAllreduce());
@@ -608,6 +607,9 @@ transform::Sequential MixedModulePassManager(IRModule 
mixed_mod, Target target)
 
   mixed_pass_list.push_back(tir::transform::AnnotateDeviceRegions());
   mixed_pass_list.push_back(tir::transform::SplitHostDevice());
+  // MergeSharedMemoryAllocations must be applied after SplitHostDevice
+  // because the merged allocation site is at the beginning of each device 
function
+  mixed_pass_list.push_back(tir::transform::MergeSharedMemoryAllocations());
 
   bool unpacked_api = mixed_mod->GetAttr<relay::Executor>(tvm::attr::kExecutor)
                           .value_or(relay::Executor::Create("graph", {}))
diff --git 
a/tests/python/tir-transform/test_tir_transform_inject_ptx_async_copy.py 
b/tests/python/tir-transform/test_tir_transform_inject_ptx_async_copy.py
index c52aca7674..4c94dc04cc 100644
--- a/tests/python/tir-transform/test_tir_transform_inject_ptx_async_copy.py
+++ b/tests/python/tir-transform/test_tir_transform_inject_ptx_async_copy.py
@@ -482,6 +482,12 @@ def 
test_cp_async_in_if_then_else(postproc_if_missing_async_support):
     assert generated_code == expected_cuda_script
 
 
+@pytest.mark.skip(
+    reason="This test fails due to an ordering issue with 
MergeSharedMemoryAllocations "
+    "in device_driver_api.cc. However, fixing this causes failures in MLC. "
+    "This bug should be addressed. See discussion in 
https://github.com/apache/tvm/pull/16769 "
+    "and https://github.com/apache/tvm/pull/16569#issuecomment-1992720448";
+)
 @tvm.testing.requires_cuda
 def test_vectorize_cp_async_in_if_then_else(postproc_if_missing_async_support):
     @T.prim_func

Reply via email to