This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push: new 515c07937b [DLight] Skip GEMV rules when more than one vector (#17052) 515c07937b is described below commit 515c07937bbf9c0bd7575928217c258caaa5867c Author: Siyuan Feng <hzfen...@sjtu.edu.cn> AuthorDate: Fri May 31 22:26:50 2024 +0800 [DLight] Skip GEMV rules when more than one vector (#17052) The current dlight GEMV rule requires exactly one vector buffer and otherwise raises an error. This PR changes this behavior to skip the rule instead. --- python/tvm/dlight/gpu/gemv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/dlight/gpu/gemv.py b/python/tvm/dlight/gpu/gemv.py index b8a2c6a15f..9ad6f3f89a 100644 --- a/python/tvm/dlight/gpu/gemv.py +++ b/python/tvm/dlight/gpu/gemv.py @@ -206,8 +206,7 @@ class GEMV(GPUScheduleRule): if is_inner_reduction is None: return None elif is_inner_reduction: - self.sch_inner_reduction(sch, target, block, vector_input_buffers, epilogue) - return sch + return self.sch_inner_reduction(sch, target, block, vector_input_buffers, epilogue) elif target.kind.name == "opencl" and "android" in str(target.host): ret = self.sch_outer_reduction(sch, target, block, vector_input_buffers, epilogue) if ret is None: @@ -313,7 +312,8 @@ class GEMV(GPUScheduleRule): # load vector into shared memory, shape should be the whole vector if LOAD_V_SHARED: - assert len(vector_input_buffers) == 1 + if len(vector_input_buffers) != 1: + return None V_shared = sch.cache_read(rf, read_buffer_index=0, storage_scope="shared") sch.compute_at(V_shared, tr, preserve_unit_loops=True) l = sch.get_loops(block=V_shared)[-1]