This is an automated email from the ASF dual-hosted git repository. masahi pushed a commit to branch unity in repository https://gitbox.apache.org/repos/asf/tvm.git
commit 8e448d8f295fcbed1b161bdd4d220434ef9019fd
Author: Masahiro Masuda <masahi...@gmail.com>
AuthorDate: Wed Nov 29 16:40:18 2023 +0000

    window_size_left should be max_seqlen_k, not num_key for the var_len case
---
 python/tvm/contrib/cutlass/attention_operation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/tvm/contrib/cutlass/attention_operation.py b/python/tvm/contrib/cutlass/attention_operation.py
index eacc0ec37a..518778ec52 100644
--- a/python/tvm/contrib/cutlass/attention_operation.py
+++ b/python/tvm/contrib/cutlass/attention_operation.py
@@ -324,7 +324,8 @@ def instantiate_flash_attention_var_len_template(attrs):
             o_row_stride,
             ${scale},
             ${is_causal},
-            ${window_size_left},
+            // For SWA, is_causal must be false.
+            ${is_causal} ? _max_seqlen_k : ${window_size_left},
             ${window_size_right},
             stream);
     """