This is an automated email from the ASF dual-hosted git repository.

junrushao pushed a commit to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/unity by this push:
     new ac568eb30a [Unity] Fix PagedKVCache per FlashInfer update (#16317)
ac568eb30a is described below

commit ac568eb30a4e19d51fc9ef9b7ca5642a4f589fde
Author: Ruihang Lai <[email protected]>
AuthorDate: Tue Jan 2 01:15:37 2024 -0500

    [Unity] Fix PagedKVCache per FlashInfer update (#16317)
    
    This PR updates PagedKVCache to match the recent FlashInfer interface
    change, and also bumps the FlashInfer submodule to the latest commit.
---
 3rdparty/flashinfer                    |  2 +-
 src/runtime/relax_vm/paged_kv_cache.cc | 10 ++++------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/3rdparty/flashinfer b/3rdparty/flashinfer
index 8d987b98f7..ae1a6501ca 160000
--- a/3rdparty/flashinfer
+++ b/3rdparty/flashinfer
@@ -1 +1 @@
-Subproject commit 8d987b98f7f7b9381097566643a7f53c99cf312d
+Subproject commit ae1a6501ca69e59c982bd196fc87514ab3f1773e
diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc
index fc7d351e5b..e941908dbc 100644
--- a/src/runtime/relax_vm/paged_kv_cache.cc
+++ b/src/runtime/relax_vm/paged_kv_cache.cc
@@ -848,9 +848,8 @@ class PagedAttentionKVCacheObj : public AttentionKVCache {
     if (num_depths_ == 1) {
       if (use_decode_kernel_[0]) {
         f_attention_decode_begin_forward_(
-            /*depth=*/0, page_indptr_on_depths_view_[0], page_indices_on_depths_view_[0],
-            last_page_len_on_depths_view_[0], /*return_lse=*/true, num_qo_heads_, num_kv_heads_,
-            head_dim_, page_size_, /*rotary_mode=*/true);
+            /*depth=*/0, page_indptr_on_depths_view_[0], last_page_len_on_depths_view_[0],
+            num_qo_heads_, num_kv_heads_, head_dim_, page_size_, /*rotary_mode=*/true);
       } else {
         f_attention_prefill_begin_forward_(/*depth=*/0, qo_indptr_on_depths_view_[0],
                                            cur_batch_size_, num_qo_heads_, num_kv_heads_);
@@ -864,9 +863,8 @@ class PagedAttentionKVCacheObj : public AttentionKVCache {
         }
         if (use_decode_kernel_[d]) {
           f_attention_decode_begin_forward_(
-              d, page_indptr_on_depths_view_[d], page_indices_on_depths_view_[d],
-              last_page_len_on_depths_view_[d], /*rotary_mode=*/false, num_qo_heads_, num_kv_heads_,
-              head_dim_, page_size_, /*return_lse=*/true);
+              d, page_indptr_on_depths_view_[d], last_page_len_on_depths_view_[d], num_qo_heads_,
+              num_kv_heads_, head_dim_, page_size_, /*rotary_mode=*/false);
         } else {
           f_attention_prefill_begin_forward_(/*depth=*/d, qo_indptr_on_depths_view_[d],
                                              last_page_len_on_depths_view_[d]->shape[0],

Reply via email to