This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a commit to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/unity by this push:
new ac568eb30a [Unity] Fix PagedKVCache per FlashInfer update (#16317)
ac568eb30a is described below
commit ac568eb30a4e19d51fc9ef9b7ca5642a4f589fde
Author: Ruihang Lai <[email protected]>
AuthorDate: Tue Jan 2 01:15:37 2024 -0500
[Unity] Fix PagedKVCache per FlashInfer update (#16317)
This PR fixes PagedKVCache to follow the recent FlashInfer interface
change, and also bumps the FlashInfer submodule to the latest commit.
---
3rdparty/flashinfer | 2 +-
src/runtime/relax_vm/paged_kv_cache.cc | 10 ++++------
2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/3rdparty/flashinfer b/3rdparty/flashinfer
index 8d987b98f7..ae1a6501ca 160000
--- a/3rdparty/flashinfer
+++ b/3rdparty/flashinfer
@@ -1 +1 @@
-Subproject commit 8d987b98f7f7b9381097566643a7f53c99cf312d
+Subproject commit ae1a6501ca69e59c982bd196fc87514ab3f1773e
diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc
index fc7d351e5b..e941908dbc 100644
--- a/src/runtime/relax_vm/paged_kv_cache.cc
+++ b/src/runtime/relax_vm/paged_kv_cache.cc
@@ -848,9 +848,8 @@ class PagedAttentionKVCacheObj : public AttentionKVCache {
if (num_depths_ == 1) {
if (use_decode_kernel_[0]) {
f_attention_decode_begin_forward_(
- /*depth=*/0, page_indptr_on_depths_view_[0],
page_indices_on_depths_view_[0],
- last_page_len_on_depths_view_[0], /*return_lse=*/true,
num_qo_heads_, num_kv_heads_,
- head_dim_, page_size_, /*rotary_mode=*/true);
+ /*depth=*/0, page_indptr_on_depths_view_[0],
last_page_len_on_depths_view_[0],
+ num_qo_heads_, num_kv_heads_, head_dim_, page_size_,
/*rotary_mode=*/true);
} else {
f_attention_prefill_begin_forward_(/*depth=*/0,
qo_indptr_on_depths_view_[0],
cur_batch_size_, num_qo_heads_,
num_kv_heads_);
@@ -864,9 +863,8 @@ class PagedAttentionKVCacheObj : public AttentionKVCache {
}
if (use_decode_kernel_[d]) {
f_attention_decode_begin_forward_(
- d, page_indptr_on_depths_view_[d],
page_indices_on_depths_view_[d],
- last_page_len_on_depths_view_[d], /*rotary_mode=*/false,
num_qo_heads_, num_kv_heads_,
- head_dim_, page_size_, /*return_lse=*/true);
+ d, page_indptr_on_depths_view_[d],
last_page_len_on_depths_view_[d], num_qo_heads_,
+ num_kv_heads_, head_dim_, page_size_, /*rotary_mode=*/false);
} else {
f_attention_prefill_begin_forward_(/*depth=*/d,
qo_indptr_on_depths_view_[d],
last_page_len_on_depths_view_[d]->shape[0],