This is an automated email from the ASF dual-hosted git repository.

feiwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new cd04743af7 [KYUUBI #7175] Enhance the performance for metadata cleanup 
by leveraging `create_time` index
cd04743af7 is described below

commit cd04743af7720ae20d5d9e9087e838d1bfad44eb
Author: Wang, Fei <[email protected]>
AuthorDate: Wed Aug 27 22:42:53 2025 -0700

    [KYUUBI #7175] Enhance the performance for metadata cleanup by leveraging 
`create_time` index
    
    ### Why are the changes needed?
    
    First, `create_time < end_time` is always true.
    
    So, when cleaning the metadata, minEndTime = System.currentTimeMillis - maxAge.
    
    end_time < minEndTime.
    
    And due to `create_time < end_time`, `create_time < minEndTime` is also true.
    
    And with `create_time < minEndTime`, the query performs better because it can 
leverage the `create_time` index.
    
    
https://github.com/apache/kyuubi/blob/f7e10e65d3aca6fa82171bb3d75b7622c74807b7/kyuubi-server/src/main/resources/sql/sqlite/metadata-store-schema-1.11.0.sqlite.sql#L37
    
    ```
    mysql> select count(1) from metadata where state in ('FINISHED', 'TIMEOUT', 
'CANCELED', 'CLOSED', 'ERROR') and end_time < 1753118619315;
    +----------+
    | count(1) |
    +----------+
    |    27741 |
    +----------+
    1 row in set (2.40 sec)
    
    mysql> select count(1) from metadata where state in ('FINISHED', 'TIMEOUT', 
'CANCELED', 'CLOSED', 'ERROR') and end_time < 1753118619315 and create_time < 
1753118619315;
    +----------+
    | count(1) |
    +----------+
    |    27741 |
    +----------+
    1 row in set (0.18 sec)
    
    mysql> select count(1) from metadata where end_time>0 and end_time < 
1753118619315 and create_time < 1753118619315;
    +----------+
    | count(1) |
    +----------+
    |    27741 |
    +----------+
    1 row in set (0.13 sec)
    ```
    
    ### How was this patch tested?
    
    GA and manually testing.
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #7175 from turboFei/delete_fast.
    
    Closes #7175
    
    980a391cd [Wang, Fei] delete fast
    
    Authored-by: Wang, Fei <[email protected]>
    Signed-off-by: Wang, Fei <[email protected]>
---
 .../org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala
 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala
index a7226c0a1f..f3c0b2d7de 100644
--- 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala
+++ 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala
@@ -84,8 +84,6 @@ class JDBCMetadataStore(conf: KyuubiConf) extends 
MetadataStore with Logging {
     new HikariDataSource(hikariConfig)
   private val mapper = new ObjectMapper().registerModule(DefaultScalaModule)
 
-  private val terminalStates = OperationState.terminalStates.map(x => 
s"'$x'").mkString(", ")
-
   if (conf.get(METADATA_STORE_JDBC_DATABASE_SCHEMA_INIT)) {
     initSchema()
   }
@@ -413,9 +411,10 @@ class JDBCMetadataStore(conf: KyuubiConf) extends 
MetadataStore with Logging {
 
   override def cleanupMetadataByAge(maxAge: Long): Unit = {
     val minEndTime = System.currentTimeMillis() - maxAge
-    val query = s"DELETE FROM $METADATA_TABLE WHERE state IN ($terminalStates) 
AND end_time < ?"
+    val query =
+      s"DELETE FROM $METADATA_TABLE WHERE end_time > 0 AND end_time < ? AND 
create_time < ?"
     JdbcUtils.withConnection { connection =>
-      withUpdateCount(connection, query, minEndTime) { count =>
+      withUpdateCount(connection, query, minEndTime, minEndTime) { count =>
         info(s"Cleaned up $count records older than $maxAge ms from 
$METADATA_TABLE.")
       }
     }

Reply via email to