This is an automated email from the ASF dual-hosted git repository.
feiwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new cd04743af7 [KYUUBI #7175] Enhance the performance for metadata cleanup
by leveraging `create_time` index
cd04743af7 is described below
commit cd04743af7720ae20d5d9e9087e838d1bfad44eb
Author: Wang, Fei <[email protected]>
AuthorDate: Wed Aug 27 22:42:53 2025 -0700
[KYUUBI #7175] Enhance the performance for metadata cleanup by leveraging
`create_time` index
### Why are the changes needed?
First, `create_time < end_time` is always true.
So, when cleaning the mtadata, minEndTime = System.currentTimeMillis -
maxAge.
end_time < minEndTime.
And due to `create_time < end_time`, so `create_time < minEndTime` is also
true.
And with `create_time < minEndTime`, its performance is better because it
leverages `create_time` index.
https://github.com/apache/kyuubi/blob/f7e10e65d3aca6fa82171bb3d75b7622c74807b7/kyuubi-server/src/main/resources/sql/sqlite/metadata-store-schema-1.11.0.sqlite.sql#L37
```
mysql> select count(1) from metadata where state in ('FINISHED', 'TIMEOUT',
'CANCELED', 'CLOSED', 'ERROR') and end_time < 1753118619315;
+----------+
| count(1) |
+----------+
| 27741 |
+----------+
1 row in set (2.40 sec)
mysql> select count(1) from metadata where state in ('FINISHED', 'TIMEOUT',
'CANCELED', 'CLOSED', 'ERROR') and end_time < 1753118619315 and create_time <
1753118619315;
+----------+
| count(1) |
+----------+
| 27741 |
+----------+
1 row in set (0.18 sec)
mysql> select count(1) from metadata where end_time>0 and end_time <
1753118619315 and create_time < 1753118619315;
+----------+
| count(1) |
+----------+
| 27741 |
+----------+
1 row in set (0.13 sec)
```
### How was this patch tested?
GA and manually testing.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #7175 from turboFei/delete_fast.
Closes #7175
980a391cd [Wang, Fei] delete fast
Authored-by: Wang, Fei <[email protected]>
Signed-off-by: Wang, Fei <[email protected]>
---
.../org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git
a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala
b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala
index a7226c0a1f..f3c0b2d7de 100644
---
a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala
+++
b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala
@@ -84,8 +84,6 @@ class JDBCMetadataStore(conf: KyuubiConf) extends
MetadataStore with Logging {
new HikariDataSource(hikariConfig)
private val mapper = new ObjectMapper().registerModule(DefaultScalaModule)
- private val terminalStates = OperationState.terminalStates.map(x =>
s"'$x'").mkString(", ")
-
if (conf.get(METADATA_STORE_JDBC_DATABASE_SCHEMA_INIT)) {
initSchema()
}
@@ -413,9 +411,10 @@ class JDBCMetadataStore(conf: KyuubiConf) extends
MetadataStore with Logging {
override def cleanupMetadataByAge(maxAge: Long): Unit = {
val minEndTime = System.currentTimeMillis() - maxAge
- val query = s"DELETE FROM $METADATA_TABLE WHERE state IN ($terminalStates)
AND end_time < ?"
+ val query =
+ s"DELETE FROM $METADATA_TABLE WHERE end_time > 0 AND end_time < ? AND
create_time < ?"
JdbcUtils.withConnection { connection =>
- withUpdateCount(connection, query, minEndTime) { count =>
+ withUpdateCount(connection, query, minEndTime, minEndTime) { count =>
info(s"Cleaned up $count records older than $maxAge ms from
$METADATA_TABLE.")
}
}