deniskuzZ commented on code in PR #4313:
URL: https://github.com/apache/hive/pull/4313#discussion_r1217935063
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java:
##########
@@ -92,6 +94,52 @@ class CompactionTxnHandler extends TxnHandler {
"DELETE FROM \"TXNS\" WHERE \"TXN_ID\" NOT IN (SELECT \"TC_TXNID\" FROM
\"TXN_COMPONENTS\") " +
"AND (\"TXN_STATE\" = " + TxnStatus.ABORTED + " OR \"TXN_STATE\" = "
+ TxnStatus.COMMITTED + ") " +
"AND \"TXN_ID\" < ?";
+
+ // Three inner sub-queries which are under union all and gets the max values
for corresponding db, table, partition.
+ private static final String SELECT_ABORTS_WITH_MIN_OPEN_WRITETXN_QUERY =
+ "SELECT \"DB\", \"TBL\", \"PART\", MAX(\"MIN_TXN_START_TIME\"),
MAX(\"ABORTED_TXN_COUNT\"), " +
+ " MAX(\"MIN_OPEN_WRITE_TXNID\"), MAX(\"RETRY_RETENTION\"),
MAX(\"RETRY_RECORD_CHECK\") FROM ( " +
+ // First sub-query - Gets the aborted txns with min txn start time,
number of aborted txns
+ // for corresponding db, table, partition.
+ " SELECT \"TC_DATABASE\" AS \"DB\", \"TC_TABLE\" AS \"TBL\",
\"TC_PARTITION\" AS \"PART\", " +
+ " MIN(\"TXN_STARTED\") AS \"MIN_TXN_START_TIME\", COUNT(*) AS
\"ABORTED_TXN_COUNT\", " +
+ " 0 AS \"MIN_OPEN_WRITE_TXNID\", -1 AS \"RETRY_RETENTION\", 0 AS
\"RETRY_RECORD_CHECK\" " +
Review Comment:
updated the query, please check
````
SELECT
DB, TBL, PART,
MAX(MIN_TXN_START_TIME),
MAX(ABORTED_TXN_COUNT) ABORTED_TXN_COUNT,
MAX(MIN_OPEN_WRITE_TXNID),
MAX(RETRY_RETENTION),
MAX(RETRY_RECORD_CHECK) FROM (
// First sub-query - Gets the aborted txns with min txn start time,
number of aborted txns
// for corresponding db, table, partition.
SELECT
TC_DATABASE AS DB, TC_TABLE AS TBL, TC_PARTITION AS PART,
MIN(TXN_STARTED) AS MIN_TXN_START_TIME,
COUNT(*) AS ABORTED_TXN_COUNT,
-1 AS MIN_OPEN_WRITE_TXNID,
-1 AS RETRY_RETENTION,
0 AS RETRY_RECORD_CHECK
FROM TXNS, TXN_COMPONENTS
WHERE TXN_ID = TC_TXNID AND TXN_STATE = TxnStatus.ABORTED
GROUP BY TC_DATABASE, TC_TABLE, TC_PARTITION
(checkAbortedTimeThreshold ? "" : " HAVING ABORTED_TXN_COUNT > " +
abortedThreshold)
UNION ALL
// Second sub-query - Gets the min open txn id for corresponding db,
table, partition.
SELECT
TC_DATABASE AS DB, TC_TABLE AS TBL, TC_PARTITION AS PART,
-1 AS MIN_TXN_START_TIME,
0 AS ABORTED_TXN_COUNT,
MIN(TC_TXNID) AS MIN_OPEN_WRITE_TXNID,
-1 AS RETRY_RETENTION,
0 AS RETRY_RECORD_CHECK
FROM TXNS, TXN_COMPONENTS
WHERE TXN_ID = TC_TXNID AND TXN_STATE = TxnStatus.OPEN
GROUP BY TC_DATABASE, TC_TABLE, TC_PARTITION
UNION ALL
// Third sub-query - Gets the retry entries for corresponding db, table,
partition.
SELECT
CQ_DATABASE AS DB, CQ_TABLE AS TBL, CQ_PARTITION AS PART,
-1 AS MIN_TXN_START_TIME,
1 AS ABORTED_TXN_COUNT,
-1 AS MIN_OPEN_WRITE_TXNID,
MAX(CQ_RETRY_RETENTION) AS RETRY_RETENTION,
MIN(CQ_COMMIT_TIME) - %s + MAX(CQ_RETRY_RETENTION) AS
RETRY_RECORD_CHECK
FROM COMPACTION_QUEUE
WHERE CQ_TYPE = quoteChar(TxnStore.ABORT_CLEANUP_TYPE)
GROUP BY CQ_DATABASE, CQ_TABLE, CQ_PARTITION
) res
GROUP BY DB, TBL, PART
HAVING ABORTED_TXN_COUNT > 0;
````
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]