SourabhBadhya commented on code in PR #4740: URL: https://github.com/apache/hive/pull/4740#discussion_r1361606801
########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/impl/ReadyToCleanAbortHandler.java: ########## @@ -76,7 +76,13 @@ public class ReadyToCleanAbortHandler implements QueryHandler<List<CompactionInf " AND \"res1\".\"TC_TABLE\" = \"res3\".\"CQ_TABLE\" " + " AND (\"res1\".\"TC_PARTITION\" = \"res3\".\"CQ_PARTITION\" " + " OR (\"res1\".\"TC_PARTITION\" IS NULL AND \"res3\".\"CQ_PARTITION\" IS NULL))" + - " WHERE \"res3\".\"RETRY_RECORD_CHECK\" <= 0 OR \"res3\".\"RETRY_RECORD_CHECK\" IS NULL"; + " WHERE (\"res3\".\"RETRY_RECORD_CHECK\" <= 0 OR \"res3\".\"RETRY_RECORD_CHECK\" IS NULL) " + + "AND NOT EXISTS (SELECT 1 " + + " FROM \"TXN_COMPONENTS\" AS \"txns\" " + + " WHERE \"txns\".\"TC_DATABASE\" = \"res1\".\"TC_DATABASE\" AND \"txns\".\"TC_TABLE\" = \"res1\".\"TC_TABLE\" " + + " AND (\"txns\".\"TC_PARTITION\" = \"res1\".\"TC_PARTITION\" " + + " OR (\"txns\".\"TC_PARTITION\" IS NULL AND \"res1\".\"TC_PARTITION\" IS NULL))" + + " AND \"txns\".\"TC_WRITEID\" > \"res1\".\"MAX_ABORTED_WRITE_ID\")"; Review Comment: Why are we comparing records in TXN_COMPONENTS table? I thought the motive was to compare the records in COMPACTION_QUEUE to potentially avoid abort cleanup for tables which already have a COMPACTION_QUEUE record. 
########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/impl/ReadyToCleanAbortHandler.java: ########## @@ -76,7 +76,13 @@ public class ReadyToCleanAbortHandler implements QueryHandler<List<CompactionInf " AND \"res1\".\"TC_TABLE\" = \"res3\".\"CQ_TABLE\" " + " AND (\"res1\".\"TC_PARTITION\" = \"res3\".\"CQ_PARTITION\" " + " OR (\"res1\".\"TC_PARTITION\" IS NULL AND \"res3\".\"CQ_PARTITION\" IS NULL))" + - " WHERE \"res3\".\"RETRY_RECORD_CHECK\" <= 0 OR \"res3\".\"RETRY_RECORD_CHECK\" IS NULL"; + " WHERE (\"res3\".\"RETRY_RECORD_CHECK\" <= 0 OR \"res3\".\"RETRY_RECORD_CHECK\" IS NULL) " + + "AND NOT EXISTS (SELECT 1 " + + " FROM \"TXN_COMPONENTS\" AS \"txns\" " + + " WHERE \"txns\".\"TC_DATABASE\" = \"res1\".\"TC_DATABASE\" AND \"txns\".\"TC_TABLE\" = \"res1\".\"TC_TABLE\" " + + " AND (\"txns\".\"TC_PARTITION\" = \"res1\".\"TC_PARTITION\" " + + " OR (\"txns\".\"TC_PARTITION\" IS NULL AND \"res1\".\"TC_PARTITION\" IS NULL))" + + " AND \"txns\".\"TC_WRITEID\" > \"res1\".\"MAX_ABORTED_WRITE_ID\")"; Review Comment: Also this query lacks formatting. 
########## ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/handler/TestAbortedTxnCleaner.java: ########## @@ -290,49 +341,6 @@ public void testCleaningOfAbortedDirectoriesBelowBase() throws Exception { Assert.assertEquals(1, directories.size()); } - @Test - public void testAbortedCleaningWithThreeTxnsWithDiffWriteIds() throws Exception { - String dbName = "default", tableName = "handler_unpart_writeid_test"; - Table t = newTable(dbName, tableName, false); - - // Add 2 committed deltas and 2 aborted deltas - addDeltaFileWithTxnComponents(t, null, 2, false); - addDeltaFileWithTxnComponents(t, null, 2, true); - addDeltaFileWithTxnComponents(t, null, 2, true); - addDeltaFileWithTxnComponents(t, null, 2, false); - - long openTxnId1 = openTxn(); - long openTxnId2 = openTxn(); - long openTxnId3 = openTxn(); - long writeId2 = ms.allocateTableWriteId(openTxnId2, t.getDbName(), t.getTableName()); - long writeId3 = ms.allocateTableWriteId(openTxnId3, t.getDbName(), t.getTableName()); - long writeId1 = ms.allocateTableWriteId(openTxnId1, t.getDbName(), t.getTableName()); - assert writeId2 < writeId1 && writeId2 < writeId3; - acquireLock(t, null, openTxnId3); - acquireLock(t, null, openTxnId2); - acquireLock(t, null, openTxnId1); - addDeltaFile(t, null, writeId3, writeId3, 2); - addDeltaFile(t, null, writeId1, writeId1, 2); - addDeltaFile(t, null, writeId2, writeId2, 2); - - ms.abortTxns(Collections.singletonList(openTxnId2)); - ms.commitTxn(openTxnId3); - - HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD, 0); - MetadataCache metadataCache = new MetadataCache(true); - FSRemover mockedFSRemover = Mockito.spy(new FSRemover(conf, ReplChangeManager.getInstance(conf), metadataCache)); - TaskHandler mockedTaskHandler = Mockito.spy(new AbortedTxnCleaner(conf, txnHandler, metadataCache, - false, mockedFSRemover)); - Cleaner cleaner = new Cleaner(); - cleaner.setConf(conf); - cleaner.init(new AtomicBoolean(true)); - 
cleaner.setCleanupHandlers(Arrays.asList(mockedTaskHandler)); - cleaner.run(); - - List<Path> directories = getDirectories(conf, t, null); - Assert.assertEquals(5, directories.size()); - } - Review Comment: Why this change? As I see, you are just changing the location of this test from one place to another. This corrupts git history. ########## ql/src/test/org/apache/hadoop/hive/metastore/txn/TestCompactionTxnHandler.java: ########## @@ -1055,6 +1055,38 @@ public void testFindReadyToCleanAborts_limitFetchSize() throws Exception { assertEquals(1, potentials.size()); } + @Test + public void testFindReadyToCleanAborts() throws Exception { + long txnId = openTxn(); + + List<LockComponent> components = new ArrayList<>(); + components.add(createLockComponent(LockType.SHARED_WRITE, LockLevel.DB, "mydb", "mytable", "mypartition=myvalue", DataOperationType.UPDATE)); + components.add(createLockComponent(LockType.SHARED_WRITE, LockLevel.DB, "mydb", "yourtable", "mypartition=myvalue", DataOperationType.UPDATE)); + + allocateTableWriteIds("mydb", "mytable", txnId); + allocateTableWriteIds("mydb", "yourtable", txnId); + + LockRequest req = new LockRequest(components, "me", "localhost"); + req.setTxnid(txnId); + LockResponse res = txnHandler.lock(req); + assertSame(res.getState(), LockState.ACQUIRED); + + txnHandler.abortTxn(new AbortTxnRequest((txnId))); + + txnId = openTxn(); + components = new ArrayList<>(); + components.add(createLockComponent(LockType.SHARED_WRITE, LockLevel.DB, "mydb", "mytable", "mypartition=myvalue", DataOperationType.UPDATE)); + allocateTableWriteIds("mydb", "mytable", txnId); + + req = new LockRequest(components, "me", "localhost"); + req.setTxnid(txnId); + res = txnHandler.lock(req); + assertSame(res.getState(), LockState.ACQUIRED); + + List<CompactionInfo> potentials = txnHandler.findReadyToCleanAborts(1, 0); + assertEquals(1, potentials.size()); Review Comment: Asserting on size is dangerous. We never know what is present in this record. 
Since we are avoiding abort cleanup for a specific table, I think it's better to add some table name checks as well. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org