[ https://issues.apache.org/jira/browse/HIVE-22977?focusedWorklogId=838346&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-838346 ]
ASF GitHub Bot logged work on HIVE-22977: ----------------------------------------- Author: ASF GitHub Bot Created on: 10/Jan/23 14:59 Start Date: 10/Jan/23 14:59 Worklog Time Spent: 10m Work Description: SourabhBadhya commented on code in PR #3801: URL: https://github.com/apache/hive/pull/3801#discussion_r1065888532 ########## itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java: ########## @@ -2951,4 +2952,170 @@ public void testStatsAfterCompactionPartTbl(boolean isQueryBased, boolean isAuto Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows")); executeStatementOnDriver("drop table if exists " + tblName, driver); } + + @Test + public void testMajorCompactionWithMergeNotPartitionedWithoutBuckets() throws Exception { + testCompactionWithMerge(CompactionType.MAJOR, false, false, null, Collections.singletonList("bucket_00000"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("base_0000003_v0000007"), true, true); + } + + @Test + public void testMajorCompactionWithMergePartitionedWithoutBuckets() throws Exception { + testCompactionWithMerge(CompactionType.MAJOR, true, false, "ds=today", Collections.singletonList("bucket_00000"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("base_0000003_v0000007"), true, true); + } + + @Test + public void testMajorCompactionWithMergeNotPartitionedWithBuckets() throws Exception { + testCompactionWithMerge(CompactionType.MAJOR, false, true, null, Arrays.asList("bucket_00000", "bucket_00001"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("base_0000003_v0000007"), true, true); + } + + @Test + public void testMajorCompactionWithMergerPartitionedWithBuckets() throws Exception { + 
testCompactionWithMerge(CompactionType.MAJOR, true, true, "ds=today", Arrays.asList("bucket_00000", "bucket_00001"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("base_0000003_v0000007"), true, true); + } + + @Test + public void testMinorCompactionWithMergeNotPartitionedWithoutBuckets() throws Exception { + testCompactionWithMerge(CompactionType.MINOR, false, false, null, + Collections.singletonList("bucket_00000"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("delta_0000001_0000003_v0000007"), true, true); + } + + @Test + public void testMinorCompactionWithMergePartitionedWithoutBuckets() throws Exception { + testCompactionWithMerge(CompactionType.MINOR, true, false, "ds=today", + Collections.singletonList("bucket_00000"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("delta_0000001_0000003_v0000007"), true, true); + } + + @Test + public void testMinorCompactionWithMergeNotPartitionedWithBuckets() throws Exception { + testCompactionWithMerge(CompactionType.MINOR, false, true, null, + Arrays.asList("bucket_00000", "bucket_00001"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("delta_0000001_0000003_v0000007"), true, true); + } + + @Test + public void testMinorCompactionWithMergePartitionedWithBuckets() throws Exception { + testCompactionWithMerge(CompactionType.MINOR, true, true, "ds=today", + Arrays.asList("bucket_00000", "bucket_00001"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("delta_0000001_0000003_v0000007"), true, true); + } + + @Test + public void testMajorCompactionAfterMinorWithMerge() throws Exception { + 
testCompactionWithMerge(CompactionType.MINOR, true, true, "ds=today", + Arrays.asList("bucket_00000", "bucket_00001"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("delta_0000001_0000003_v0000007"),true, false); + testCompactionWithMerge(CompactionType.MAJOR, true, true, "ds=today", + Arrays.asList("bucket_00000", "bucket_00001"), + Arrays.asList("delta_0000001_0000003_v0000007", "delta_0000004_0000004_0000", "delta_0000005_0000005_0000", + "delta_0000006_0000006_0000"), Collections.singletonList("base_0000006_v0000014"), false, true); + } + + @Test + public void testMinorCompactionAfterMajorWithMerge() throws Exception { + testCompactionWithMerge(CompactionType.MAJOR, false, false, null, + Collections.singletonList("bucket_00000"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("base_0000003_v0000007"), true, false); + testCompactionWithMerge(CompactionType.MINOR, false, false, null, + Collections.singletonList("bucket_00000"), + Arrays.asList("delta_0000004_0000004_0000", "delta_0000005_0000005_0000", "delta_0000006_0000006_0000"), + Collections.singletonList("delta_0000001_0000006_v0000014"), false, true); + } + + @Test + public void testMultipleMajorCompactionWithMerge() throws Exception { + testCompactionWithMerge(CompactionType.MAJOR, true, true, "ds=today", + Arrays.asList("bucket_00000", "bucket_00001"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("base_0000003_v0000007"), true, false); + testCompactionWithMerge(CompactionType.MAJOR, true, true, "ds=today", + Arrays.asList("bucket_00000", "bucket_00001"), + Arrays.asList("delta_0000004_0000004_0000", "delta_0000005_0000005_0000", "delta_0000006_0000006_0000"), + Collections.singletonList("base_0000006_v0000014"), false, true); + } + + @Test + 
public void testMultipleMinorCompactionWithMerge() throws Exception { + testCompactionWithMerge(CompactionType.MINOR, false, false, null, + Collections.singletonList("bucket_00000"), + Arrays.asList("delta_0000001_0000001_0000", "delta_0000002_0000002_0000", "delta_0000003_0000003_0000"), + Collections.singletonList("delta_0000001_0000003_v0000007"), true, false); + testCompactionWithMerge(CompactionType.MINOR, false, false, null, + Collections.singletonList("bucket_00000"), + Arrays.asList("delta_0000001_0000003_v0000007", "delta_0000004_0000004_0000", "delta_0000005_0000005_0000", + "delta_0000006_0000006_0000"), + Collections.singletonList("delta_0000001_0000006_v0000014"), false, true); + } + + private void testCompactionWithMerge(CompactionType compactionType, boolean isPartitioned, boolean isBucketed, Review Comment: Added tests which use only Merge compaction and tests which use fallback to query-based compaction using mocking. Done. Issue Time Tracking ------------------- Worklog Id: (was: 838346) Time Spent: 3h 20m (was: 3h 10m) > Merge delta files instead of running a query in major/minor compaction > ---------------------------------------------------------------------- > > Key: HIVE-22977 > URL: https://issues.apache.org/jira/browse/HIVE-22977 > Project: Hive > Issue Type: Improvement > Reporter: László Pintér > Assignee: Sourabh Badhya > Priority: Major > Labels: pull-request-available > Attachments: HIVE-22977.01.patch, HIVE-22977.02.patch > > Time Spent: 3h 20m > Remaining Estimate: 0h > > [Compaction Optimization] > We should analyse the possibility to move a delta file instead of running a > major/minor compaction query. > Please consider the following use cases: > - full acid table but only insert queries were run. This means that no > delete delta directories were created. Is it possible to merge the delta > directory contents without running a compaction query? > - full acid table, initiating queries through the streaming API. 
If there > are no aborted transactions during the streaming, is it possible to merge the > delta directory contents without running a compaction query? -- This message was sent by Atlassian Jira (v8.20.10#820010)