This is an automated email from the ASF dual-hosted git repository.

dengzh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 88e2175b2c0 HIVE-28700: MRCompactor may cause data loss when performing the major compaction (#5603) (Zhihua Deng, reviewed by Denys Kuzmenko,  Marta Kuczora)
88e2175b2c0 is described below

commit 88e2175b2c04403e35b404634306f60d599c3d83
Author: dengzh <[email protected]>
AuthorDate: Wed Jan 15 11:29:08 2025 +0800

    HIVE-28700: MRCompactor may cause data loss when performing the major compaction (#5603) (Zhihua Deng, reviewed by Denys Kuzmenko,  Marta Kuczora)
---
 .../hive/ql/txn/compactor/TestCompactor.java       | 28 ++++++++++++++++++++++
 .../hadoop/hive/ql/txn/compactor/MRCompactor.java  |  5 +---
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index afe91493d02..8b93bc42a53 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -2574,6 +2574,34 @@ public class TestCompactor extends TestCompactorBase {
     Assert.assertEquals("d", rs.get(1));
   }
 
+  @Test
+  public void testMajorCompactionOnBaseMissingBucket() throws Exception {
+    dropTables("full_acid", "ext");
+    HiveConf hiveConf = driver.getConf();
+    String reducers = hiveConf.get("mapreduce.job.reduces");
+    hiveConf.set("mapreduce.job.reduces", "7");
+    executeStatementOnDriver("create table ext (a int)", driver);
+    executeStatementOnDriver("insert into table ext values(1),(2),(3),(3),(3),(3),(4),(5),(6),(7)", driver);
+    executeStatementOnDriver("create table full_acid(a int) stored as orc tblproperties('transactional'='true')", driver);
+    executeStatementOnDriver("insert overwrite table full_acid select * from ext where a  = 3", driver);
+    executeStatementOnDriver("insert into table full_acid select * from ext where a != 3 group by a", driver);
+
+    List<Integer> values1 = new ArrayList<>();
+    List<Integer> values2 = new ArrayList<>();
+    executeStatementOnDriver("select * from full_acid order by a", driver);
+    driver.getResults(values1);
+    executeStatementOnDriver("alter table full_acid compact 'major'", driver);
+    Assert.assertEquals(10, values1.size());
+
+    runWorker(conf);
+    executeStatementOnDriver("select * from full_acid order by a", driver);
+    driver.getResults(values2);
+    Assert.assertEquals(values1, values2);
+
+    hiveConf.set("mapreduce.job.reduces", reducers);
+    dropTables("full_acid", "ext");
+  }
+
   @Test
   public void testAcidDirCacheOnDropPartitionedTable() throws Exception {
     int cacheDurationInMinutes = 10;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MRCompactor.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MRCompactor.java
index 3443ea20455..db5aa26c48e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MRCompactor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MRCompactor.java
@@ -571,12 +571,9 @@ public class MRCompactor implements Compactor {
           attemptId = Integer.valueOf(attemptIdString);
         }
         deltasToAttemptId.put(deltas[i].getName(), attemptId);
-        if (baseAttemptId != null) {
-          deltasToAttemptId.put(base.getName(), baseAttemptId);
-        }
       }
       if (baseAttemptId != null) {
-        deltasToAttemptId.put(base.toString(), baseAttemptId);
+        deltasToAttemptId.put(base.getName(), baseAttemptId);
       }
     }
 

Reply via email to