This is an automated email from the ASF dual-hosted git repository.
dengzh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 88e2175b2c0 HIVE-28700: MRCompactor may cause data loss when
performing the major compaction (#5603) (Zhihua Deng, reviewed by Denys
Kuzmenko, Marta Kuczora)
88e2175b2c0 is described below
commit 88e2175b2c04403e35b404634306f60d599c3d83
Author: dengzh <[email protected]>
AuthorDate: Wed Jan 15 11:29:08 2025 +0800
HIVE-28700: MRCompactor may cause data loss when performing the major
compaction (#5603) (Zhihua Deng, reviewed by Denys Kuzmenko, Marta Kuczora)
---
.../hive/ql/txn/compactor/TestCompactor.java | 28 ++++++++++++++++++++++
.../hadoop/hive/ql/txn/compactor/MRCompactor.java | 5 +---
2 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index afe91493d02..8b93bc42a53 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -2574,6 +2574,34 @@ public class TestCompactor extends TestCompactorBase {
Assert.assertEquals("d", rs.get(1));
}
+ @Test
+ public void testMajorCompactionOnBaseMissingBucket() throws Exception {
+ dropTables("full_acid", "ext");
+ HiveConf hiveConf = driver.getConf();
+ String reducers = hiveConf.get("mapreduce.job.reduces");
+ hiveConf.set("mapreduce.job.reduces", "7");
+ executeStatementOnDriver("create table ext (a int)", driver);
+ executeStatementOnDriver("insert into table ext values(1),(2),(3),(3),(3),(3),(4),(5),(6),(7)", driver);
+ executeStatementOnDriver("create table full_acid(a int) stored as orc tblproperties('transactional'='true')", driver);
+ executeStatementOnDriver("insert overwrite table full_acid select * from ext where a = 3", driver);
+ executeStatementOnDriver("insert into table full_acid select * from ext where a != 3 group by a", driver);
+
+ List<Integer> values1 = new ArrayList<>();
+ List<Integer> values2 = new ArrayList<>();
+ executeStatementOnDriver("select * from full_acid order by a", driver);
+ driver.getResults(values1);
+ executeStatementOnDriver("alter table full_acid compact 'major'", driver);
+ Assert.assertEquals(10, values1.size());
+
+ runWorker(conf);
+ executeStatementOnDriver("select * from full_acid order by a", driver);
+ driver.getResults(values2);
+ Assert.assertEquals(values1, values2);
+
+ hiveConf.set("mapreduce.job.reduces", reducers);
+ dropTables("full_acid", "ext");
+ }
+
@Test
public void testAcidDirCacheOnDropPartitionedTable() throws Exception {
int cacheDurationInMinutes = 10;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MRCompactor.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MRCompactor.java
index 3443ea20455..db5aa26c48e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MRCompactor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MRCompactor.java
@@ -571,12 +571,9 @@ public class MRCompactor implements Compactor {
attemptId = Integer.valueOf(attemptIdString);
}
deltasToAttemptId.put(deltas[i].getName(), attemptId);
- if (baseAttemptId != null) {
- deltasToAttemptId.put(base.getName(), baseAttemptId);
- }
}
if (baseAttemptId != null) {
- deltasToAttemptId.put(base.toString(), baseAttemptId);
+ deltasToAttemptId.put(base.getName(), baseAttemptId);
}
}