This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 8d2dceb27e2 HIVE-28960: Compaction Stats updater does not collect
column stats when hive.stats.autogather is true (#5822)
8d2dceb27e2 is described below
commit 8d2dceb27e29905570dbade3bc66596f86943e2f
Author: Krisztian Kasa <[email protected]>
AuthorDate: Tue May 27 14:42:42 2025 +0200
HIVE-28960: Compaction Stats updater does not collect column stats when
hive.stats.autogather is true (#5822)
---
.../ql/txn/compactor/TestCrudCompactorOnTez.java | 44 ++++++++++++++++++++++
.../hadoop/hive/ql/txn/compactor/StatsUpdater.java | 20 +++++++++-
2 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
index 6f662f2cfee..035a1f9ca2c 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
@@ -38,6 +38,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
@@ -65,6 +66,7 @@
import org.apache.hadoop.hive.ql.io.BucketCodec;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
import org.apache.hive.streaming.HiveStreamingConnection;
import org.apache.hive.streaming.StreamingConnection;
@@ -93,6 +95,9 @@
@SuppressWarnings("deprecation")
public class TestCrudCompactorOnTez extends CompactorOnTezTest {
+ private static final String DB = "default";
+ private static final String TABLE1 = "t1";
+
@Test
public void testRebalanceCompactionWithParallelDeleteAsSecondOptimisticLock() throws Exception {
testRebalanceCompactionWithParallelDeleteAsSecond(true);
@@ -3665,4 +3670,43 @@ public void testFallbackForMergeCompactionWhenDeleteDeltaPresent() throws Except
verify(primary.get(), times(1)).run(any());
verify(secondary.get(), times(1)).run(any());
}
+
+ @Test
+ public void testMajorCompactionUpdateMissingColumnStats() throws Exception {
+ executeStatementOnDriver("drop table if exists " + TABLE1, driver);
+ executeStatementOnDriver("create table " + TABLE1 + "(a int, b varchar(128), c float) " +
+ "stored as orc TBLPROPERTIES ('transactional'='true')", driver);
+ executeStatementOnDriver("insert into " + TABLE1 + "(a, b, c) values (1, 'one', 1.1)", driver);
+ executeStatementOnDriver("insert into " + TABLE1 + "(a, b, c) values (2, 'two', 2.2)", driver);
+
+ executeStatementOnDriver("delete from " + TABLE1 + " where a = 1", driver);
+
+ CompactorTestUtil.runCompaction(conf, DB, TABLE1 , CompactionType.MAJOR, true);
+ CompactorTestUtil.runCleaner(conf);
+ verifySuccessfulCompaction(1);
+
+ org.apache.hadoop.hive.ql.metadata.Table table = Hive.get().getTable(DB, TABLE1);
+
+ Assert.assertEquals(3, StatsSetupConst.getColumnsHavingStats(table.getParameters()).size());
+ }
+
+ @Test
+ public void testMajorCompactionUpdateMissingColumnStatsOfPartition() throws Exception {
+ executeStatementOnDriver("drop table if exists " + TABLE1, driver);
+ executeStatementOnDriver("create table " + TABLE1 + "(a int, b varchar(128), c float) partitioned by (p string) " +
+ "stored as orc TBLPROPERTIES ('transactional'='true')", driver);
+ executeStatementOnDriver("insert into " + TABLE1 + "(a, b, c, p) values (1, 'one', 1.1, 'p1')", driver);
+ executeStatementOnDriver("insert into " + TABLE1 + "(a, b, c, p) values (2, 'two', 2.2, 'p1')", driver);
+
+ executeStatementOnDriver("delete from " + TABLE1 + " where a = 1", driver);
+
+ CompactorTestUtil.runCompaction(conf, DB, TABLE1 , CompactionType.MAJOR, true, "p=p1");
+ CompactorTestUtil.runCleaner(conf);
+ verifySuccessfulCompaction(1);
+
+ org.apache.hadoop.hive.ql.metadata.Table table = Hive.get().getTable(DB, TABLE1);
+ Partition partition = Hive.get().getPartition(table, new HashMap<String, String>() {{ put("p", "p1"); }});
+
+ Assert.assertEquals(3, StatsSetupConst.getColumnsHavingStats(partition.getParameters()).size());
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/StatsUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/StatsUpdater.java
index c22d4c2c51d..d7a79e89b59 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/StatsUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/StatsUpdater.java
@@ -17,10 +17,12 @@
*/
package org.apache.hadoop.hive.ql.txn.compactor;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.txn.entities.CompactionInfo;
import org.apache.hadoop.hive.metastore.utils.StringableMap;
import org.apache.hadoop.hive.ql.DriverUtils;
@@ -32,6 +34,7 @@
import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.apache.hadoop.hive.ql.txn.compactor.CompactorUtil.overrideConfProps;
@@ -71,6 +74,8 @@ public void gatherStats(HiveConf hiveConf, CompactionInfo ci, Map<String, String
// compute statistics for columns viewtime
StringBuilder sb = new StringBuilder("analyze table ")
.append(StatsUtils.getFullyQualifiedTableName(ci.dbname, ci.tableName));
+
+ final Map<String, String> properties;
if (ci.partName != null) {
sb.append(" partition(");
Map<String, String> partitionColumnValues = Warehouse.makeEscSpecFromName(ci.partName);
@@ -79,12 +84,23 @@ public void gatherStats(HiveConf hiveConf, CompactionInfo ci, Map<String, String
}
sb.setLength(sb.length() - 1); //remove trailing ,
sb.append(")");
+
+ Partition partition = CompactorUtil.resolvePartition(
+ hiveConf, msc, ci.dbname, ci.tableName, ci.partName, CompactorUtil.METADATA_FETCH_MODE.REMOTE);
+ properties = partition.getParameters();
+ } else {
+ properties = tableProperties;
}
+
sb.append(" compute statistics");
- if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER) && ci.isMajorCompaction()) {
- List<String> columnList = msc.findColumnsWithStats(CompactionInfo.compactionInfoToStruct(ci));
+ if (ci.isMajorCompaction()) {
+ List<String> columnList = msc.findColumnsWithStats(CompactionInfo.compactionInfoToStruct(ci)).stream()
+ .filter(columnName -> !StatsSetupConst.areColumnStatsUptoDate(properties, columnName))
+ .collect(Collectors.toList());
if (!columnList.isEmpty()) {
sb.append(" for columns ").append(String.join(",", columnList));
+ } else if (StatsSetupConst.areBasicStatsUptoDate(properties)) {
+ sb.append(" noscan");
}
} else {
sb.append(" noscan");