This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d2dceb27e2 HIVE-28960: Compaction Stats updater does not collect column stats when hive.stats.autogather is true (#5822)
8d2dceb27e2 is described below

commit 8d2dceb27e29905570dbade3bc66596f86943e2f
Author: Krisztian Kasa <[email protected]>
AuthorDate: Tue May 27 14:42:42 2025 +0200

    HIVE-28960: Compaction Stats updater does not collect column stats when hive.stats.autogather is true (#5822)
---
 .../ql/txn/compactor/TestCrudCompactorOnTez.java   | 44 ++++++++++++++++++++++
 .../hadoop/hive/ql/txn/compactor/StatsUpdater.java | 20 +++++++++-
 2 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
index 6f662f2cfee..035a1f9ca2c 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
@@ -38,6 +38,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
@@ -65,6 +66,7 @@
 import org.apache.hadoop.hive.ql.io.BucketCodec;
 import org.apache.hadoop.hive.ql.lockmgr.LockException;
 import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
 import org.apache.hive.streaming.HiveStreamingConnection;
 import org.apache.hive.streaming.StreamingConnection;
@@ -93,6 +95,9 @@
 @SuppressWarnings("deprecation")
 public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
+  private static final String DB = "default";
+  private static final String TABLE1 = "t1";
+
   @Test
   public void 
testRebalanceCompactionWithParallelDeleteAsSecondOptimisticLock() throws 
Exception {
     testRebalanceCompactionWithParallelDeleteAsSecond(true);
@@ -3665,4 +3670,43 @@ public void testFallbackForMergeCompactionWhenDeleteDeltaPresent() throws Except
     verify(primary.get(), times(1)).run(any());
     verify(secondary.get(), times(1)).run(any());
   }
+
+  @Test
+  public void testMajorCompactionUpdateMissingColumnStats() throws Exception { // after a MAJOR compaction of an unpartitioned table, its parameters must list stats for all 3 columns
+    executeStatementOnDriver("drop table if exists " + TABLE1, driver); // start from a clean slate
+    executeStatementOnDriver("create table " + TABLE1 + "(a int, b 
varchar(128), c float) " +
+            "stored as orc TBLPROPERTIES ('transactional'='true')", driver); // transactional ORC table with columns a, b, c
+    executeStatementOnDriver("insert into " + TABLE1 + "(a, b, c) values (1, 
'one', 1.1)", driver);
+    executeStatementOnDriver("insert into " + TABLE1 + "(a, b, c) values (2, 
'two', 2.2)", driver);
+
+    executeStatementOnDriver("delete from " + TABLE1 + " where a = 1", driver); // remove one of the two inserted rows before compacting
+
+    CompactorTestUtil.runCompaction(conf, DB,  TABLE1 , CompactionType.MAJOR, 
true); // NOTE(review): meaning of the trailing boolean flag is not visible here - confirm in CompactorTestUtil
+    CompactorTestUtil.runCleaner(conf); // cleaner runs after the compaction
+    verifySuccessfulCompaction(1); // presumably asserts exactly one successful compaction - confirm against the helper
+
+    org.apache.hadoop.hive.ql.metadata.Table table = Hive.get().getTable(DB, 
TABLE1); // re-read the table to inspect its parameters after compaction
+
+    Assert.assertEquals(3, 
StatsSetupConst.getColumnsHavingStats(table.getParameters()).size()); // one entry expected per column: a, b, c
+  }
+
+  @Test
+  public void testMajorCompactionUpdateMissingColumnStatsOfPartition() throws 
Exception { // after a MAJOR compaction of partition p=p1, the partition parameters must list stats for all 3 non-partition columns
+    executeStatementOnDriver("drop table if exists " + TABLE1, driver); // start from a clean slate
+    executeStatementOnDriver("create table " + TABLE1 + "(a int, b 
varchar(128), c float) partitioned by (p string) " +
+            "stored as orc TBLPROPERTIES ('transactional'='true')", driver); // transactional ORC table with columns a, b, c and partition column p
+    executeStatementOnDriver("insert into " + TABLE1 + "(a, b, c, p) values 
(1, 'one', 1.1, 'p1')", driver);
+    executeStatementOnDriver("insert into " + TABLE1 + "(a, b, c, p) values 
(2, 'two', 2.2, 'p1')", driver);
+
+    executeStatementOnDriver("delete from " + TABLE1 + " where a = 1", driver); // remove one of the two rows inserted into partition p1
+
+    CompactorTestUtil.runCompaction(conf, DB,  TABLE1 , CompactionType.MAJOR, 
true, "p=p1"); // compaction is requested for partition p=p1; NOTE(review): meaning of the boolean flag is not visible here
+    CompactorTestUtil.runCleaner(conf); // cleaner runs after the compaction
+    verifySuccessfulCompaction(1); // presumably asserts exactly one successful compaction - confirm against the helper
+
+    org.apache.hadoop.hive.ql.metadata.Table table = Hive.get().getTable(DB, 
TABLE1); // table handle is needed to look up the partition below
+    Partition partition = Hive.get().getPartition(table, new HashMap<String, 
String>() {{ put("p", "p1"); }}); // partition spec {p=p1}, built with an anonymous HashMap subclass and instance initializer
+
+    Assert.assertEquals(3, 
StatsSetupConst.getColumnsHavingStats(partition.getParameters()).size()); // stats are checked on the partition's parameters, not the table's
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/StatsUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/StatsUpdater.java
index c22d4c2c51d..d7a79e89b59 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/StatsUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/StatsUpdater.java
@@ -17,10 +17,12 @@
  */
 package org.apache.hadoop.hive.ql.txn.compactor;
 
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.txn.entities.CompactionInfo;
 import org.apache.hadoop.hive.metastore.utils.StringableMap;
 import org.apache.hadoop.hive.ql.DriverUtils;
@@ -32,6 +34,7 @@
 
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 import static org.apache.commons.lang3.StringUtils.isNotBlank;
 import static 
org.apache.hadoop.hive.ql.txn.compactor.CompactorUtil.overrideConfProps;
@@ -71,6 +74,8 @@ public void gatherStats(HiveConf hiveConf, CompactionInfo ci, Map<String, String
             // compute statistics for columns viewtime
             StringBuilder sb = new StringBuilder("analyze table ")
                     .append(StatsUtils.getFullyQualifiedTableName(ci.dbname, 
ci.tableName));
+
+            final Map<String, String> properties;
             if (ci.partName != null) {
                 sb.append(" partition(");
                 Map<String, String> partitionColumnValues = 
Warehouse.makeEscSpecFromName(ci.partName);
@@ -79,12 +84,23 @@ public void gatherStats(HiveConf hiveConf, CompactionInfo ci, Map<String, String
                 }
                 sb.setLength(sb.length() - 1); //remove trailing ,
                 sb.append(")");
+
+                Partition partition = CompactorUtil.resolvePartition(
+                        hiveConf, msc, ci.dbname, ci.tableName, ci.partName, 
CompactorUtil.METADATA_FETCH_MODE.REMOTE);
+                properties = partition.getParameters();
+            } else {
+                properties = tableProperties;
             }
+
             sb.append(" compute statistics");
-            if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER) && 
ci.isMajorCompaction()) {
-                List<String> columnList = 
msc.findColumnsWithStats(CompactionInfo.compactionInfoToStruct(ci));
+            if (ci.isMajorCompaction()) {
+                List<String> columnList = 
msc.findColumnsWithStats(CompactionInfo.compactionInfoToStruct(ci)).stream()
+                        .filter(columnName -> 
!StatsSetupConst.areColumnStatsUptoDate(properties, columnName))
+                        .collect(Collectors.toList());
                 if (!columnList.isEmpty()) {
                     sb.append(" for columns ").append(String.join(",", 
columnList));
+                } else if (StatsSetupConst.areBasicStatsUptoDate(properties)) {
+                    sb.append(" noscan");
                 }
             } else {
                 sb.append(" noscan");

Reply via email to