HIVE-20246: Configurable collecting stats by using DO_NOT_UPDATE_STATS table 
property (Alice Fan, reviewed by Aihua Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff255432
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff255432
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff255432

Branch: refs/heads/master
Commit: ff25543282532613c9cfa768a8bf8ee728b3734e
Parents: 1f9c70e
Author: Aihua Xu <aihu...@apache.org>
Authored: Wed Aug 22 10:05:12 2018 -0700
Committer: Aihua Xu <aihu...@apache.org>
Committed: Wed Aug 22 10:05:12 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hive/metastore/HiveMetaStore.java    | 27 ++++++++--
 .../hive/metastore/TestHiveMetaStore.java       | 57 +++++++++++++++++++-
 2 files changed, 79 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ff255432/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 067eb5a..e971d0f 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -3174,8 +3174,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         part.setCreateTime((int) time);
         part.putToParameters(hive_metastoreConstants.DDL_TIME, 
Long.toString(time));
 
-        if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) &&
-            !MetaStoreServerUtils.isView(tbl)) {
+        if (canUpdateStats(tbl)) {
           MetaStoreServerUtils.updatePartitionStatsFast(part, tbl, wh, 
madeDir, false, envContext, true);
         }
 
@@ -3791,6 +3790,27 @@ public class HiveMetaStore extends ThriftHiveMetastore {
       return result;
     }
 
+    /**
+     * Returns whether HMS should update partition stats for the given table.
+     * HMS will not update partition stats (returns false) in any of these cases:
+     * 1) Table property 'DO_NOT_UPDATE_STATS' = True
+     * 2) HMS configuration property 'STATS_AUTO_GATHER' = False
+     * 3) The table is a view
+     */
+    private boolean canUpdateStats(Table tbl) {
+        Map<String,String> tblParams = tbl.getParameters();
+        boolean updateStatsTbl = true;
+        if ((tblParams != null) && 
tblParams.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)) {
+            updateStatsTbl = 
!Boolean.valueOf(tblParams.get(StatsSetupConst.DO_NOT_UPDATE_STATS));
+        }
+        if (!MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) ||
+            MetaStoreServerUtils.isView(tbl) ||
+            !updateStatsTbl) {
+          return false;
+        }
+        return true;
+    }
+
     private void initializeAddedPartition(
         final Table tbl, final Partition part, boolean madeDir) throws 
MetaException {
       initializeAddedPartition(tbl, new 
PartitionSpecProxy.SimplePartitionWrapperIterator(part), madeDir);
@@ -3798,8 +3818,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
 
     private void initializeAddedPartition(
         final Table tbl, final PartitionSpecProxy.PartitionIterator part, 
boolean madeDir) throws MetaException {
-      if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) &&
-          !MetaStoreServerUtils.isView(tbl)) {
+      if (canUpdateStats(tbl)) {
         MetaStoreServerUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, 
false, null, true);
       }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ff255432/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
 
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index 60beab6..4937d9d 100644
--- 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ 
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.hive.metastore;
 
-import java.lang.reflect.Field;
 import java.io.IOException;
 import java.sql.Connection;
 import java.sql.DriverManager;
@@ -38,6 +37,8 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
+import java.lang.reflect.*;
+import static org.mockito.Mockito.mock;
 
 import com.google.common.collect.Sets;
 import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder;
@@ -57,6 +58,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
@@ -95,6 +97,8 @@ import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
 
 public abstract class TestHiveMetaStore {
   private static final Logger LOG = 
LoggerFactory.getLogger(TestHiveMetaStore.class);
@@ -3098,4 +3102,55 @@ public abstract class TestHiveMetaStore {
     int size = allUuids.size();
     assertEquals(numAPICallsPerThread * parallelCalls, size);
   }
+
+  /**
+   * Verify that adding a partition does NOT calculate partition statistics 
if
+   * <ol>
+   *   <li>table property DO_NOT_UPDATE_STATS is true</li>
+   *   <li>STATS_AUTO_GATHER is false</li>
+   *   <li>the table is a view</li>
+   * </ol>
+   */
+  @Test
+  public void testUpdatePartitionStat_doesNotUpdateStats() throws Exception {
+    final String DB_NAME = "db1";
+    final String TABLE_NAME = "tbl1";
+    Table tbl = new TableBuilder()
+        .setDbName(DB_NAME)
+        .setTableName(TABLE_NAME)
+        .addCol("id", "int")
+        .addTableParam(StatsSetupConst.DO_NOT_UPDATE_STATS, "true")
+        .build(null);
+    List<String> vals = new ArrayList<>(2);
+    vals.add("col1");
+    vals.add("col2");
+    Partition part = new Partition();
+    part.setDbName(DB_NAME);
+    part.setTableName(TABLE_NAME);
+    part.setValues(vals);
+    part.setParameters(new HashMap<>());
+    part.setSd(tbl.getSd().deepCopy());
+    part.getSd().setSerdeInfo(tbl.getSd().getSerdeInfo());
+    part.getSd().setLocation(tbl.getSd().getLocation() + "/partCol=1");
+    Warehouse wh = mock(Warehouse.class);
+    //Execute initializeAddedPartition() and it should not trigger 
updatePartitionStatsFast() as DO_NOT_UPDATE_STATS is true
+    HiveMetaStore.HMSHandler hms = new HiveMetaStore.HMSHandler("", conf, 
false);
+    Method m = hms.getClass().getDeclaredMethod("initializeAddedPartition", 
Table.class, Partition.class, boolean.class);
+    m.setAccessible(true);
+    // Invoke initializeAddedPartition(). NOTE(review): the mocked wh is never passed to hms in this test; confirm the verify() calls are meaningful.
+    m.invoke(hms, tbl, part, false);
+    verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation());
+
+    // Remove tbl's DO_NOT_UPDATE_STATS and set STATS_AUTO_GATHER = false
+    tbl.unsetParameters();
+    MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, false);
+    m.invoke(hms, tbl, part, false);
+    verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation());
+
+    // Set STATS_AUTO_GATHER = true and mark tbl as a VIRTUAL_VIEW
+    MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, true);
+    tbl.setTableType("VIRTUAL_VIEW");
+    m.invoke(hms, tbl, part, false);
+    verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation());
+  }
 }

Reply via email to