HIVE-20246: Make stats collection configurable via the DO_NOT_UPDATE_STATS table property (Alice Fan, reviewed by Aihua Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff255432 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff255432 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff255432 Branch: refs/heads/master Commit: ff25543282532613c9cfa768a8bf8ee728b3734e Parents: 1f9c70e Author: Aihua Xu <aihu...@apache.org> Authored: Wed Aug 22 10:05:12 2018 -0700 Committer: Aihua Xu <aihu...@apache.org> Committed: Wed Aug 22 10:05:12 2018 -0700 ---------------------------------------------------------------------- .../hadoop/hive/metastore/HiveMetaStore.java | 27 ++++++++-- .../hive/metastore/TestHiveMetaStore.java | 57 +++++++++++++++++++- 2 files changed, 79 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ff255432/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 067eb5a..e971d0f 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -3174,8 +3174,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { part.setCreateTime((int) time); part.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(time)); - if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreServerUtils.isView(tbl)) { + if (canUpdateStats(tbl)) { MetaStoreServerUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, envContext, true); } @@ -3791,6 +3790,27 @@ public class 
HiveMetaStore extends ThriftHiveMetastore { return result; } + /** + * Verify if update stats while altering partition(s) + * For the following three cases HMS will not update partition stats + * 1) Table property 'DO_NOT_UPDATE_STATS' = True + * 2) HMS configuration property 'STATS_AUTO_GATHER' = False + * 3) Is View + */ + private boolean canUpdateStats(Table tbl) { + Map<String,String> tblParams = tbl.getParameters(); + boolean updateStatsTbl = true; + if ((tblParams != null) && tblParams.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)) { + updateStatsTbl = !Boolean.valueOf(tblParams.get(StatsSetupConst.DO_NOT_UPDATE_STATS)); + } + if (!MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) || + MetaStoreServerUtils.isView(tbl) || + !updateStatsTbl) { + return false; + } + return true; + } + private void initializeAddedPartition( final Table tbl, final Partition part, boolean madeDir) throws MetaException { initializeAddedPartition(tbl, new PartitionSpecProxy.SimplePartitionWrapperIterator(part), madeDir); @@ -3798,8 +3818,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { private void initializeAddedPartition( final Table tbl, final PartitionSpecProxy.PartitionIterator part, boolean madeDir) throws MetaException { - if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreServerUtils.isView(tbl)) { + if (canUpdateStats(tbl)) { MetaStoreServerUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, null, true); } http://git-wip-us.apache.org/repos/asf/hive/blob/ff255432/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java index 60beab6..4937d9d 100644 --- 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.metastore; -import java.lang.reflect.Field; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; @@ -38,6 +37,8 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.lang.reflect.*; +import static org.mockito.Mockito.mock; import com.google.common.collect.Sets; import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder; @@ -57,6 +58,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; @@ -95,6 +97,8 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; public abstract class TestHiveMetaStore { private static final Logger LOG = LoggerFactory.getLogger(TestHiveMetaStore.class); @@ -3098,4 +3102,55 @@ public abstract class TestHiveMetaStore { int size = allUuids.size(); assertEquals(numAPICallsPerThread * parallelCalls, size); } + + /** + * While altering partition(s), verify DO NOT calculate partition statistics if + * <ol> + * <li>table property DO_NOT_UPDATE_STATS is true</li> + * <li>STATS_AUTO_GATHER is false</li> + * <li>Is View</li> + * </ol> + */ + @Test + public void 
testUpdatePartitionStat_doesNotUpdateStats() throws Exception { + final String DB_NAME = "db1"; + final String TABLE_NAME = "tbl1"; + Table tbl = new TableBuilder() + .setDbName(DB_NAME) + .setTableName(TABLE_NAME) + .addCol("id", "int") + .addTableParam(StatsSetupConst.DO_NOT_UPDATE_STATS, "true") + .build(null); + List<String> vals = new ArrayList<>(2); + vals.add("col1"); + vals.add("col2"); + Partition part = new Partition(); + part.setDbName(DB_NAME); + part.setTableName(TABLE_NAME); + part.setValues(vals); + part.setParameters(new HashMap<>()); + part.setSd(tbl.getSd().deepCopy()); + part.getSd().setSerdeInfo(tbl.getSd().getSerdeInfo()); + part.getSd().setLocation(tbl.getSd().getLocation() + "/partCol=1"); + Warehouse wh = mock(Warehouse.class); + //Execute initializeAddedPartition() and it should not trigger updatePartitionStatsFast() as DO_NOT_UPDATE_STATS is true + HiveMetaStore.HMSHandler hms = new HiveMetaStore.HMSHandler("", conf, false); + Method m = hms.getClass().getDeclaredMethod("initializeAddedPartition", Table.class, Partition.class, boolean.class); + m.setAccessible(true); + //Invoke initializeAddedPartition(); + m.invoke(hms, tbl, part, false); + verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation()); + + //Remove tbl's DO_NOT_UPDATE_STATS & set STATS_AUTO_GATHER = false + tbl.unsetParameters(); + MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, false); + m.invoke(hms, tbl, part, false); + verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation()); + + //Set STATS_AUTO_GATHER = true and set tbl as a VIRTUAL_VIEW + MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, true); + tbl.setTableType("VIRTUAL_VIEW"); + m.invoke(hms, tbl, part, false); + verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation()); + } }