http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
new file mode 100644
index 0000000..54828f2
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.FileMetadataExprType;
+import org.apache.hadoop.hive.metastore.api.Function;
+import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
+import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestOldSchema {
+  private ObjectStore store = null;
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestOldSchema.class.getName());
+
+  public static class MockPartitionExpressionProxy implements PartitionExpressionProxy {
+    @Override
+    public String convertExprToFilter(byte[] expr) throws MetaException {
+      return null;
+    }
+
+    @Override
+    public boolean filterPartitionsByExpr(List<String> partColumnNames,
+        List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] expr, String defaultPartitionName,
+        List<String> partitionNames) throws MetaException {
+      return false;
+    }
+
+    @Override
+    public FileMetadataExprType getMetadataType(String inputFormat) {
+      return null;
+    }
+
+    @Override
+    public SearchArgument createSarg(byte[] expr) {
+      return null;
+    }
+
+    @Override
+    public FileFormatProxy getFileFormatProxy(FileMetadataExprType type) {
+      return null;
+    }
+  }
+
+  String bitVectors[] = new String[2];
+
+  @Before
+  public void setUp() throws Exception {
+    HiveConf conf = new HiveConf();
+    conf.setVar(HiveConf.ConfVars.METASTORE_EXPRESSION_PROXY_CLASS,
+        MockPartitionExpressionProxy.class.getName());
+    conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR, false);
+
+    store = new ObjectStore();
+    store.setConf(conf);
+    dropAllStoreObjects(store);
+
+    HyperLogLog hll = HyperLogLog.builder().build();
+    hll.addLong(1);
+    bitVectors[1] = hll.serialize();
+    hll = HyperLogLog.builder().build();
+    hll.addLong(2);
+    hll.addLong(3);
+    hll.addLong(3);
+    hll.addLong(4);
+    bitVectors[0] = hll.serialize();
+  }
+
+  @After
+  public void tearDown() {
+  }
+
+  /**
+   * Tests partition operations
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartitionOps() throws Exception {
+    String dbName = "default";
+    String tableName = "snp";
+    Database db1 = new Database(dbName, "description", "locationurl", null);
+    store.createDatabase(db1);
+    long now = System.currentTimeMillis();
+    List<FieldSchema> cols = new ArrayList<>();
+    cols.add(new FieldSchema("col1", "long", "nocomment"));
+    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+        serde, null, null, Collections.<String, String> emptyMap());
+    List<FieldSchema> partCols = new ArrayList<>();
+    partCols.add(new FieldSchema("ds", "string", ""));
+    Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+        Collections.<String, String> emptyMap(), null, null, null);
+    store.createTable(table);
+
+    Deadline.startTimer("getPartition");
+    for (int i = 0; i < 10; i++) {
+      List<String> partVal = new ArrayList<>();
+      partVal.add(String.valueOf(i));
+      StorageDescriptor psd = new StorageDescriptor(sd);
+      psd.setLocation("file:/tmp/default/hit/ds=" + partVal);
+      Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
+          Collections.<String, String> emptyMap());
+      store.addPartition(part);
+      ColumnStatistics cs = new ColumnStatistics();
+      ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+      desc.setLastAnalyzed(now);
+      desc.setPartName("ds=" + String.valueOf(i));
+      cs.setStatsDesc(desc);
+      ColumnStatisticsObj obj = new ColumnStatisticsObj();
+      obj.setColName("col1");
+      obj.setColType("bigint");
+      ColumnStatisticsData data = new ColumnStatisticsData();
+      LongColumnStatsData dcsd = new LongColumnStatsData();
+      dcsd.setHighValue(1000 + i);
+      dcsd.setLowValue(-1000 - i);
+      dcsd.setNumNulls(i);
+      dcsd.setNumDVs(10 * i + 1);
+      dcsd.setBitVectors(bitVectors[0]);
+      data.setLongStats(dcsd);
+      obj.setStatsData(data);
+      cs.addToStatsObj(obj);
+      store.updatePartitionColumnStatistics(cs, partVal);
+
+    }
+
+    Checker statChecker = new Checker() {
+      @Override
+      public void checkStats(AggrStats aggrStats) throws Exception {
+        Assert.assertEquals(10, aggrStats.getPartsFound());
+        Assert.assertEquals(1, aggrStats.getColStatsSize());
+        ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+        Assert.assertEquals("col1", cso.getColName());
+        Assert.assertEquals("bigint", cso.getColType());
+        LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+        Assert.assertEquals(1009, lcsd.getHighValue(), 0.01);
+        Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01);
+        Assert.assertEquals(45, lcsd.getNumNulls());
+        Assert.assertEquals(91, lcsd.getNumDVs());
+      }
+    };
+    List<String> partNames = new ArrayList<>();
+    for (int i = 0; i < 10; i++) {
+      partNames.add("ds=" + i);
+    }
+    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
+        Arrays.asList("col1"));
+    statChecker.checkStats(aggrStats);
+
+  }
+
+  private static interface Checker {
+    void checkStats(AggrStats aggrStats) throws Exception;
+  }
+
+  public static void dropAllStoreObjects(RawStore store) throws MetaException,
+      InvalidObjectException, InvalidInputException {
+    try {
+      Deadline.registerIfNot(100000);
+      Deadline.startTimer("getPartition");
+      List<String> dbs = store.getAllDatabases();
+      for (int i = 0; i < dbs.size(); i++) {
+        String db = dbs.get(i);
+        List<String> tbls = store.getAllTables(db);
+        for (String tbl : tbls) {
+          List<Partition> parts = store.getPartitions(db, tbl, 100);
+          for (Partition part : parts) {
+            store.dropPartition(db, tbl, part.getValues());
+          }
+          store.dropTable(db, tbl);
+        }
+        store.dropDatabase(db);
+      }
+    } catch (NoSuchObjectException e) {
+    }
+  }
+
+}
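Note on the HyperLogLog usage above: setUp() seeds bitVectors[0] from the values 2, 3, 3, 4 (three distinct) and bitVectors[1] from the single value 1, and the serialized string is what LongColumnStatsData.setBitVectors() stores. The following is a minimal standalone sketch of that flow; builder(), addLong(), and serialize() are confirmed by the diff itself, while count() is an assumption about the estimator's read side.

    import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;

    public class HllBitVectorSketch {
      public static void main(String[] args) {
        // Empty estimator, exactly as the test's setUp() builds it.
        HyperLogLog hll = HyperLogLog.builder().build();

        // Duplicates do not raise the estimate: 2, 3, 3, 4 counts as 3 distinct,
        // which is why the HBase tests below promise "NDV will be 3".
        hll.addLong(2);
        hll.addLong(3);
        hll.addLong(3);
        hll.addLong(4);

        // count() is assumed API; the diff only demonstrates serialize().
        System.out.println("estimated NDV: " + hll.count());

        // The serialized form is what setBitVectors() persists in the metastore.
        String bitVector = hll.serialize();
        System.out.println("serialized sketch: " + bitVector);
      }
    }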
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
index 1fa9447..e31dad3 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java
@@ -23,6 +23,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.ObjectStore;
 import org.apache.hadoop.hive.metastore.TableType;
@@ -740,4 +742,158 @@ public class TestCachedStore {
     aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
     Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
   }
+
+  @Test
+  public void testPartitionAggrStats() throws Exception {
+    String dbName = "testTableColStatsOps1";
+    String tblName = "tbl1";
+    String colName = "f1";
+
+    Database db = new Database(dbName, null, "some_location", null);
+    cachedStore.createDatabase(db);
+
+    List<FieldSchema> cols = new ArrayList<FieldSchema>();
+    cols.add(new FieldSchema(colName, "int", null));
+    List<FieldSchema> partCols = new ArrayList<FieldSchema>();
+    partCols.add(new FieldSchema("col", "int", null));
+    StorageDescriptor sd =
+        new StorageDescriptor(cols, null, "input", "output", false, 0,
+            new SerDeInfo("serde", "seriallib", new HashMap<String, String>()),
+            null, null, null);
+
+    Table tbl =
+        new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<String, String>(),
+            null, null, TableType.MANAGED_TABLE.toString());
+    cachedStore.createTable(tbl);
+
+    List<String> partVals1 = new ArrayList<String>();
+    partVals1.add("1");
+    List<String> partVals2 = new ArrayList<String>();
+    partVals2.add("2");
+
+    Partition ptn1 =
+        new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap<String, String>());
+    cachedStore.addPartition(ptn1);
+    Partition ptn2 =
+        new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap<String, String>());
+    cachedStore.addPartition(ptn2);
+
+    ColumnStatistics stats = new ColumnStatistics();
+    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
+    statsDesc.setPartName("col");
+    List<ColumnStatisticsObj> colStatObjs = new ArrayList<ColumnStatisticsObj>();
+
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
+    LongColumnStatsData longStats = new LongColumnStatsData();
+    longStats.setLowValue(0);
+    longStats.setHighValue(100);
+    longStats.setNumNulls(50);
+    longStats.setNumDVs(30);
+    data.setLongStats(longStats);
+    colStatObjs.add(colStats);
+
+    stats.setStatsDesc(statsDesc);
+    stats.setStatsObj(colStatObjs);
+
+    cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1);
+
+    longStats.setNumDVs(40);
+    cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2);
+
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(colName);
+    List<String> aggrPartVals = new ArrayList<String>();
+    aggrPartVals.add("1");
+    aggrPartVals.add("2");
+    AggrStats aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
+    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
+    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 40);
+    aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
+    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
+    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 40);
+  }
+
+  @Test
+  public void testPartitionAggrStatsBitVector() throws Exception {
+    String dbName = "testTableColStatsOps2";
+    String tblName = "tbl2";
+    String colName = "f1";
+
+    Database db = new Database(dbName, null, "some_location", null);
+    cachedStore.createDatabase(db);
+
+    List<FieldSchema> cols = new ArrayList<FieldSchema>();
+    cols.add(new FieldSchema(colName, "int", null));
+    List<FieldSchema> partCols = new ArrayList<FieldSchema>();
+    partCols.add(new FieldSchema("col", "int", null));
+    StorageDescriptor sd =
+        new StorageDescriptor(cols, null, "input", "output", false, 0,
+            new SerDeInfo("serde", "seriallib", new HashMap<String, String>()),
+            null, null, null);
+
+    Table tbl =
+        new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<String, String>(),
+            null, null, TableType.MANAGED_TABLE.toString());
+    cachedStore.createTable(tbl);
+
+    List<String> partVals1 = new ArrayList<String>();
+    partVals1.add("1");
+    List<String> partVals2 = new ArrayList<String>();
+    partVals2.add("2");
+
+    Partition ptn1 =
+        new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap<String, String>());
+    cachedStore.addPartition(ptn1);
+    Partition ptn2 =
+        new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap<String, String>());
+    cachedStore.addPartition(ptn2);
+
+    ColumnStatistics stats = new ColumnStatistics();
+    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
+    statsDesc.setPartName("col");
+    List<ColumnStatisticsObj> colStatObjs = new ArrayList<ColumnStatisticsObj>();
+
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
+    LongColumnStatsData longStats = new LongColumnStatsData();
+    longStats.setLowValue(0);
+    longStats.setHighValue(100);
+    longStats.setNumNulls(50);
+    longStats.setNumDVs(30);
+
+    HyperLogLog hll = HyperLogLog.builder().build();
+    hll.addLong(1);
+    hll.addLong(2);
+    hll.addLong(3);
+    longStats.setBitVectors(hll.serialize());
+
+    data.setLongStats(longStats);
+    colStatObjs.add(colStats);
+
+    stats.setStatsDesc(statsDesc);
+    stats.setStatsObj(colStatObjs);
+
+    cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1);
+
+    longStats.setNumDVs(40);
+    hll = HyperLogLog.builder().build();
+    hll.addLong(2);
+    hll.addLong(3);
+    hll.addLong(4);
+    hll.addLong(5);
+    longStats.setBitVectors(hll.serialize());
+
+    cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2);
+
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(colName);
+    List<String> aggrPartVals = new ArrayList<String>();
+    aggrPartVals.add("1");
+    aggrPartVals.add("2");
+    AggrStats aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
+    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
+    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 5);
+    aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
+    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
+    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 5);
+  }
 }
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
index ecc99c3..9cf1fb8 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java
@@ -28,6 +28,7 @@ import java.util.TreeMap;
 
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -115,7 +116,11 @@ public class TestHBaseAggregateStatsCacheWithBitVector {
         dcsd.setLowValue(-20.1234213423);
         dcsd.setNumNulls(30);
         dcsd.setNumDVs(12342);
-        dcsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
+        HyperLogLog hll = HyperLogLog.builder().build();
+        hll.addDouble(1);
+        hll.addDouble(2);
+        hll.addDouble(3);
+        dcsd.setBitVectors(hll.serialize());
         data.setDoubleStats(dcsd);
         obj.setStatsData(data);
         cs.addToStatsObj(obj);
@@ -135,7 +140,11 @@ public class TestHBaseAggregateStatsCacheWithBitVector {
         dcsd.setLowValue(-20.1234213423);
         dcsd.setNumNulls(30);
         dcsd.setNumDVs(12342);
-        dcsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}");
+        hll = HyperLogLog.builder().build();
+        hll.addDouble(3);
+        hll.addDouble(4);
+        hll.addDouble(5);
+        dcsd.setBitVectors(hll.serialize());
         data.setDoubleStats(dcsd);
         obj.setStatsData(data);
         cs.addToStatsObj(obj);
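A mental model for the bitvector aggregation asserted above: partition 1's sketch saw {1, 2, 3} and partition 2's saw {2, 3, 4, 5}, so merging the two sketches estimates the distinct count of the union, which is 5, exactly the NumDVs the bitvector test expects (versus 40, the plain per-partition maximum, in the non-bitvector test). Below is a minimal sketch of that union, assuming HyperLogLog exposes merge() and count(); only builder/addLong/serialize appear in this diff.

    import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;

    public class HllUnionSketch {
      public static void main(String[] args) {
        HyperLogLog p1 = HyperLogLog.builder().build();   // partition 1
        p1.addLong(1);
        p1.addLong(2);
        p1.addLong(3);

        HyperLogLog p2 = HyperLogLog.builder().build();   // partition 2
        p2.addLong(2);
        p2.addLong(3);
        p2.addLong(4);
        p2.addLong(5);

        // merge() is assumed API: HLL registers combine by element-wise max,
        // so the merged sketch estimates the union {1, 2, 3, 4, 5}, i.e. 5.
        p1.merge(p2);
        System.out.println("aggregate NDV: " + p1.count());
      }
    }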
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
index 99ce96c..4d868b0 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
@@ -28,6 +28,7 @@ import java.util.TreeMap;
 
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.StatObjectConverter;
 import org.apache.hadoop.hive.metastore.api.AggrStats;
@@ -62,8 +63,7 @@
   SortedMap<String, Cell> rows = new TreeMap<>();
 
   // NDV will be 3 for the bitVectors
-  String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}";
-
+  String bitVectors = null;
 
   @Before
   public void before() throws IOException {
     MockitoAnnotations.initMocks(this);
@@ -71,6 +71,11 @@
     conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
     store = MockUtils.init(conf, htable, rows);
     store.backdoor().getStatsCache().resetCounters();
+    HyperLogLog hll = HyperLogLog.builder().build();
+    hll.addLong(1);
+    hll.addLong(2);
+    hll.addLong(3);
+    bitVectors = hll.serialize();
   }
 
   private static interface Checker {
@@ -395,7 +400,7 @@
         dcsd.setHighValue(1000 + i);
         dcsd.setLowValue(-1000 - i);
         dcsd.setNumNulls(i);
-        dcsd.setNumDVs(10 * i);
+        dcsd.setNumDVs(i == 0 ? 1 : 10 * i);
         data.setLongStats(dcsd);
         obj.setStatsData(data);
         cs.addToStatsObj(obj);
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
index 74e1669..0ad2780 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
@@ -28,6 +28,7 @@ import java.util.TreeMap;
 
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.StatObjectConverter;
 import org.apache.hadoop.hive.metastore.api.AggrStats;
@@ -61,9 +62,8 @@ public class TestHBaseAggregateStatsNDVUniformDist {
   SortedMap<String, Cell> rows = new TreeMap<>();
 
   // NDV will be 3 for bitVectors[0] and 1 for bitVectors[1]
-  String bitVectors[] = {
-      "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}",
-      "{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}" };
+  String bitVectors[] = new String[2];
+
 
   @Before
   public void before() throws IOException {
@@ -73,6 +73,15 @@
     conf.setBoolean(HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION.varname, true);
     store = MockUtils.init(conf, htable, rows);
     store.backdoor().getStatsCache().resetCounters();
+    HyperLogLog hll = HyperLogLog.builder().build();
+    hll.addLong(1);
+    bitVectors[1] = hll.serialize();
+    hll = HyperLogLog.builder().build();
+    hll.addLong(2);
+    hll.addLong(3);
+    hll.addLong(3);
+    hll.addLong(4);
+    bitVectors[0] = hll.serialize();
   }
 
   private static interface Checker {
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 97bf839..16c440f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -3396,7 +3396,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
         ColStatistics.Range r = cs.getRange();
         StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue,
             r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(),
-            cs.getNumNulls(), cs.getCountDistint(), cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
+            cs.getNumNulls(), cs.getCountDistint(), null, cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
         ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
         colStats = Collections.singletonList(cso);
         StatsSetupConst.setColumnStatsState(tblProps, colNames);
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index aa77234..2380073 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -695,38 +695,40 @@ public final class MetaDataFormatUtils {
       ColumnStatisticsData csd = cso.getStatsData();
       if (csd.isSetBinaryStats()) {
         BinaryColumnStatsData bcsd = csd.getBinaryStats();
-        appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", bcsd.getAvgColLen(),
+        appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", bcsd.getAvgColLen(),
             bcsd.getMaxColLen(), "", "");
       } else if (csd.isSetStringStats()) {
         StringColumnStatsData scsd = csd.getStringStats();
         appendColumnStats(tableInfo, "", "", scsd.getNumNulls(), scsd.getNumDVs(),
-            scsd.getAvgColLen(), scsd.getMaxColLen(), "", "");
+            scsd.getBitVectors() == null ? "" : scsd.getBitVectors(), scsd.getAvgColLen(),
+            scsd.getMaxColLen(), "", "");
       } else if (csd.isSetBooleanStats()) {
         BooleanColumnStatsData bcsd = csd.getBooleanStats();
-        appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "",
+        appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "", "",
             bcsd.getNumTrues(), bcsd.getNumFalses());
       } else if (csd.isSetDecimalStats()) {
         DecimalColumnStatsData dcsd = csd.getDecimalStats();
         appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()),
             convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(),
+            dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(),
             "", "", "", "");
       } else if (csd.isSetDoubleStats()) {
         DoubleColumnStatsData dcsd = csd.getDoubleStats();
         appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(),
-            dcsd.getNumDVs(), "", "", "", "");
+            dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", "");
       } else if (csd.isSetLongStats()) {
         LongColumnStatsData lcsd = csd.getLongStats();
         appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(),
-            lcsd.getNumDVs(), "", "", "", "");
+            lcsd.getNumDVs(), lcsd.getBitVectors() == null ? "" : lcsd.getBitVectors(), "", "", "", "");
       } else if (csd.isSetDateStats()) {
         DateColumnStatsData dcsd = csd.getDateStats();
         appendColumnStats(tableInfo,
             convertToString(dcsd.getLowValue()),
             convertToString(dcsd.getHighValue()),
-            dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
+            dcsd.getNumNulls(), dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", "");
       }
     } else {
-      appendColumnStats(tableInfo, "", "", "", "", "", "", "", "");
+      appendColumnStats(tableInfo, "", "", "", "", "", "", "", "", "");
     }
   }
@@ -779,7 +781,7 @@
   }
 
   private static void appendColumnStats(StringBuilder sb, Object min, Object max, Object numNulls,
-      Object ndv, Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) {
+      Object ndv, Object bitVector, Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) {
     sb.append(String.format("%-" + ALIGNMENT + "s", min)).append(FIELD_DELIM);
     sb.append(String.format("%-" + ALIGNMENT + "s", max)).append(FIELD_DELIM);
     sb.append(String.format("%-" + ALIGNMENT + "s", numNulls)).append(FIELD_DELIM);
@@ -788,6 +790,7 @@
     sb.append(String.format("%-" + ALIGNMENT + "s", maxColLen)).append(FIELD_DELIM);
     sb.append(String.format("%-" + ALIGNMENT + "s", numTrues)).append(FIELD_DELIM);
     sb.append(String.format("%-" + ALIGNMENT + "s", numFalses)).append(FIELD_DELIM);
+    sb.append(String.format("%-" + ALIGNMENT + "s", bitVector)).append(FIELD_DELIM);
   }
 
   private static void appendColumnStatsNoFormatting(StringBuilder sb, Object min,
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index 41a1c7a..f2d2e2d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -18,9 +18,6 @@
 
 package org.apache.hadoop.hive.ql.plan;
 
-import org.apache.hadoop.hive.ql.stats.StatsUtils;
-
-
 public class ColStatistics {
 
   private String colName;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
index d7a9888..845ffcf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
@@ -59,8 +59,8 @@ public class DescTableDesc extends DDLDesc implements Serializable {
    */
   private static final String schema = "col_name,data_type,comment#string:string:string";
   private static final String colStatsSchema = "col_name,data_type,min,max,num_nulls,"
-      + "distinct_count,avg_col_len,max_col_len,num_trues,num_falses,comment"
-      + "#string:string:string:string:string:string:string:string:string:string:string";
+      + "distinct_count,avg_col_len,max_col_len,num_trues,num_falses,comment,bitVector"
+      + "#string:string:string:string:string:string:string:string:string:string:string:string";
 
   public DescTableDesc() {
   }
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
index 2d56950..8ee41bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
@@ -23,9 +23,9 @@ import java.util.List;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
-import org.apache.hadoop.hive.common.ndv.FMSketch;
 import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
 import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.common.ndv.fm.FMSketch;
 import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.Description;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q b/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q
new file mode 100644
index 0000000..d64263f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q
@@ -0,0 +1,139 @@
+set hive.stats.fetch.bitvector=false;
+
+create table src_stat as select * from src1;
+
+create table src_stat_int (
+  key         double,
+  value       string
+);
+
+LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int;
+
+ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key;
+
+describe formatted src_stat key;
+
+ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111');
+
+describe formatted src_stat key;
+
+ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124');
+
+describe formatted src_stat value;
+
+ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key;
+
+describe formatted src_stat_int key;
+
+ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22');
+
+describe formatted src_stat_int key;
+
+
+
+create database if not exists dummydb;
+
+use dummydb;
+
+ALTER TABLE default.src_stat UPDATE STATISTICS for column key SET ('numDVs'='3333','avgColLen'='2.222');
+
+describe formatted default.src_stat key;
+
+ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235');
+
+describe formatted default.src_stat value;
+
+use default;
+
+drop database dummydb;
+
+create table datatype_stats(
+        t TINYINT,
+        s SMALLINT,
+        i INT,
+        b BIGINT,
+        f FLOAT,
+        d DOUBLE,
+        dem DECIMAL, --default decimal (10,0)
+        ts TIMESTAMP,
+        dt DATE,
+        str STRING,
+        v VARCHAR(12),
+        c CHAR(5),
+        bl BOOLEAN,
+        bin BINARY);
+
+INSERT INTO datatype_stats values(2, 3, 45, 456, 45454.4, 454.6565, 2355, '2012-01-01 01:02:03', '2012-01-01', 'update_statistics', 'stats', 'hive', 'true', 'bin');
+INSERT INTO datatype_stats values(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+DESC FORMATTED datatype_stats s;
+DESC FORMATTED datatype_stats i;
+DESC FORMATTED datatype_stats b;
+DESC FORMATTED datatype_stats f;
+DESC FORMATTED datatype_stats d;
+DESC FORMATTED datatype_stats dem;
+DESC FORMATTED datatype_stats ts;
+DESC FORMATTED datatype_stats dt;
+DESC FORMATTED datatype_stats str;
+DESC FORMATTED datatype_stats v;
+DESC FORMATTED datatype_stats c;
+DESC FORMATTED datatype_stats bl;
+DESC FORMATTED datatype_stats bin;
+
+--tinyint
+DESC FORMATTED datatype_stats t;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35');
+DESC FORMATTED datatype_stats t;
+--smallint
+DESC FORMATTED datatype_stats s;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25');
+DESC FORMATTED datatype_stats s;
+--int
+DESC FORMATTED datatype_stats i;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5');
+DESC FORMATTED datatype_stats i;
+--bigint
+DESC FORMATTED datatype_stats b;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8');
+DESC FORMATTED datatype_stats b;
+
+--float
+DESC FORMATTED datatype_stats f;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00');
+DESC FORMATTED datatype_stats f;
+--double
+DESC FORMATTED datatype_stats d;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455');
+DESC FORMATTED datatype_stats d;
+--decimal
+DESC FORMATTED datatype_stats dem;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0');
+DESC FORMATTED datatype_stats dem;
+--timestamp
+DESC FORMATTED datatype_stats ts;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924');
+DESC FORMATTED datatype_stats ts;
+--decimal
+DESC FORMATTED datatype_stats dt;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04');
+DESC FORMATTED datatype_stats dt;
+--string
+DESC FORMATTED datatype_stats str;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235');
+DESC FORMATTED datatype_stats str;
+--varchar
+DESC FORMATTED datatype_stats v;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25');
+DESC FORMATTED datatype_stats v;
+--char
+DESC FORMATTED datatype_stats c;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58');
+DESC FORMATTED datatype_stats c;
+--boolean
+DESC FORMATTED datatype_stats bl;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8');
+DESC FORMATTED datatype_stats bl;
+--binary
+DESC FORMATTED datatype_stats bin;
+ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8');
+DESC FORMATTED datatype_stats bin;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/queries/clientpositive/bitvector.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bitvector.q b/ql/src/test/queries/clientpositive/bitvector.q
new file mode 100644
index 0000000..d8669f2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/bitvector.q
@@ -0,0 +1,3 @@
+set hive.mapred.mode=nonstrict;
+
+desc formatted src key;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/queries/clientpositive/fm-sketch.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/fm-sketch.q b/ql/src/test/queries/clientpositive/fm-sketch.q
new file mode 100644
index 0000000..6a65442
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/fm-sketch.q
@@ -0,0 +1,58 @@
+set hive.mapred.mode=nonstrict;
+set hive.stats.ndv.algo=fm;
+
+create table n(key int);
+
+insert overwrite table n select null from src;
+
+explain analyze table n compute statistics for columns;
+
+analyze table n compute statistics for columns;
+
+desc formatted n key;
+
+
+create table i(key int);
+
+insert overwrite table i select key from src;
+
+explain analyze table i compute statistics for columns;
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
+drop table i;
+
+create table i(key double);
+
+insert overwrite table i select key from src;
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
+drop table i;
+
+create table i(key decimal);
+
+insert overwrite table i select key from src;
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
+drop table i;
+
+create table i(key date);
+
+insert into i values ('2012-08-17');
+insert into i values ('2012-08-17');
+insert into i values ('2013-08-17');
+insert into i values ('2012-03-17');
+insert into i values ('2012-05-17');
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/queries/clientpositive/hll.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/hll.q b/ql/src/test/queries/clientpositive/hll.q
index edfdce8..91c4e78 100644
--- a/ql/src/test/queries/clientpositive/hll.q
+++ b/ql/src/test/queries/clientpositive/hll.q
@@ -1,5 +1,16 @@
 set hive.mapred.mode=nonstrict;
 
+create table n(key int);
+
+insert overwrite table n select null from src;
+
+explain analyze table n compute statistics for columns;
+
+analyze table n compute statistics for columns;
+
+desc formatted n key;
+
+
 create table i(key int);
 
 insert overwrite table i select key from src;
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alterColumnStats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alterColumnStats.q.out b/ql/src/test/results/clientpositive/alterColumnStats.q.out
index 519a62a..ea2416f 100644
--- a/ql/src/test/results/clientpositive/alterColumnStats.q.out
+++ b/ql/src/test/results/clientpositive/alterColumnStats.q.out
@@ -142,17 +142,17 @@ PREHOOK: Input: default@p
 POSTHOOK: query: desc formatted p c1
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@p
-# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment
-
-c1    string    from deserializer
-COLUMN_STATS_ACCURATE    {\"BASIC_STATS\":\"true\"}
+# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment    bitVector
+
+c1    string    from deserializer
+COLUMN_STATS_ACCURATE    {\"BASIC_STATS\":\"true\"}
 PREHOOK: query: desc formatted p c2
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@p
 POSTHOOK: query: desc formatted p c2
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@p
-# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment
-
-c2    string    from deserializer
-COLUMN_STATS_ACCURATE    {\"BASIC_STATS\":\"true\"}
+# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment    bitVector
+
+c2    string    from deserializer
+COLUMN_STATS_ACCURATE    {\"BASIC_STATS\":\"true\"}
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
index 672bd9f..a315a6b 100644
--- a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
+++ b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
@@ -65,9 +65,9 @@ PREHOOK: Input: default@p
 POSTHOOK: query: desc formatted p partition (c=1) a
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@p
-# col_name    data_type    comment
-
-a    int    from deserializer
+# col_name    data_type    comment
+
+a    int    from deserializer
 PREHOOK: query: desc formatted p partition (c=1)
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@p
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
index c0d4eee..700f07f 100644
--- a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
@@ -34,9 +34,11 @@ PREHOOK: Input: default@src_stat_part_one
 POSTHOOK: query: describe formatted src_stat_part_one PARTITION(partitionId=1) key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_stat_part_one
-# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment
-
-key    string    0    16    1.72    3    from deserializer
+# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment    bitVector
+
+key    string    0    16    1.72    3    SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+    from deserializer
 PREHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2')
 PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
 POSTHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2')
 POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS
@@ -47,9 +49,11 @@ PREHOOK: Input: default@src_stat_part_one
 POSTHOOK: query: describe formatted src_stat_part_one PARTITION(partitionId=1) key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_stat_part_one
-# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment
-
-key    string    0    11    2.2    3    from deserializer
+# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment    bitVector
+
+key    string    0    11    2.2    3    SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+    from deserializer
 PREHOOK: query: create table src_stat_part_two(key string, value string) partitioned by (px int, py string)
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -86,9 +90,11 @@ PREHOOK: Input: default@src_stat_part_two
 POSTHOOK: query: describe formatted src_stat_part_two PARTITION(px=1, py='a') key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_stat_part_two
-# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment
-
-key    string    0    16    1.72    3    from deserializer
+# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment    bitVector
+
+key    string    0    16    1.72    3    SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+    from deserializer
 PREHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40')
 PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
 POSTHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40')
 POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS
@@ -99,9 +105,11 @@ PREHOOK: Input: default@src_stat_part_two
 POSTHOOK: query: describe formatted src_stat_part_two PARTITION(px=1, py='a') key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_stat_part_two
-# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment
-
-key    string    0    30    1.72    40    from deserializer
+# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment    bitVector
+
+key    string    0    30    1.72    40    SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+    from deserializer
 PREHOOK: query: create database if not exists dummydb
 PREHOOK: type: CREATEDATABASE
 PREHOOK: Output: database:dummydb
@@ -124,9 +132,11 @@ PREHOOK: Input: default@src_stat_part_two
 POSTHOOK: query: describe formatted default.src_stat_part_two PARTITION(px=1, py='a') key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_stat_part_two
-# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment
-
-key    string    0    40    1.72    50    from deserializer
+# col_name    data_type    min    max    num_nulls    distinct_count    avg_col_len    max_col_len    num_trues    num_falses    comment    bitVector
+
+key    string    0    40    1.72    50    SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
+myn/wfcugOGjfsCYzA0=
+    from deserializer
 PREHOOK: query: use default
 PREHOOK: type: SWITCHDATABASE
 PREHOOK: Input: database:default