This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 60b656d  HIVE-19647: use bitvectors in IN operators (#2598) (Soumyakanti Das reviewed by Zoltan Haindrich)
60b656d is described below

commit 60b656da37c04ad6db7a12324e0b9ee079a80f84
Author: Soumyakanti Das <soumyakanti.das...@gmail.com>
AuthorDate: Tue Sep 21 03:23:14 2021 -0700

    HIVE-19647: use bitvectors in IN operators (#2598) (Soumyakanti Das reviewed by Zoltan Haindrich)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   2 +
 .../stats/annotation/HiveMurmur3Adapter.java       |  85 +++++++
 .../stats/annotation/StatsRulesProcFactory.java    |  50 +++-
 .../apache/hadoop/hive/ql/plan/ColStatistics.java  |   9 +
 .../apache/hadoop/hive/ql/stats/StatsUtils.java    |   7 +
 .../hive/ql/plan/mapping/TestStatEstimations.java  |  39 ++-
 .../queries/clientpositive/in_bitvector_filter.q   |  22 ++
 .../clientpositive/llap/in_bitvector_filter.q.out  | 274 +++++++++++++++++++++
 8 files changed, 485 insertions(+), 3 deletions(-)
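In short: column statistics collected by ANALYZE already carry an NDV sketch (the HLL "bitvector"), and with this change the IN-clause selectivity estimator can consult that sketch to estimate how many of the IN-list constants actually occur in the column, instead of treating every constant as a hit. The behaviour is opt-in via the new hive.stats.use.bitvectors property. A minimal, illustrative session (the table and literals below are made up; the toggle mirrors the new qfile further down):

    set hive.stats.use.bitvectors=true;

    create table t2 (a integer, b string) stored as orc;
    insert into t2 values (1, 'A1'), (2, 'A2'), (20, 'B5');
    analyze table t2 compute statistics for columns;

    -- only 1 and 2 actually occur in the column, so the Filter row
    -- estimate now tracks that overlap rather than the IN-list size
    explain select a from t2 where a in (-1, 1, 2, 5, 300);
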
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f61b903..903a803 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2914,6 +2914,8 @@ public class HiveConf extends Configuration {
         "UDTFs change the number of rows of the output. A common UDTF is the explode() method that creates\n" +
         "multiple rows for each element in the input array. This factor is applied to the number of\n" +
         "output rows and output size."),
+    HIVE_STATS_USE_BITVECTORS("hive.stats.use.bitvectors", false,
+        "Enables to use bitvectors for estimating selectivity."),
     HIVE_STATS_MAX_NUM_STATS("hive.stats.max.num.stats", (long) 10000,
         "When the number of stats to be updated is huge, this value is used to control the number of \n" +
         " stats to be sent to HMS for update."),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/HiveMurmur3Adapter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/HiveMurmur3Adapter.java
new file mode 100644
index 0000000..0baaa62
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/HiveMurmur3Adapter.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.stats.annotation;
+
+import java.nio.ByteBuffer;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hive.common.util.Murmur3;
+
+/**
+ * This class could be used to map Hive values type to Murmur3 hash values.
+ */
+public class HiveMurmur3Adapter {
+
+  private PrimitiveCategory type;
+  private PrimitiveObjectInspector inputOI;
+
+  public HiveMurmur3Adapter(PrimitiveObjectInspector oi) throws HiveException {
+    this.inputOI = oi;
+    type = oi.getTypeInfo().getPrimitiveCategory();
+  }
+
+  private final ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES);
+
+  public long murmur3(Object objVal) throws HiveException {
+    Object p = objVal;
+    switch (type) {
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+    case TIMESTAMP: {
+      long val = PrimitiveObjectInspectorUtils.getLong(objVal, inputOI);
+      LONG_BUFFER.putLong(0, val);
+      return Murmur3.hash64(LONG_BUFFER.array());
+    }
+    case FLOAT:
+    case DOUBLE: {
+      double val = PrimitiveObjectInspectorUtils.getDouble(objVal, inputOI);
+      LONG_BUFFER.putDouble(0, val);
+      return Murmur3.hash64(LONG_BUFFER.array());
+    }
+    case STRING:
+    case CHAR:
+    case VARCHAR: {
+      String val = PrimitiveObjectInspectorUtils.getString(objVal, inputOI);
+      return Murmur3.hash64(val.getBytes());
+    }
+    case DECIMAL: {
+      HiveDecimal decimal = PrimitiveObjectInspectorUtils.getHiveDecimal(p, inputOI);
+      LONG_BUFFER.putDouble(0, decimal.doubleValue());
+      return Murmur3.hash64(LONG_BUFFER.array());
+    }
+    case DATE:
+      DateWritable v = new DateWritable((DateWritable) inputOI.getPrimitiveWritableObject(p));
+      int days = v.getDays();
+      LONG_BUFFER.putLong(0, days);
+      return Murmur3.hash64(LONG_BUFFER.array());
+    case BOOLEAN:
+    case BINARY:
+    default:
+      throw new HiveException("type: " + type + " is not supported");
+    }
+  }
+}
\ No newline at end of file
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 0a7c5e2..dc83827 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -36,6 +36,9 @@ import java.util.Stack;
 import com.google.common.base.Preconditions;
 import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
 import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -118,7 +121,9 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -537,10 +542,11 @@ public class StatsRulesProcFactory {
       }
       for (int i = 0; i < columnStats.size(); i++) {
         long dvs = columnStats.get(i) == null ? 0 : columnStats.get(i).getCountDistint();
+        long intersectionSize = estimateIntersectionSize(aspCtx.getConf(), columnStats.get(i), values.get(i));
         // (num of distinct vals for col in IN clause  / num of distinct vals for col )
         double columnFactor = dvs == 0 ? 0.5d : (1.0d / dvs);
         if (!multiColumn) {
-          columnFactor *=values.get(0).size();
+          columnFactor *= intersectionSize;
         }
         // max can be 1, even when ndv is larger in IN clause than in column stats
         factor *= columnFactor > 1d ? 1d : columnFactor;
@@ -555,6 +561,48 @@
       }
       return Math.round(numRows * factor * inFactor);
     }
+    private long estimateIntersectionSize(HiveConf conf, ColStatistics colStatistics, Set<ExprNodeDescEqualityWrapper> values) {
+      try {
+        boolean useBitVectors = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_USE_BITVECTORS);
+        if (!useBitVectors){
+          return values.size();
+        }
+        if (colStatistics == null) {
+          return values.size();
+        }
+        byte[] bitVector = colStatistics.getBitVectors();
+        if (bitVector == null) {
+          return values.size();
+        }
+        NumDistinctValueEstimator sketch = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(bitVector);
+        if (!(sketch instanceof HyperLogLog)) {
+          return values.size();
+        }
+        HyperLogLog hllCol = (HyperLogLog) sketch;
+        HyperLogLog hllVals = new HyperLogLog.HyperLogLogBuilder().build();
+
+        for (ExprNodeDescEqualityWrapper b : values) {
+          ObjectInspector oi = b.getExprNodeDesc().getWritableObjectInspector();
+          HiveMurmur3Adapter hma = new HiveMurmur3Adapter((PrimitiveObjectInspector) oi);
+          ExprNodeConstantDesc c = (ExprNodeConstantDesc) b.getExprNodeDesc();
+          hllVals.add(hma.murmur3(c.getWritableObjectInspector().getWritableConstantValue()));
+        }
+
+        long cntA = hllCol.count();
+        long cntB = hllVals.count();
+        hllCol.merge(hllVals);
+        long cntU = hllCol.count();
+
+        long cntI = cntA + cntB - cntU;
+        if (cntI < 0) {
+          return 0;
+        }
+        return cntI;
+      } catch (HiveException e) {
+        throw new RuntimeException("checking!", e);
+      }
+    }
+
     static class RangeOps {
 
       private String colType;
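
The core of estimateIntersectionSize() is plain inclusion-exclusion over two HLL sketches: |col ∩ IN| ≈ |col| + |IN| − |col ∪ IN|. Below is a minimal standalone sketch of the same idea, assuming Hive's HyperLogLog and Murmur3 utilities are on the classpath; the class name, helper, and sample values are illustrative and not part of the patch:

    import java.nio.ByteBuffer;
    import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
    import org.apache.hive.common.util.Murmur3;

    public class IntersectionEstimateDemo {
      // Hash a long the same way HiveMurmur3Adapter does: 8 bytes through Murmur3.hash64().
      private static long hashLong(long v) {
        ByteBuffer buf = ByteBuffer.allocate(Long.BYTES);
        buf.putLong(0, v);
        return Murmur3.hash64(buf.array());
      }

      public static void main(String[] args) {
        // Sketch of the column values (in the planner this is rebuilt from the stored bitvector blob).
        HyperLogLog hllCol = new HyperLogLog.HyperLogLogBuilder().build();
        for (long v = 1; v <= 10; v++) {
          hllCol.add(hashLong(v));
        }
        // Sketch of the IN-list constants.
        HyperLogLog hllVals = new HyperLogLog.HyperLogLogBuilder().build();
        for (long v : new long[] {-1, 0, 1, 2, 10, 20, 30, 40}) {
          hllVals.add(hashLong(v));
        }
        long cntA = hllCol.count();
        long cntB = hllVals.count();
        hllCol.merge(hllVals);                         // hllCol now approximates the union
        long cntU = hllCol.count();
        long cntI = Math.max(0, cntA + cntB - cntU);   // |A ∩ B| = |A| + |B| - |A ∪ B|
        System.out.println("estimated intersection size: " + cntI);  // ~3 ({1, 2, 10}) for this data
      }
    }

Because HLL counts are approximate, cntA + cntB - cntU can dip below zero for disjoint sets, which is why both the patch and the sketch clamp the result at 0.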
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index 7e7b9a4..7142a05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -31,6 +31,7 @@ public class ColStatistics {
   private boolean isPrimaryKey;
   private boolean isEstimated;
   private boolean isFilteredColumn;
+  private byte[] bitVectors;
 
   public ColStatistics(String colName, String colType) {
     this.setColumnName(colName);
@@ -111,6 +112,14 @@ public class ColStatistics {
     this.range = r;
   }
 
+  public byte[] getBitVectors() {
+    return bitVectors;
+  }
+
+  public void setBitVectors(byte[] bitVectors) {
+    this.bitVectors = bitVectors;
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index b2c6fc2..56b3843 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -845,27 +845,32 @@ public class StatsUtils {
       cs.setNumNulls(csd.getLongStats().getNumNulls());
       cs.setAvgColLen(JavaDataModel.get().primitive1());
       cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
+      cs.setBitVectors(csd.getLongStats().getBitVectors());
     } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
       cs.setCountDistint(csd.getLongStats().getNumDVs());
       cs.setNumNulls(csd.getLongStats().getNumNulls());
       cs.setAvgColLen(JavaDataModel.get().primitive2());
       cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
+      cs.setBitVectors(csd.getLongStats().getBitVectors());
     } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
       cs.setCountDistint(csd.getDoubleStats().getNumDVs());
       cs.setNumNulls(csd.getDoubleStats().getNumNulls());
       cs.setAvgColLen(JavaDataModel.get().primitive1());
       cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
+      cs.setBitVectors(csd.getDoubleStats().getBitVectors());
     } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
       cs.setCountDistint(csd.getDoubleStats().getNumDVs());
       cs.setNumNulls(csd.getDoubleStats().getNumNulls());
       cs.setAvgColLen(JavaDataModel.get().primitive2());
       cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
+      cs.setBitVectors(csd.getDoubleStats().getBitVectors());
     } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
         || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
        || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
       cs.setCountDistint(csd.getStringStats().getNumDVs());
       cs.setNumNulls(csd.getStringStats().getNumNulls());
       cs.setAvgColLen(csd.getStringStats().getAvgColLen());
+      cs.setBitVectors(csd.getStringStats().getBitVectors());
     } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
       if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) {
         cs.setCountDistint(2);
@@ -906,6 +911,7 @@ public class StatsUtils {
           cs.setRange(minVal, maxVal);
         }
       }
+      cs.setBitVectors(csd.getDecimalStats().getBitVectors());
     } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
       cs.setNumNulls(csd.getDateStats().getNumNulls());
@@ -914,6 +920,7 @@
       Long highVal = (csd.getDateStats().getHighValue() != null) ? csd.getDateStats().getHighValue()
           .getDaysSinceEpoch() : null;
       cs.setRange(lowVal, highVal);
+      cs.setBitVectors(csd.getDateStats().getBitVectors());
     } else {
       // Columns statistics for complex datatypes are not supported yet
       return null;
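
The ColStatistics/StatsUtils changes above only carry the raw sketch bytes through the planner; nothing is decoded until an estimator asks for it. A hypothetical consumer (names illustrative) rebuilds the sketch the same way estimateIntersectionSize() does:

    byte[] blob = colStats.getBitVectors();                  // may be null if no sketch was collected
    if (blob != null) {
      NumDistinctValueEstimator est =
          NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(blob);
      if (est instanceof HyperLogLog) {
        long ndvFromSketch = ((HyperLogLog) est).count();    // NDV read straight from the sketch
      }
    }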
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java
index 4a2d568..c9cfb2d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java
@@ -35,6 +35,7 @@ import org.hamcrest.Matchers;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
+import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestRule;
@@ -54,8 +55,8 @@ public class TestStatEstimations {
     String cmds[] = {
         // @formatter:off
         "create table t2(a integer, b string) STORED AS ORC",
-        "insert into t2 values(1, 'AAA'),(2, 'AAA'),(3, 'AAA'),(4, 'AAA'),(5, 'AAA')," +
-        "(6, 'BBB'),(7, 'BBB'),(8, 'BBB'),(9, 'BBB'),(10, 'BBB')",
+        "insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5')," +
+        "(6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(10, 'B5')",
         "analyze table t2 compute statistics for columns"
         // @formatter:on
     };
@@ -84,6 +85,39 @@ public class TestStatEstimations {
   }
 
   @Test
+  public void testFilterStringIn() throws ParseException, CommandProcessorException {
+    IDriver driver = createDriver();
+    String query = "explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a";
+
+    PlanMapper pm = getMapperForQuery(driver, query);
+    List<FilterOperator> fos = pm.getAll(FilterOperator.class);
+    // the same operator is present 2 times
+    fos.sort(TestCounterMapping.OPERATOR_ID_COMPARATOR.reversed());
+    FilterOperator fop = fos.get(0);
+
+    // any estimation near 1 is ok...currently 1
+    assertEquals(1, fop.getStatistics().getNumRows());
+  }
+
+  // FIXME: right now not in is transformed into AND( NE(...) , NE(...) )
+  @Ignore
+  @Test
+  public void testFilterStringNotIn() throws CommandProcessorException {
+    IDriver driver = createDriver();
+    String query = "explain select a from t2 where b NOT IN ('XXX', 'UUU') order by a";
+
+    PlanMapper pm = getMapperForQuery(driver, query);
+    List<FilterOperator> fos = pm.getAll(FilterOperator.class);
+    // the same operator is present 2 times
+    fos.sort(TestCounterMapping.OPERATOR_ID_COMPARATOR.reversed());
+    assertEquals(1, fos.size());
+    FilterOperator fop = fos.get(0);
+
+    // any estimation near 10 is ok...currently 10
+    assertEquals(10, fop.getStatistics().getNumRows());
+  }
+
+  @Test
   public void testFilterIntIn() throws ParseException, CommandProcessorException {
     IDriver driver = createDriver();
     String query = "explain select a from t2 where a IN (-1,0,1,2,10,20,30,40) order by a";
@@ -103,6 +137,7 @@
 
   private static IDriver createDriver() {
     HiveConf conf = env_setup.getTestCtx().hiveConf;
+    conf.setBoolVar(ConfVars.HIVE_STATS_USE_BITVECTORS, true);
     conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false);
     conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
         "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
diff --git a/ql/src/test/queries/clientpositive/in_bitvector_filter.q b/ql/src/test/queries/clientpositive/in_bitvector_filter.q
new file mode 100644
index 0000000..177d36f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/in_bitvector_filter.q
@@ -0,0 +1,22 @@
+create table t2(a integer, b string) STORED AS ORC;
+insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5'),
+                      (6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(20, 'B5');
+analyze table t2 compute statistics for columns;
+
+set hive.stats.fetch.column.stats=true;
+
+set hive.stats.use.bitvectors=false;
+
+-- 1,2,10,11,12,13,14,15,20 => 9
+explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a;
+
+set hive.stats.use.bitvectors=true;
+
+-- 1,2,20 => 3
+explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a;
+
+-- A3 only => 1
+explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a;
+
+-- A3,B1,B5 => 3
+explain select a from t2 where b IN ('A3', 'B1', 'B5') order by a;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/in_bitvector_filter.q.out b/ql/src/test/results/clientpositive/llap/in_bitvector_filter.q.out
new file mode 100644
index 0000000..7994fdd
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/in_bitvector_filter.q.out
@@ -0,0 +1,274 @@
+PREHOOK: query: create table t2(a integer, b string) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2(a integer, b string) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5'),
+ (6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(20, 'B5')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t2
+POSTHOOK: query: insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5'),
+ (6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(20, 'B5')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.a SCRIPT []
+POSTHOOK: Lineage: t2.b SCRIPT []
+PREHOOK: query: analyze table t2 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@t2
+PREHOOK: Output: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table t2 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: default@t2
+#### A masked pattern was here ####
+PREHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  filterExpr: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean)
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean)
+                    Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: a (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  filterExpr: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean)
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean)
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: a (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  filterExpr: (b) IN ('A3', 'ABC', 'AXZ') (type: boolean)
+                  Statistics: Num rows: 10 Data size: 900 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (b) IN ('A3', 'ABC', 'AXZ') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: a (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select a from t2 where b IN ('A3', 'B1', 'B5') order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t2 where b IN ('A3', 'B1', 'B5') order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  filterExpr: (b) IN ('A3', 'B1', 'B5') (type: boolean)
+                  Statistics: Num rows: 10 Data size: 900 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (b) IN ('A3', 'B1', 'B5') (type: boolean)
+                    Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: a (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
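
Reading the plans against the qfile comments: column a has 10 rows and 10 distinct values in the range [1, 20]. Without bitvectors the estimate counts the 9 IN-list constants that fall inside that range (the "=> 9" comment spells them out), while with bitvectors only the constants that the HLL sketches agree on survive ({1, 2, 20}):

    without bitvectors:  10 rows * 9/10 = 9 rows
    with bitvectors:     10 rows * 3/10 = 3 rows

which matches the first two plans above; the string cases likewise drop to 1 row for b IN ('A3', 'ABC', 'AXZ') and 3 rows for b IN ('A3', 'B1', 'B5').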