http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index fe6ee17..47a1107 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -226,6 +226,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_inner_join.q,\
   vector_interval_1.q,\
   vector_interval_2.q,\
+  vector_interval_mapjoin.q,\
   vector_join30.q,\
   vector_join_filters.q,\
   vector_join_nulls.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index 6b95360..6654166 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -35,6 +35,16 @@ import org.apache.hadoop.io.Writable;
 public abstract class ColumnVector {
 
   /*
+   * The current kinds of column vectors.
+   */
+  public static enum Type {
+    LONG,
+    DOUBLE,
+    BYTES,
+    DECIMAL
+  }
+
+  /*
    * If hasNulls is true, then this array contains true if the value
    * is null, otherwise false. The array is always allocated, so a batch can be re-used
    * later and nulls added.
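The new ColumnVector.Type enum replaces scattered string tests on type names with a single tag that callers can switch on. As a rough, hypothetical illustration of what each tag implies (this helper is not part of the patch, and the decimal precision and scale are arbitrary sketch values), each enum value corresponds to one concrete ColumnVector subclass:

import org.apache.hadoop.hive.ql.exec.vector.*;

public class ColumnVectorTypeDemo {

  // Hypothetical helper, not in the patch: allocate a vector for a Type tag.
  static ColumnVector allocateFor(ColumnVector.Type type, int size) {
    switch (type) {
    case LONG:
      // Covers the integer family plus boolean, date, timestamp and interval types.
      return new LongColumnVector(size);
    case DOUBLE:
      return new DoubleColumnVector(size);
    case BYTES:
      // Covers string, char, varchar and binary.
      return new BytesColumnVector(size);
    case DECIMAL:
      // Precision 38, scale 18 chosen arbitrarily for this sketch.
      return new DecimalColumnVector(size, 38, 18);
    default:
      throw new IllegalArgumentException("Unexpected type " + type);
    }
  }

  public static void main(String[] args) {
    ColumnVector cv = allocateFor(ColumnVector.Type.LONG, VectorizedRowBatch.DEFAULT_SIZE);
    System.out.println(cv.getClass().getSimpleName()); // prints LongColumnVector
  }
}

Note that DECIMAL is the one category where the tag alone cannot size a vector, which is why the call sites below keep the full decimal type name (precision and scale) around.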
http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
index 8c4b6ea..6673509 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
@@ -20,7 +20,12 @@ package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.util.Arrays;
 
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * Class to keep information on a set of typed vector columns.  Used by
@@ -117,28 +122,41 @@ public class VectorColumnSetInfo {
   protected void addKey(String outputType) throws HiveException {
     indexLookup[addIndex] = new KeyLookupHelper();
-    if (VectorizationContext.isIntFamily(outputType) ||
-        VectorizationContext.isDatetimeFamily(outputType)) {
+
+    String typeName = VectorizationContext.mapTypeNameSynonyms(outputType);
+
+    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+    Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
+
+    switch (columnVectorType) {
+    case LONG:
       longIndices[longIndicesIndex] = addIndex;
       indexLookup[addIndex].setLong(longIndicesIndex);
       ++longIndicesIndex;
-    } else if (VectorizationContext.isFloatFamily(outputType)) {
+      break;
+
+    case DOUBLE:
       doubleIndices[doubleIndicesIndex] = addIndex;
       indexLookup[addIndex].setDouble(doubleIndicesIndex);
       ++doubleIndicesIndex;
-    } else if (VectorizationContext.isStringFamily(outputType) ||
-        outputType.equalsIgnoreCase("binary")) {
+      break;
+
+    case BYTES:
       stringIndices[stringIndicesIndex]= addIndex;
       indexLookup[addIndex].setString(stringIndicesIndex);
       ++stringIndicesIndex;
-    } else if (VectorizationContext.isDecimalFamily(outputType)) {
-      decimalIndices[decimalIndicesIndex]= addIndex;
-      indexLookup[addIndex].setDecimal(decimalIndicesIndex);
-      ++decimalIndicesIndex;
-    }
-    else {
-      throw new HiveException("Unsuported vector output type: " + outputType);
+      break;
+
+    case DECIMAL:
+      decimalIndices[decimalIndicesIndex]= addIndex;
+      indexLookup[addIndex].setDecimal(decimalIndicesIndex);
+      ++decimalIndicesIndex;
+      break;
+
+    default:
+      throw new HiveException("Unexpected column vector type " + columnVectorType);
     }
+
     addIndex++;
   }
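addKey classifies each key column once, up front, and records in parallel int arrays where that key lands among the long, double, bytes, and decimal vectors, so per-row processing never re-parses type names. A stripped-down sketch of that bookkeeping pattern (a hypothetical class, heavily simplified from VectorColumnSetInfo, with fixed-size arrays for brevity):

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;

// Hypothetical, simplified version of the VectorColumnSetInfo bookkeeping.
public class TypedIndexDemo {
  int[] longIndices = new int[8];
  int[] doubleIndices = new int[8];
  int longCount = 0;
  int doubleCount = 0;
  int addIndex = 0;

  void addKey(ColumnVector.Type type) {
    switch (type) {
    case LONG:
      // Key #addIndex is the longCount-th key stored in long form.
      longIndices[longCount++] = addIndex;
      break;
    case DOUBLE:
      doubleIndices[doubleCount++] = addIndex;
      break;
    default:
      throw new IllegalArgumentException("Only LONG/DOUBLE in this sketch: " + type);
    }
    addIndex++;
  }

  public static void main(String[] args) {
    TypedIndexDemo d = new TypedIndexDemo();
    d.addKey(ColumnVector.Type.LONG);    // e.g. an interval_day_time key
    d.addKey(ColumnVector.Type.DOUBLE);
    d.addKey(ColumnVector.Type.LONG);
    // Long-typed keys sit at overall key positions 0 and 2.
    System.out.println(d.longIndices[0] + "," + d.longIndices[1]); // 0,2
  }
}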
http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
index 0058141..f12bfde 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
@@ -20,6 +20,12 @@ package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * This class copies specified columns of a row from one VectorizedRowBatch to another.
@@ -186,7 +192,7 @@ public class VectorCopyRow {
   private CopyRow[] subRowToBatchCopiersByValue;
   private CopyRow[] subRowToBatchCopiersByReference;
 
-  public void init(VectorColumnMapping columnMapping) {
+  public void init(VectorColumnMapping columnMapping) throws HiveException {
     int count = columnMapping.getCount();
     subRowToBatchCopiersByValue = new CopyRow[count];
     subRowToBatchCopiersByReference = new CopyRow[count];
@@ -194,24 +200,35 @@ public class VectorCopyRow {
     for (int i = 0; i < count; i++) {
       int inputColumn = columnMapping.getInputColumns()[i];
       int outputColumn = columnMapping.getOutputColumns()[i];
-      String typeName = columnMapping.getTypeNames()[i];
+      String typeName = columnMapping.getTypeNames()[i].toLowerCase();
+      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+      Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
 
       CopyRow copyRowByValue = null;
       CopyRow copyRowByReference = null;
 
-      if (VectorizationContext.isIntFamily(typeName) ||
-          VectorizationContext.isDatetimeFamily(typeName)) {
+      switch (columnVectorType) {
+      case LONG:
         copyRowByValue = new LongCopyRow(inputColumn, outputColumn);
-      } else if (VectorizationContext.isFloatFamily(typeName)) {
+        break;
+
+      case DOUBLE:
         copyRowByValue = new DoubleCopyRow(inputColumn, outputColumn);
-      } else if (VectorizationContext.isStringFamily(typeName)) {
+        break;
+
+      case BYTES:
         copyRowByValue = new BytesCopyRowByValue(inputColumn, outputColumn);
         copyRowByReference = new BytesCopyRowByReference(inputColumn, outputColumn);
-      } else if (VectorizationContext.decimalTypePattern.matcher(typeName).matches()){
+        break;
+
+      case DECIMAL:
         copyRowByValue = new DecimalCopyRow(inputColumn, outputColumn);
-      } else {
-        throw new RuntimeException("Cannot allocate vector copy row for " + typeName);
+        break;
+
+      default:
+        throw new HiveException("Unexpected column vector type " + columnVectorType);
       }
+
       subRowToBatchCopiersByValue[i] = copyRowByValue;
       if (copyRowByReference == null) {
         subRowToBatchCopiersByReference[i] = copyRowByValue;

http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 61d2972..392e56d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -107,6 +107,7 @@ import org.apache.hadoop.hive.ql.udf.generic.*;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
@@ -2044,6 +2045,51 @@ public class VectorizationContext {
     }
   }
 
+  public static String mapTypeNameSynonyms(String typeName) {
+    typeName = typeName.toLowerCase();
+    if (typeName.equals("long")) {
+      return "bigint";
+    } else if (typeName.equals("string_family")) {
+      return "string";
+    } else {
+      return typeName;
+    }
+  }
+
+  public static ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo) throws HiveException {
+    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
+    PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+
+    switch (primitiveCategory) {
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+    case DATE:
+    case TIMESTAMP:
+    case INTERVAL_YEAR_MONTH:
+    case INTERVAL_DAY_TIME:
+      return ColumnVector.Type.LONG;
+
+    case FLOAT:
+    case DOUBLE:
+      return ColumnVector.Type.DOUBLE;
+
+    case STRING:
+    case CHAR:
+    case VARCHAR:
+    case BINARY:
+      return ColumnVector.Type.BYTES;
+
+    case DECIMAL:
+      return ColumnVector.Type.DECIMAL;
+
+    default:
+      throw new HiveException("Unexpected primitive type category " + primitiveCategory);
+    }
+  }
+
   // TODO: When we support vectorized STRUCTs and can handle more in the reduce-side (MERGEPARTIAL):
   // TODO: Write reduce-side versions of AVG. Currently, only map-side (HASH) versions are in table.
   // TODO: And, investigate if different reduce-side versions are needed for var* and std*, or if map-side aggregate can be used.. Right now they are conservatively
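These two helpers are the heart of the patch: call sites now funnel a Hive type name through mapTypeNameSynonyms and getColumnVectorTypeFromTypeInfo instead of ad-hoc isIntFamily/isFloatFamily/isStringFamily checks, and the switch above is what lets interval_year_month and interval_day_time flow through as LONG. A sketch of the intended call pattern, assuming the Hive ql and serde jars are on the classpath:

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ClassifyTypeDemo {
  public static void main(String[] args) throws HiveException {
    for (String name : new String[] {"long", "interval_day_time", "varchar(10)", "decimal(38,18)"}) {
      // "long" is a synonym that is normalized to "bigint" before parsing.
      String typeName = VectorizationContext.mapTypeNameSynonyms(name);
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
      ColumnVector.Type type = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
      System.out.println(name + " -> " + type);
      // Expected: long -> LONG, interval_day_time -> LONG,
      //           varchar(10) -> BYTES, decimal(38,18) -> DECIMAL
    }
  }
}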
http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index af78776..4c8c4b1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -52,6 +52,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedCreateHashTable;
@@ -68,8 +69,12 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * This class is common operator class for native vectorized map join.
@@ -658,7 +663,7 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
    * Setup our 2nd batch with the same "column schema" as the big table batch that can be used to
    * build join output results in.
    */
-  protected VectorizedRowBatch setupOverflowBatch() {
+  protected VectorizedRowBatch setupOverflowBatch() throws HiveException {
     VectorizedRowBatch overflowBatch;
 
     Map<Integer, String> scratchColumnTypeMap = vOutContext.getScratchColumnTypeMap();
@@ -701,23 +706,38 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
    * Allocate overflow batch columns by hand.
    */
   private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, int outputColumn,
-      String typeName) {
+      String typeName) throws HiveException {
 
     if (overflowBatch.cols[outputColumn] == null) {
-      String vectorTypeName;
-      if (VectorizationContext.isIntFamily(typeName) ||
-          VectorizationContext.isDatetimeFamily(typeName)) {
-        vectorTypeName = "long";
-      } else if (VectorizationContext.isFloatFamily(typeName)) {
-        vectorTypeName = "double";
-      } else if (VectorizationContext.isStringFamily(typeName)) {
-        vectorTypeName = "string";
-      } else if (VectorizationContext.decimalTypePattern.matcher(typeName).matches()){
-        vectorTypeName = typeName;  // Keep precision and scale.
-      } else {
-        throw new RuntimeException("Cannot determine vector type for " + typeName);
+      typeName = VectorizationContext.mapTypeNameSynonyms(typeName);
+
+      String columnVectorTypeName;
+
+      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+      Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
+
+      switch (columnVectorType) {
+      case LONG:
+        columnVectorTypeName = "long";
+        break;
+
+      case DOUBLE:
+        columnVectorTypeName = "double";
+        break;
+
+      case BYTES:
+        columnVectorTypeName = "string";
+        break;
+
+      case DECIMAL:
+        columnVectorTypeName = typeName;  // Keep precision and scale.
+        break;
+
+      default:
+        throw new HiveException("Unexpected column vector type " + columnVectorType);
       }
-      overflowBatch.cols[outputColumn] = VectorizedRowBatchCtx.allocateColumnVector(vectorTypeName, VectorizedRowBatch.DEFAULT_SIZE);
+
+      overflowBatch.cols[outputColumn] = VectorizedRowBatchCtx.allocateColumnVector(columnVectorTypeName, VectorizedRowBatch.DEFAULT_SIZE);
 
       if (LOG.isDebugEnabled()) {
         LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator initializeOp overflowBatch outputColumn " + outputColumn + " class " + overflowBatch.cols[outputColumn].getClass().getSimpleName());
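Before this change, allocateOverflowBatchColumnVector fell through to throw new RuntimeException("Cannot determine vector type for ...") for any output column whose type missed the int/float/string/decimal family checks; an interval-typed join key is exactly such a type, so the native vectorized map join could not even build its overflow batch. The rewritten path is sketched below in isolation; the calls are the ones the patch uses, but the standalone wrapper method is hypothetical:

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class OverflowAllocDemo {

  // Mirrors allocateOverflowBatchColumnVector for a single column.
  static ColumnVector allocate(String typeName) throws HiveException {
    typeName = VectorizationContext.mapTypeNameSynonyms(typeName);
    ColumnVector.Type type = VectorizationContext.getColumnVectorTypeFromTypeInfo(
        TypeInfoUtils.getTypeInfoFromTypeString(typeName));

    String columnVectorTypeName;
    switch (type) {
    case LONG:    columnVectorTypeName = "long";   break;
    case DOUBLE:  columnVectorTypeName = "double"; break;
    case BYTES:   columnVectorTypeName = "string"; break;
    case DECIMAL: columnVectorTypeName = typeName; break;  // Keep precision and scale.
    default: throw new HiveException("Unexpected column vector type " + type);
    }
    return VectorizedRowBatchCtx.allocateColumnVector(columnVectorTypeName,
        VectorizedRowBatch.DEFAULT_SIZE);
  }

  public static void main(String[] args) throws HiveException {
    // interval_day_time used to hit the RuntimeException; now it yields a LongColumnVector.
    System.out.println(allocate("interval_day_time").getClass().getSimpleName());
  }
}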
http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_interval_mapjoin.q b/ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
new file mode 100644
index 0000000..9a58658
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
@@ -0,0 +1,87 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=true;
+
+create table vectortab_a_1k(
+   t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   dc decimal(38,18),
+   bo boolean,
+   s string,
+   s2 string,
+   ts timestamp,
+   ts2 timestamp,
+   dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k;
+
+CREATE TABLE vectortab_a_1korc STORED AS ORC AS SELECT * FROM vectortab_a_1k;
+
+create table vectortab_b_1k(
+   t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   dc decimal(38,18),
+   bo boolean,
+   s string,
+   s2 string,
+   ts timestamp,
+   ts2 timestamp,
+   dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE vectortab_b_1k;
+
+CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k;
+
+explain
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1
+from
+   ( select
+        s,
+        (cast(dt as date) - cast(ts as date)) as intrvl1
+     from
+        vectortab_a_1korc ) v1
+join
+   (
+    select
+        s ,
+        (cast(dt as date) - cast(ts as date)) as intrvl2
+    from
+        vectortab_b_1korc
+   ) v2
+   on v1.intrvl1 = v2.intrvl2
+   and v1.s = v2.s;
+
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1
+from
+   ( select
+        s,
+        (cast(dt as date) - cast(ts as date)) as intrvl1
+     from
+        vectortab_a_1korc ) v1
+join
+   (
+    select
+        s ,
+        (cast(dt as date) - cast(ts as date)) as intrvl2
+    from
+        vectortab_b_1korc
+   ) v2
+   on v1.intrvl1 = v2.intrvl2
+   and v1.s = v2.s;
\ No newline at end of file
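The join key pair in this test is (interval_day_time, string): casting the timestamp ts to a date truncates its time-of-day, and in Hive a date minus a date yields an interval_day_time of whole days, visible as the intrvl1/intrvl2 key type in the plans below. A plain-Java illustration of what the key expression computes, with made-up sample values and no Hive code involved:

import java.time.Duration;
import java.time.LocalDate;
import java.time.LocalDateTime;

// Illustrates, in plain Java, what (cast(dt as date) - cast(ts as date)) computes:
// the timestamp is truncated to a date, then the difference is a whole-day interval.
public class IntervalKeyDemo {
  public static void main(String[] args) {
    LocalDate dt = LocalDate.parse("2015-04-20");            // sample value for dt
    LocalDateTime ts = LocalDateTime.parse("2015-04-17T13:45:00"); // sample value for ts
    Duration intrvl1 = Duration.between(ts.toLocalDate().atStartOfDay(), dt.atStartOfDay());
    System.out.println(intrvl1.toDays() + " days");  // 3 days -> interval '3 00:00:00' day to second
  }
}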
http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/test/results/clientpositive/tez/vector_interval_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vector_interval_mapjoin.q.out
new file mode 100644
index 0000000..b4d3477
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_interval_mapjoin.q.out
@@ -0,0 +1,281 @@
+PREHOOK: query: create table vectortab_a_1k(
+   t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   dc decimal(38,18),
+   bo boolean,
+   s string,
+   s2 string,
+   ts timestamp,
+   ts2 timestamp,
+   dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_a_1k
+POSTHOOK: query: create table vectortab_a_1k(
+   t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   dc decimal(38,18),
+   bo boolean,
+   s string,
+   s2 string,
+   ts timestamp,
+   ts2 timestamp,
+   dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_a_1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vectortab_a_1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vectortab_a_1k
+PREHOOK: query: CREATE TABLE vectortab_a_1korc STORED AS ORC AS SELECT * FROM vectortab_a_1k
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@vectortab_a_1k
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_a_1korc
+POSTHOOK: query: CREATE TABLE vectortab_a_1korc STORED AS ORC AS SELECT * FROM vectortab_a_1k
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@vectortab_a_1k
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_a_1korc
+PREHOOK: query: create table vectortab_b_1k(
+   t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   dc decimal(38,18),
+   bo boolean,
+   s string,
+   s2 string,
+   ts timestamp,
+   ts2 timestamp,
+   dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_b_1k
+POSTHOOK: query: create table vectortab_b_1k(
+   t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   dc decimal(38,18),
+   bo boolean,
+   s string,
+   s2 string,
+   ts timestamp,
+   ts2 timestamp,
+   dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_b_1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE vectortab_b_1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vectortab_b_1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE vectortab_b_1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vectortab_b_1k
+PREHOOK: query: CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@vectortab_b_1k
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_b_1korc
+POSTHOOK: query: CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@vectortab_b_1k
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_b_1korc
+PREHOOK: query: explain
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1
+from
+   ( select
+        s,
+        (cast(dt as date) - cast(ts as date)) as intrvl1
+     from
+        vectortab_a_1korc ) v1
+join
+   (
+    select
+        s ,
+        (cast(dt as date) - cast(ts as date)) as intrvl2
+    from
+        vectortab_b_1korc
+   ) v2
+   on v1.intrvl1 = v2.intrvl2
+   and v1.s = v2.s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1
+from
+   ( select
+        s,
+        (cast(dt as date) - cast(ts as date)) as intrvl1
+     from
+        vectortab_a_1korc ) v1
+join
+   (
+    select
+        s ,
+        (cast(dt as date) - cast(ts as date)) as intrvl2
+    from
+        vectortab_b_1korc
+   ) v2
+   on v1.intrvl1 = v2.intrvl2
+   and v1.s = v2.s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: vectortab_a_1korc
+                  Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: s is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 230132 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 230132 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: _col1 is not null (type: boolean)
+                        Statistics: Num rows: 250 Data size: 115066 Basic stats: COMPLETE Column stats: NONE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          keys:
+                            0 _col1 (type: interval_day_time), _col0 (type: string)
+                            1 _col1 (type: interval_day_time), _col0 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          input vertices:
+                            1 Map 2
+                          Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column stats: NONE
+                          HybridGraceHashJoin: true
+                          Select Operator
+                            expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time)
+                            outputColumnNames: _col0, _col1, _col2
+                            Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column stats: NONE
+                            File Output Operator
+                              compressed: false
+                              Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column stats: NONE
+                              table:
+                                  input format: org.apache.hadoop.mapred.TextInputFormat
+                                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Map 2
+            Map Operator Tree:
+                TableScan
+                  alias: vectortab_b_1korc
+                  Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: s is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 229224 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 229224 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: _col1 is not null (type: boolean)
+                        Statistics: Num rows: 250 Data size: 114612 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col1 (type: interval_day_time), _col0 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col1 (type: interval_day_time), _col0 (type: string)
+                          Statistics: Num rows: 250 Data size: 114612 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select
+   v1.s,
+   v2.s,
+   v1.intrvl1
+from
+   ( select
+        s,
+        (cast(dt as date) - cast(ts as date)) as intrvl1
+     from
+        vectortab_a_1korc ) v1
+join
+   (
+    select
+        s ,
+        (cast(dt as date) - cast(ts as date)) as intrvl2
+    from
+        vectortab_b_1korc
+   ) v2
+   on v1.intrvl1 = v2.intrvl2
+   and v1.s = v2.s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectortab_a_1korc
+PREHOOK: Input: default@vectortab_b_1korc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+   v1.s,
+   v2.s,
+   v1.intrvl1
+from
+   ( select
+        s,
+        (cast(dt as date) - cast(ts as date)) as intrvl1
+     from
+        vectortab_a_1korc ) v1
+join
+   (
+    select
+        s ,
+        (cast(dt as date) - cast(ts as date)) as intrvl2
+    from
+        vectortab_b_1korc
+   ) v2
+   on v1.intrvl1 = v2.intrvl2
+   and v1.s = v2.s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectortab_a_1korc
+POSTHOOK: Input: default@vectortab_b_1korc
+#### A masked pattern was here ####
http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
new file mode 100644
index 0000000..976091b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
@@ -0,0 +1,281 @@
+PREHOOK: query: create table vectortab_a_1k(
+   t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   dc decimal(38,18),
+   bo boolean,
+   s string,
+   s2 string,
+   ts timestamp,
+   ts2 timestamp,
+   dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_a_1k
+POSTHOOK: query: create table vectortab_a_1k(
+   t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   dc decimal(38,18),
+   bo boolean,
+   s string,
+   s2 string,
+   ts timestamp,
+   ts2 timestamp,
+   dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_a_1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vectortab_a_1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vectortab_a_1k
+PREHOOK: query: CREATE TABLE vectortab_a_1korc STORED AS ORC AS SELECT * FROM vectortab_a_1k
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@vectortab_a_1k
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_a_1korc
+POSTHOOK: query: CREATE TABLE vectortab_a_1korc STORED AS ORC AS SELECT * FROM vectortab_a_1k
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@vectortab_a_1k
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_a_1korc
+PREHOOK: query: create table vectortab_b_1k(
+   t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   dc decimal(38,18),
+   bo boolean,
+   s string,
+   s2 string,
+   ts timestamp,
+   ts2 timestamp,
+   dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_b_1k
+POSTHOOK: query: create table vectortab_b_1k(
+   t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   dc decimal(38,18),
+   bo boolean,
+   s string,
+   s2 string,
+   ts timestamp,
+   ts2 timestamp,
+   dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_b_1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE vectortab_b_1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vectortab_b_1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE vectortab_b_1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vectortab_b_1k
+PREHOOK: query: CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@vectortab_b_1k
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_b_1korc
+POSTHOOK: query: CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@vectortab_b_1k
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_b_1korc
+PREHOOK: query: explain
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1
+from
+   ( select
+        s,
+        (cast(dt as date) - cast(ts as date)) as intrvl1
+     from
+        vectortab_a_1korc ) v1
+join
+   (
+    select
+        s ,
+        (cast(dt as date) - cast(ts as date)) as intrvl2
+    from
+        vectortab_b_1korc
+   ) v2
+   on v1.intrvl1 = v2.intrvl2
+   and v1.s = v2.s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1
+from
+   ( select
+        s,
+        (cast(dt as date) - cast(ts as date)) as intrvl1
+     from
+        vectortab_a_1korc ) v1
+join
+   (
+    select
+        s ,
+        (cast(dt as date) - cast(ts as date)) as intrvl2
+    from
+        vectortab_b_1korc
+   ) v2
+   on v1.intrvl1 = v2.intrvl2
+   and v1.s = v2.s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        v2:vectortab_b_1korc
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        v2:vectortab_b_1korc
+          TableScan
+            alias: vectortab_b_1korc
+            Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: s is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 229224 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 229224 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: _col1 is not null (type: boolean)
+                  Statistics: Num rows: 250 Data size: 114612 Basic stats: COMPLETE Column stats: NONE
+                  HashTable Sink Operator
+                    keys:
+                      0 _col1 (type: interval_day_time), _col0 (type: string)
+                      1 _col1 (type: interval_day_time), _col0 (type: string)
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: vectortab_a_1korc
+            Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: s is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 230132 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 230132 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: _col1 is not null (type: boolean)
+                  Statistics: Num rows: 250 Data size: 115066 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    keys:
+                      0 _col1 (type: interval_day_time), _col0 (type: string)
+                      1 _col1 (type: interval_day_time), _col0 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select
+   v1.s,
+   v2.s,
+   v1.intrvl1
+from
+   ( select
+        s,
+        (cast(dt as date) - cast(ts as date)) as intrvl1
+     from
+        vectortab_a_1korc ) v1
+join
+   (
+    select
+        s ,
+        (cast(dt as date) - cast(ts as date)) as intrvl2
+    from
+        vectortab_b_1korc
+   ) v2
+   on v1.intrvl1 = v2.intrvl2
+   and v1.s = v2.s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectortab_a_1korc
+PREHOOK: Input: default@vectortab_b_1korc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+   v1.s,
+   v2.s,
+   v1.intrvl1
+from
+   ( select
+        s,
+        (cast(dt as date) - cast(ts as date)) as intrvl1
+     from
+        vectortab_a_1korc ) v1
+join
+   (
+    select
+        s ,
+        (cast(dt as date) - cast(ts as date)) as intrvl2
+    from
+        vectortab_b_1korc
+   ) v2
+   on v1.intrvl1 = v2.intrvl2
+   and v1.s = v2.s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectortab_a_1korc
+POSTHOOK: Input: default@vectortab_b_1korc
+#### A masked pattern was here ####