http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java index c9078be..9302791 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.io.orc.encoded; +import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; +import org.apache.orc.OrcFile; import org.apache.orc.impl.RunLengthByteReader; import java.io.IOException; @@ -1200,6 +1202,147 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { } } + protected static class Decimal64StreamReader extends Decimal64TreeReader implements SettableTreeReader { + private boolean _isFileCompressed; + private SettableUncompressedStream _presentStream; + private SettableUncompressedStream _valueStream; + private List<ColumnVector> vectors; + private int vectorIndex = 0; + + private Decimal64StreamReader(int columnId, int precision, int scale, + SettableUncompressedStream presentStream, + SettableUncompressedStream valueStream, + boolean isFileCompressed, + OrcProto.ColumnEncoding encoding, TreeReaderFactory.Context context, + List<ColumnVector> vectors) throws IOException { + super(columnId, presentStream, valueStream, encoding, + precision, scale, context); + this._isFileCompressed = isFileCompressed; + this._presentStream = presentStream; + this._valueStream = valueStream; + this.vectors = vectors; + } + + @Override + public void seek(PositionProvider index) throws IOException { + if (vectors != null) return; + if (present != null) { + if (_isFileCompressed) { + index.getNext(); + } + 
present.seek(index); + } + + // data stream could be empty stream or already reached end of stream before present stream. + // This can happen if all values in stream are nulls or last row group values are all null. + skipCompressedIndex(_isFileCompressed, index); + if (_valueStream.available() > 0) { + valueReader.seek(index); + } else { + skipSeek(index); + } + } + + @Override + public void nextVector( + ColumnVector previousVector, boolean[] isNull, int batchSize) throws IOException { + if (vectors == null) { + super.nextVector(previousVector, isNull, batchSize); + return; + } + vectors.get(vectorIndex++).shallowCopyTo(previousVector); + if (vectorIndex == vectors.size()) { + vectors = null; + } + } + + @Override + public void setBuffers(EncodedColumnBatch<OrcBatchKey> batch, boolean sameStripe) { + assert vectors == null; // See the comment in TimestampStreamReader.setBuffers. + ColumnStreamData[] streamsData = batch.getColumnData(columnId); + if (_presentStream != null) { + _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE])); + } + if (_valueStream != null) { + _valueStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE])); + } + } + + public static class StreamReaderBuilder { + private int columnIndex; + private ColumnStreamData presentStream; + private ColumnStreamData valueStream; + private int scale; + private int precision; + private CompressionCodec compressionCodec; + private OrcProto.ColumnEncoding columnEncoding; + private List<ColumnVector> vectors; + private TreeReaderFactory.Context context; + + public StreamReaderBuilder setColumnIndex(int columnIndex) { + this.columnIndex = columnIndex; + return this; + } + + public StreamReaderBuilder setPrecision(int precision) { + this.precision = precision; + return this; + } + + public StreamReaderBuilder setScale(int scale) { + this.scale = scale; + return this; + } + + public StreamReaderBuilder 
setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + + public StreamReaderBuilder setPresentStream(ColumnStreamData presentStream) { + this.presentStream = presentStream; + return this; + } + + public StreamReaderBuilder setValueStream(ColumnStreamData valueStream) { + this.valueStream = valueStream; + return this; + } + + + public StreamReaderBuilder setCompressionCodec(CompressionCodec compressionCodec) { + this.compressionCodec = compressionCodec; + return this; + } + + public StreamReaderBuilder setColumnEncoding(OrcProto.ColumnEncoding encoding) { + this.columnEncoding = encoding; + return this; + } + + public Decimal64StreamReader build() throws IOException { + SettableUncompressedStream presentInStream = StreamUtils.createSettableUncompressedStream( + OrcProto.Stream.Kind.PRESENT.name(), presentStream); + + SettableUncompressedStream valueInStream = StreamUtils.createSettableUncompressedStream( + OrcProto.Stream.Kind.DATA.name(), valueStream); + + boolean isFileCompressed = compressionCodec != null; + return new Decimal64StreamReader(columnIndex, precision, scale, presentInStream, + valueInStream, isFileCompressed, columnEncoding, context, vectors); + } + + public StreamReaderBuilder setVectors(List<ColumnVector> vectors) { + this.vectors = vectors; + return this; + } + } + + public static StreamReaderBuilder builder() { + return new StreamReaderBuilder(); + } + } + protected static class DateStreamReader extends DateTreeReader implements SettableTreeReader { private boolean isFileCompressed; private SettableUncompressedStream _presentStream; @@ -2101,8 +2244,8 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { } public static StructTreeReader createRootTreeReader(TypeDescription[] batchSchemas, - List<OrcProto.ColumnEncoding> encodings, OrcEncodedColumnBatch batch, - CompressionCodec codec, TreeReaderFactory.Context context) throws IOException { + List<OrcProto.ColumnEncoding> encodings, 
OrcEncodedColumnBatch batch, + CompressionCodec codec, Context context, final boolean useDecimal64ColumnVectors) throws IOException { // Note: we only look at the schema here to deal with complex types. Somebody has set up the // reader with whatever ideas they had to the schema and we just trust the reader to // produce the CVBs that was asked for. However, we only need to look at top level columns. @@ -2117,7 +2260,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) { throw new AssertionError("No data for column " + batchColIx + ": " + batchSchemas[i]); } - childReaders[i] = createEncodedTreeReader(batchSchemas[i], encodings, batch, codec, context); + childReaders[i] = createEncodedTreeReader(batchSchemas[i], encodings, batch, codec, context, useDecimal64ColumnVectors); } // TODO: do we actually need this reader? the caller just extracts child readers. @@ -2138,8 +2281,8 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private static TreeReader createEncodedTreeReader(TypeDescription schema, - List<OrcProto.ColumnEncoding> encodings, OrcEncodedColumnBatch batch, - CompressionCodec codec, TreeReaderFactory.Context context) throws IOException { + List<OrcProto.ColumnEncoding> encodings, OrcEncodedColumnBatch batch, + CompressionCodec codec, Context context, final boolean useDecimal64ColumnVectors) throws IOException { int columnIndex = schema.getId(); ColumnStreamData[] streamBuffers = null; List<ColumnVector> vectors = null; @@ -2200,12 +2343,12 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { case TIMESTAMP: case DATE: return getPrimitiveTreeReader(columnIndex, schema, codec, columnEncoding, - present, data, dictionary, lengths, secondary, context, vectors); + present, data, dictionary, lengths, secondary, context, vectors, useDecimal64ColumnVectors); case LIST: assert vectors == null; // Not currently supported. 
TypeDescription elementType = schema.getChildren().get(0); TreeReader elementReader = createEncodedTreeReader( - elementType, encodings, batch, codec, context); + elementType, encodings, batch, codec, context, useDecimal64ColumnVectors); return ListStreamReader.builder() .setColumnIndex(columnIndex) .setColumnEncoding(columnEncoding) @@ -2220,9 +2363,9 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { TypeDescription keyType = schema.getChildren().get(0); TypeDescription valueType = schema.getChildren().get(1); TreeReader keyReader = createEncodedTreeReader( - keyType, encodings, batch, codec, context); + keyType, encodings, batch, codec, context, useDecimal64ColumnVectors); TreeReader valueReader = createEncodedTreeReader( - valueType, encodings, batch, codec, context); + valueType, encodings, batch, codec, context, useDecimal64ColumnVectors); return MapStreamReader.builder() .setColumnIndex(columnIndex) .setColumnEncoding(columnEncoding) @@ -2240,7 +2383,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { for (int i = 0; i < childCount; i++) { TypeDescription childType = schema.getChildren().get(i); childReaders[i] = createEncodedTreeReader( - childType, encodings, batch, codec, context); + childType, encodings, batch, codec, context, useDecimal64ColumnVectors); } return StructStreamReader.builder() .setColumnIndex(columnIndex) @@ -2258,7 +2401,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { for (int i = 0; i < childCount; i++) { TypeDescription childType = schema.getChildren().get(i); childReaders[i] = createEncodedTreeReader( - childType, encodings, batch, codec, context); + childType, encodings, batch, codec, context, useDecimal64ColumnVectors); } return UnionStreamReader.builder() .setColumnIndex(columnIndex) @@ -2276,10 +2419,10 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { } private static TreeReader getPrimitiveTreeReader(final int columnIndex, - TypeDescription columnType, 
CompressionCodec codec, OrcProto.ColumnEncoding columnEncoding, - ColumnStreamData present, ColumnStreamData data, ColumnStreamData dictionary, - ColumnStreamData lengths, ColumnStreamData secondary, TreeReaderFactory.Context context, - List<ColumnVector> vectors) throws IOException { + TypeDescription columnType, CompressionCodec codec, OrcProto.ColumnEncoding columnEncoding, + ColumnStreamData present, ColumnStreamData data, ColumnStreamData dictionary, + ColumnStreamData lengths, ColumnStreamData secondary, Context context, + List<ColumnVector> vectors, final boolean useDecimal64ColumnVectors) throws IOException { switch (columnType.getCategory()) { case BINARY: return BinaryStreamReader.builder() @@ -2390,7 +2533,36 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setVectors(vectors) .build(); case DECIMAL: - return DecimalStreamReader.builder() + // special handling for serde reader (text) in llap IO. + // if file format version is null, then we are processing text IF in LLAP IO, in which case + // we get vectors instead of streams. 
If vectors contain instance of Decimal64ColumnVector we + // should use Decimal64StreamReader (which acts as a wrapper around vectors) + boolean useDecimal64Reader = context.getFileFormat() == null && vectors != null && useDecimal64ColumnVectors; + if (useDecimal64Reader) { + boolean containDecimal64CV = false; + for (ColumnVector vector : vectors) { + if (vector instanceof Decimal64ColumnVector) { + containDecimal64CV = true; + break; + } + } + useDecimal64Reader &= containDecimal64CV; + } + if ((context.getFileFormat() == OrcFile.Version.UNSTABLE_PRE_2_0 || useDecimal64Reader) && + columnType.getPrecision() <= TypeDescription.MAX_DECIMAL64_PRECISION) { + return Decimal64StreamReader.builder() + .setColumnIndex(columnIndex) + .setPrecision(columnType.getPrecision()) + .setScale(columnType.getScale()) + .setPresentStream(present) + .setValueStream(data) + .setCompressionCodec(codec) + .setColumnEncoding(columnEncoding) + .setVectors(vectors) + .setContext(context) + .build(); + } else { + return DecimalStreamReader.builder() .setColumnIndex(columnIndex) .setPrecision(columnType.getPrecision()) .setScale(columnType.getScale()) @@ -2402,6 +2574,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setVectors(vectors) .setContext(context) .build(); + } case TIMESTAMP: return TimestampStreamReader.builder() .setColumnIndex(columnIndex)
http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java index ed6d577..5e70a05 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.common.io.DataCache; import org.apache.hadoop.hive.common.io.FileMetadataCache; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.LlapCacheOnlyInputFormatInterface; @@ -115,4 +116,9 @@ public class MapredParquetInputFormat extends FileInputFormat<NullWritable, Arra return true; } + + @Override + public VectorizedSupport.Support[] getSupportedFeatures() { + return null; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 394f826..f4e8207 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimizer.physical; import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM; 
+import java.io.IOException; import java.io.Serializable; import java.lang.annotation.Annotation; import java.util.ArrayList; @@ -40,7 +41,9 @@ import java.util.regex.Pattern; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; import org.apache.hadoop.hive.ql.exec.vector.reducesink.*; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator; import org.slf4j.Logger; @@ -129,7 +132,6 @@ import org.apache.hadoop.hive.ql.plan.VectorPTFDesc; import org.apache.hadoop.hive.ql.plan.VectorPTFInfo; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType; import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; -import org.apache.hadoop.hive.ql.plan.VectorizationCondition; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; import org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc; @@ -1207,6 +1209,14 @@ public class Vectorizer implements PhysicalPlanResolver { private Support[] getVectorizedInputFormatSupports( Class<? extends InputFormat> inputFileFormatClass) { + try { + InputFormat inputFormat = FetchOperator.getInputFormatFromCache(inputFileFormatClass, hiveConf); + if (inputFormat instanceof VectorizedInputFormatInterface) { + return ((VectorizedInputFormatInterface) inputFormat).getSupportedFeatures(); + } + } catch (IOException e) { + LOG.error("Unable to instantiate {} input format class. Cannot determine vectorization support.", inputFileFormatClass.getName(), e); + } // FUTURE: Decide how to ask an input file format what vectorization features it supports. 
return null; } @@ -1830,14 +1840,6 @@ public class Vectorizer implements PhysicalPlanResolver { supportRemovedReasons.add(removeString); } - // And, if LLAP is enabled for now, disable DECIMAL_64; - if (isLlapIoEnabled && supportSet.contains(Support.DECIMAL_64)) { - supportSet.remove(Support.DECIMAL_64); - String removeString = - "DECIMAL_64 disabled because LLAP is enabled"; - supportRemovedReasons.add(removeString); - } - // Now rememember what is supported for this query and any support that was // removed. vectorTaskColumnInfo.setSupportSetInUse(supportSet); @@ -2246,6 +2248,7 @@ public class Vectorizer implements PhysicalPlanResolver { @Override public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticException { + physicalContext = physicalContext; hiveConf = physicalContext.getConf(); planMapper = physicalContext.getContext().getPlanMapper(); @@ -4265,6 +4268,13 @@ public class Vectorizer implements PhysicalPlanResolver { vecAggrClasses, aggregateName, inputColVectorType, outputColVectorType, udafEvaluatorMode); if (vecAggrClass != null) { + // for now, disable operating on decimal64 column vectors for semijoin reduction as + // we have to make sure same decimal type should be used during bloom filter creation + // and bloom filter probing + if (aggregateName.equals("bloom_filter")) { + inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression); + inputColVectorType = ColumnVector.Type.DECIMAL; + } final VectorAggregationDesc vecAggrDesc = new VectorAggregationDesc( aggrDesc, evaluator, inputTypeInfo, inputColVectorType, inputExpression, @@ -4359,8 +4369,6 @@ public class Vectorizer implements PhysicalPlanResolver { return new ImmutablePair<Operator<? extends OperatorDesc>, String>(vectorOp, null); } - static int fake; - public static Operator<? extends OperatorDesc> vectorizeSelectOperator( Operator<? 
extends OperatorDesc> selectOp, VectorizationContext vContext, VectorSelectDesc vectorSelectDesc) @@ -4386,6 +4394,13 @@ public class Vectorizer implements PhysicalPlanResolver { if (index < size) { vectorSelectExprs = Arrays.copyOf(vectorSelectExprs, index); } + + // Fix up the case where parent expression's output data type physical variations is DECIMAL whereas + // at least one of its children is DECIMAL_64. Some expressions like x % y for example only accepts DECIMAL + // for x and y (at this time there is only DecimalColModuloDecimalColumn so both x and y has to be DECIMAL). + // The following method introduces a cast if x or y is DECIMAL_64 and parent expression (x % y) is DECIMAL. + fixDecimalDataTypePhysicalVariations(vContext, vectorSelectExprs); + vectorSelectDesc.setSelectExpressions(vectorSelectExprs); vectorSelectDesc.setProjectedOutputColumns(projectedOutputColumns); @@ -4394,6 +4409,84 @@ public class Vectorizer implements PhysicalPlanResolver { vContext, vectorSelectDesc); } + private static void fixDecimalDataTypePhysicalVariations(final VectorizationContext vContext, + final VectorExpression[] vectorSelectExprs) throws HiveException { + for (int i = 0; i < vectorSelectExprs.length; i++) { + VectorExpression parent = vectorSelectExprs[i]; + VectorExpression newParent = fixDecimalDataTypePhysicalVariations(parent, parent.getChildExpressions(), + vContext); + if (parent.getClass() == newParent.getClass() && parent != newParent) { + vectorSelectExprs[i] = newParent; + } + } + } + + private static VectorExpression fixDecimalDataTypePhysicalVariations(final VectorExpression parent, + final VectorExpression[] children, final VectorizationContext vContext) throws HiveException { + if (children == null || children.length == 0) { + return parent; + } + + for (int i = 0; i < children.length; i++) { + VectorExpression child = children[i]; + VectorExpression newChild = fixDecimalDataTypePhysicalVariations(child, child.getChildExpressions(), vContext); + if 
(child.getClass() == newChild.getClass() && child != newChild) { + children[i] = newChild; + } + } + if (parent.getOutputDataTypePhysicalVariation() == DataTypePhysicalVariation.NONE) { + boolean inputArgsChanged = false; + DataTypePhysicalVariation[] dataTypePhysicalVariations = parent.getInputDataTypePhysicalVariations(); + VectorExpression oldExpression = null; + VectorExpression newExpression = null; + for (int i = 0; i < children.length; i++) { + oldExpression = children[i]; + // we found at least one children with mismatch + if (oldExpression.getOutputDataTypePhysicalVariation() == DataTypePhysicalVariation.DECIMAL_64) { + newExpression = vContext.wrapWithDecimal64ToDecimalConversion(oldExpression); + children[i] = newExpression; + inputArgsChanged = true; + dataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE; + } + } + // fix up the input column numbers and output column numbers + if (inputArgsChanged) { + if (parent instanceof VectorUDFAdaptor) { + VectorUDFAdaptor parentAdaptor = (VectorUDFAdaptor) parent; + VectorUDFArgDesc[] argDescs = parentAdaptor.getArgDescs(); + for (VectorUDFArgDesc argDesc : argDescs) { + if (argDesc.getColumnNum() == oldExpression.getOutputColumnNum()) { + argDesc.setColumnNum(newExpression.getOutputColumnNum()); + break; + } + } + } else { + int argumentCount = children.length + (parent.getOutputColumnNum() == -1 ? 
0 : 1); + Object[] arguments = new Object[argumentCount]; + // new input column numbers + for (int i = 0; i < children.length; i++) { + VectorExpression vce = children[i]; + arguments[i] = vce.getOutputColumnNum(); + } + // retain output column number from parent + if (parent.getOutputColumnNum() != -1) { + arguments[arguments.length - 1] = parent.getOutputColumnNum(); + } + // re-instantiate the parent expression with new arguments + VectorExpression newParent = vContext.instantiateExpression(parent.getClass(), parent.getOutputTypeInfo(), + parent.getOutputDataTypePhysicalVariation(), arguments); + newParent.setOutputTypeInfo(parent.getOutputTypeInfo()); + newParent.setOutputDataTypePhysicalVariation(parent.getOutputDataTypePhysicalVariation()); + newParent.setInputTypeInfos(parent.getInputTypeInfos()); + newParent.setInputDataTypePhysicalVariations(dataTypePhysicalVariations); + newParent.setChildExpressions(parent.getChildExpressions()); + return newParent; + } + } + } + return parent; + } + private static void fillInPTFEvaluators( List<WindowFunctionDef> windowsFunctions, String[] evaluatorFunctionNames, http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index dc58ad1..fe475f6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -229,7 +229,7 @@ public class TestInputOutputFormat { return "booleanValue,byteValue,shortValue,intValue,longValue,floatValue,doubleValue,stringValue,decimalValue,dateValue,timestampValue"; } static String getColumnTypesProperty() { - return "boolean:tinyint:smallint:int:bigint:float:double:string:decimal:date:timestamp"; 
+ return "boolean:tinyint:smallint:int:bigint:float:double:string:decimal(38,18):date:timestamp"; } } @@ -3847,9 +3847,10 @@ public class TestInputOutputFormat { * Test schema evolution when using the reader directly. */ @Test - public void testSchemaEvolution() throws Exception { + public void testSchemaEvolutionOldDecimal() throws Exception { TypeDescription fileSchema = TypeDescription.fromString("struct<a:int,b:struct<c:int>,d:string>"); + conf.set(ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED.varname, "decimal_64"); Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .fileSystem(fs) @@ -3915,6 +3916,78 @@ public class TestInputOutputFormat { } /** + * Test schema evolution when using the reader directly. + */ + @Test + public void testSchemaEvolutionDecimal64() throws Exception { + TypeDescription fileSchema = + TypeDescription.fromString("struct<a:int,b:struct<c:int>,d:string>"); + conf.set(ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED.varname, "decimal_64"); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(fileSchema) + .compress(org.apache.orc.CompressionKind.NONE)); + VectorizedRowBatch batch = fileSchema.createRowBatch(TypeDescription.RowBatchVersion.USE_DECIMAL64,1000); + batch.size = 1000; + LongColumnVector lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]); + for(int r=0; r < 1000; r++) { + ((LongColumnVector) batch.cols[0]).vector[r] = r * 42; + lcv.vector[r] = r * 10001; + ((BytesColumnVector) batch.cols[2]).setVal(r, + Integer.toHexString(r).getBytes(StandardCharsets.UTF_8)); + } + writer.addRowBatch(batch); + writer.close(); + TypeDescription readerSchema = TypeDescription.fromString( + "struct<a:int,b:struct<c:int,future1:int>,d:string,future2:int>"); + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + RecordReader rows = reader.rowsOptions(new Reader.Options() + 
.schema(readerSchema)); + batch = readerSchema.createRowBatchV2(); + lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]); + LongColumnVector future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]); + assertEquals(true, rows.nextBatch(batch)); + assertEquals(1000, batch.size); + assertEquals(true, future1.isRepeating); + assertEquals(true, future1.isNull[0]); + assertEquals(true, batch.cols[3].isRepeating); + assertEquals(true, batch.cols[3].isNull[0]); + for(int r=0; r < batch.size; ++r) { + assertEquals("row " + r, r * 42, ((LongColumnVector) batch.cols[0]).vector[r]); + assertEquals("row " + r, r * 10001, lcv.vector[r]); + assertEquals("row " + r, r * 10001, lcv.vector[r]); + assertEquals("row " + r, Integer.toHexString(r), + ((BytesColumnVector) batch.cols[2]).toString(r)); + } + assertEquals(false, rows.nextBatch(batch)); + rows.close(); + + // try it again with an include vector + rows = reader.rowsOptions(new Reader.Options() + .schema(readerSchema) + .include(new boolean[]{false, true, true, true, false, false, true})); + batch = readerSchema.createRowBatchV2(); + lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]); + future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]); + assertEquals(true, rows.nextBatch(batch)); + assertEquals(1000, batch.size); + assertEquals(true, future1.isRepeating); + assertEquals(true, future1.isNull[0]); + assertEquals(true, batch.cols[3].isRepeating); + assertEquals(true, batch.cols[3].isNull[0]); + assertEquals(true, batch.cols[2].isRepeating); + assertEquals(true, batch.cols[2].isNull[0]); + for(int r=0; r < batch.size; ++r) { + assertEquals("row " + r, r * 42, ((LongColumnVector) batch.cols[0]).vector[r]); + assertEquals("row " + r, r * 10001, lcv.vector[r]); + } + assertEquals(false, rows.nextBatch(batch)); + rows.close(); + } + + /** * Test column projection when using ACID. 
*/ @Test @@ -3933,7 +4006,7 @@ public class TestInputOutputFormat { .fileSystem(fs) .setSchema(fileSchema) .compress(org.apache.orc.CompressionKind.NONE)); - VectorizedRowBatch batch = fileSchema.createRowBatch(1000); + VectorizedRowBatch batch = fileSchema.createRowBatch(TypeDescription.RowBatchVersion.USE_DECIMAL64,1000); batch.size = 1000; StructColumnVector scv = (StructColumnVector)batch.cols[5]; // operation @@ -4047,7 +4120,7 @@ public class TestInputOutputFormat { .stripeSize(128); // Create ORC file with small stripe size so we can write multiple stripes. Writer writer = OrcFile.createWriter(testFilePath, options); - VectorizedRowBatch batch = fileSchema.createRowBatch(1000); + VectorizedRowBatch batch = fileSchema.createRowBatch(TypeDescription.RowBatchVersion.USE_DECIMAL64,1000); batch.size = 1000; StructColumnVector scv = (StructColumnVector)batch.cols[5]; // operation http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java index d8a7af8..cc29384 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java @@ -168,7 +168,7 @@ public class TestOrcRawRecordMerger { setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth"); OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS); setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth"); - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))) + Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class), Mockito.any(HiveConf.class))) .thenReturn(recordReader); Mockito.when(recordReader.hasNext()). 
@@ -192,7 +192,7 @@ public class TestOrcRawRecordMerger { RecordIdentifier minKey = new RecordIdentifier(10, 20, 30); RecordIdentifier maxKey = new RecordIdentifier(40, 50, 60); ReaderPair pair = new OrcRawRecordMerger.ReaderPairAcid(key, reader, minKey, maxKey, - new Reader.Options()); + new Reader.Options(), new HiveConf()); RecordReader recordReader = pair.getRecordReader(); assertEquals(10, key.getWriteId()); assertEquals(20, key.getBucketProperty()); @@ -218,7 +218,7 @@ public class TestOrcRawRecordMerger { Reader reader = createMockReader(); ReaderPair pair = new OrcRawRecordMerger.ReaderPairAcid(key, reader, null, null, - new Reader.Options()); + new Reader.Options(), new HiveConf()); RecordReader recordReader = pair.getRecordReader(); assertEquals(10, key.getWriteId()); assertEquals(20, key.getBucketProperty()); @@ -274,7 +274,7 @@ public class TestOrcRawRecordMerger { OrcStruct row4 = createOriginalRow("fourth"); OrcStruct row5 = createOriginalRow("fifth"); - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))) + Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class), Mockito.any(HiveConf.class))) .thenReturn(recordReader); Mockito.when(recordReader.hasNext()). 
thenReturn(true, true, true, true, true, false); @@ -410,7 +410,7 @@ public class TestOrcRawRecordMerger { types.add(typeBuilder.build()); Mockito.when(reader.getTypes()).thenReturn(types); - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class))) + Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class), Mockito.any(HiveConf.class))) .thenReturn(recordReader); OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS); http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java index 0c9c95d..c23f00e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java @@ -49,6 +49,7 @@ import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import org.apache.orc.TypeDescription; import org.junit.Before; import org.junit.Test; @@ -151,7 +152,7 @@ public class TestVectorizedORCReader { OrcFile.readerOptions(conf)); RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows(); RecordReaderImpl rr = (RecordReaderImpl) reader.rows(); - VectorizedRowBatch batch = reader.getSchema().createRowBatch(); + VectorizedRowBatch batch = reader.getSchema().createRowBatchV2(); OrcStruct row = null; // Check Vectorized ORC reader against ORC row reader http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java index e478371..551e5ca 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java @@ -226,7 +226,7 @@ public class TestVectorizedOrcAcidRowBatchReader { assertTrue(vectorizedReader.getDeleteEventRegistry() instanceof SortMergedDeleteEventRegistry); } TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); - VectorizedRowBatch vectorizedRowBatch = schema.createRowBatch(); + VectorizedRowBatch vectorizedRowBatch = schema.createRowBatchV2(); vectorizedRowBatch.setPartitionInfo(1, 0); // set data column count as 1. long previousPayload = Long.MIN_VALUE; while (vectorizedReader.next(null, vectorizedRowBatch)) { http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/explainanalyze_3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explainanalyze_3.q b/ql/src/test/queries/clientpositive/explainanalyze_3.q index 3d5b3a8..1f31218 100644 --- a/ql/src/test/queries/clientpositive/explainanalyze_3.q +++ b/ql/src/test/queries/clientpositive/explainanalyze_3.q @@ -110,7 +110,7 @@ select * from cte; explain analyze with cte as (select * from src order by key limit 5) select * from cte; -create table orc_merge5_n1 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; +create table orc_merge5_n1 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc; load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5_n1; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/llap_acid2.q ---------------------------------------------------------------------- diff --git 
a/ql/src/test/queries/clientpositive/llap_acid2.q b/ql/src/test/queries/clientpositive/llap_acid2.q index a409c26..cd06d31 100644 --- a/ql/src/test/queries/clientpositive/llap_acid2.q +++ b/ql/src/test/queries/clientpositive/llap_acid2.q @@ -29,18 +29,27 @@ CREATE TABLE orc_llap_n2 ( cfloat1 FLOAT, cdouble1 DOUBLE, cstring1 string, - cfloat2 float -) stored as orc TBLPROPERTIES ('transactional'='true'); + cfloat2 float, + cdecimal1 decimal(10,3), + cdecimal2 decimal(38,10) +) stored as orc TBLPROPERTIES ('transactional'='true','orc.write.format'='UNSTABLE-PRE-2.0'); insert into table orc_llap_n2 select cint, cbigint, cfloat, cdouble, cint as c1, cbigint as c2, cfloat as c3, cdouble as c4, cint as c8, cbigint as c7, cfloat as c6, cdouble as c5, - cstring1, cfloat as c9 from alltypesorc order by cdouble asc limit 30; - + cstring1, cfloat as c9, cast("1.123" as decimal(10,3))as c10, + cast("1.123456789" as decimal(38,18)) as c11 from alltypesorc order by cdouble asc limit 30; +alter table orc_llap_n2 set TBLPROPERTIES ('transactional'='true','orc.write.format'='0.12'); +insert into table orc_llap_n2 +select cint, cbigint, cfloat, cdouble, + cint as c1, cbigint as c2, cfloat as c3, cdouble as c4, + cint as c8, cbigint as c7, cfloat as c6, cdouble as c5, + cstring1, cfloat as c9, cast("3.321" as decimal(10,3))as c10, + cast("9.987654321" as decimal(38,18)) as c11 from alltypesorc order by cdouble asc limit 30; CREATE TABLE orc_llap2 ( @@ -57,18 +66,22 @@ CREATE TABLE orc_llap2 ( cfloat1 FLOAT, cdouble1 DOUBLE, cstring1 string, - cfloat2 float -) stored as orc TBLPROPERTIES ('transactional'='false'); + cfloat2 float, + cdecimal1 decimal(10,3), + cdecimal2 decimal(38,10) +) stored as orc TBLPROPERTIES ('transactional'='false', 'orc.write.format'='UNSTABLE-PRE-2.0'); insert into table orc_llap2 select cint, cbigint, cfloat, cdouble, cint as c1, cbigint as c2, cfloat as c3, cdouble as c4, cint as c8, cbigint as c7, cfloat as c6, cdouble as c5, - cstring1, cfloat as c9 from 
alltypesorc order by cdouble asc limit 30; + cstring1, cfloat as c9, cast("1.123" as decimal(10,3))as c10, + cast("1.123456789" as decimal(38,18)) as c11 from alltypesorc order by cdouble asc limit 30; -alter table orc_llap2 set TBLPROPERTIES ('transactional'='true'); +alter table orc_llap2 set TBLPROPERTIES ('transactional'='true','orc.write.format'='0.12'); -update orc_llap2 set cstring1 = 'testvalue' where cstring1 = 'N016jPED08o'; +update orc_llap2 set cstring1 = 'testvalue', cdecimal1 = cast("3.321" as decimal(10,3)), +cdecimal2 = cast("9.987654321" as decimal(38,18)) where cstring1 = 'N016jPED08o'; SET hive.llap.io.enabled=true; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/llap_decimal64_reader.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/llap_decimal64_reader.q b/ql/src/test/queries/clientpositive/llap_decimal64_reader.q new file mode 100644 index 0000000..a81feba --- /dev/null +++ b/ql/src/test/queries/clientpositive/llap_decimal64_reader.q @@ -0,0 +1,54 @@ +--! 
qt:dataset:alltypesorc +SET hive.vectorized.execution.enabled=true; + +SET hive.llap.io.enabled=false; + +SET hive.exec.orc.default.row.index.stride=1000; +SET hive.optimize.index.filter=true; +set hive.auto.convert.join=false; + +DROP TABLE orc_llap_n0; + +-- this test mixes and matches orc versions and flips config to use decimal64 column vectors +set hive.auto.convert.join=true; +SET hive.llap.io.enabled=true; +CREATE TABLE orc_llap_n0( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN, + cdecimal1 decimal(10,2), + cdecimal2 decimal(38,5)) + STORED AS ORC tblproperties ("orc.compress"="NONE"); + +insert into table orc_llap_n0 +select ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2, + cast("3.345" as decimal(10,2)), cast("5.56789" as decimal(38,5)) from alltypesorc; + +alter table orc_llap_n0 set tblproperties ("orc.compress"="NONE", 'orc.write.format'='UNSTABLE-PRE-2.0'); + +insert into table orc_llap_n0 +select ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2, + cast("4.456" as decimal(10,2)), cast("5.56789" as decimal(38,5)) from alltypesorc; + +set hive.vectorized.input.format.supports.enabled=decimal_64; +explain vectorization select cdecimal1,cdecimal2 from orc_llap_n0 where cdecimal1 = cast("3.345" as decimal(10,2)) or cdecimal1 = cast("4.456" as decimal(10,2)) + group by cdecimal1,cdecimal2 limit 2; +select cdecimal1,cdecimal2 from orc_llap_n0 where cdecimal1 = cast("3.345" as decimal(10,2)) or cdecimal1 = cast("4.456" as decimal(10,2)) + group by cdecimal1,cdecimal2 limit 2; + +set hive.vectorized.input.format.supports.enabled=none; +explain vectorization select cdecimal1,cdecimal2 from orc_llap_n0 where cdecimal1 = cast("3.345" as 
decimal(10,2)) or cdecimal1 = cast("4.456" as decimal(10,2)) + group by cdecimal1,cdecimal2 limit 2; +select cdecimal1,cdecimal2 from orc_llap_n0 where cdecimal1 = cast("3.345" as decimal(10,2)) or cdecimal1 = cast("4.456" as decimal(10,2)) + group by cdecimal1,cdecimal2 limit 2; + +DROP TABLE orc_llap_n0; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/llap_uncompressed.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/llap_uncompressed.q b/ql/src/test/queries/clientpositive/llap_uncompressed.q index 875356c..de3cdc6 100644 --- a/ql/src/test/queries/clientpositive/llap_uncompressed.q +++ b/ql/src/test/queries/clientpositive/llap_uncompressed.q @@ -24,13 +24,20 @@ CREATE TABLE orc_llap_n0( ctimestamp1 TIMESTAMP, ctimestamp2 TIMESTAMP, cboolean1 BOOLEAN, - cboolean2 BOOLEAN) + cboolean2 BOOLEAN, + cdecimal1 decimal(10,2), + cdecimal2 decimal(38,5)) STORED AS ORC tblproperties ("orc.compress"="NONE"); insert into table orc_llap_n0 -select ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 -from alltypesorc; +select ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2, + cast("3.345" as decimal(10,2)), cast("5.56789" as decimal(38,5)) from alltypesorc; +alter table orc_llap_n0 set tblproperties ("orc.compress"="NONE", 'orc.write.format'='UNSTABLE-PRE-2.0'); + +insert into table orc_llap_n0 +select ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2, + cast("3.345" as decimal(10,2)), cast("5.56789" as decimal(38,5)) from alltypesorc; SET hive.llap.io.enabled=true; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_create.q ---------------------------------------------------------------------- diff --git 
a/ql/src/test/queries/clientpositive/orc_create.q b/ql/src/test/queries/clientpositive/orc_create.q index 6d41009..dfae138 100644 --- a/ql/src/test/queries/clientpositive/orc_create.q +++ b/ql/src/test/queries/clientpositive/orc_create.q @@ -78,7 +78,7 @@ CREATE TABLE orc_create_people_staging ( first_name string, last_name string, address string, - salary decimal, + salary decimal(38,0), start_date timestamp, state string); @@ -90,7 +90,7 @@ CREATE TABLE orc_create_people ( first_name string, last_name string, address string, - salary decimal, + salary decimal(38,0), start_date timestamp) PARTITIONED BY (state string) STORED AS orc; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_llap_counters.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_llap_counters.q b/ql/src/test/queries/clientpositive/orc_llap_counters.q index 9f8e3bb..f3ee76c 100644 --- a/ql/src/test/queries/clientpositive/orc_llap_counters.q +++ b/ql/src/test/queries/clientpositive/orc_llap_counters.q @@ -40,7 +40,8 @@ CREATE TABLE orc_ppd_staging_n0(t tinyint, bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd_staging_n0 select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), `dec`, bin from staging_n6 order by t, s; +insert overwrite table orc_ppd_staging_n0 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c, +cast(s as varchar(50)) as v, cast(ts as date) as da, `dec`, bin from staging_n6 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin; -- just to introduce a gap in min/max range for bloom filters. 
The dataset has contiguous values -- which makes it hard to test bloom filters @@ -62,7 +63,8 @@ CREATE TABLE orc_ppd_n1(t tinyint, bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd_n1 select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), da, `dec`, bin from orc_ppd_staging_n0 order by t, s; +insert overwrite table orc_ppd_n1 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c, +cast(s as varchar(50)) as v, da, `dec`, bin from orc_ppd_staging_n0 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin; describe formatted orc_ppd_n1; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_llap_counters1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_llap_counters1.q b/ql/src/test/queries/clientpositive/orc_llap_counters1.q index 16df96c..f12870c 100644 --- a/ql/src/test/queries/clientpositive/orc_llap_counters1.q +++ b/ql/src/test/queries/clientpositive/orc_llap_counters1.q @@ -39,7 +39,8 @@ CREATE TABLE orc_ppd_staging(t tinyint, bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), `dec`, bin from staging order by t, s; +insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c, +cast(s as varchar(50)) as v, cast(ts as date) as da, `dec`, bin from staging order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin; -- just to introduce a gap in min/max range for bloom filters. 
The dataset has contiguous values -- which makes it hard to test bloom filters @@ -61,7 +62,9 @@ CREATE TABLE orc_ppd(t tinyint, bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), da, `dec`, bin from orc_ppd_staging order by t, s; +insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c, +cast(s as varchar(50)) as v, da, `dec`, bin from orc_ppd_staging order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin; + describe formatted orc_ppd; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_merge11.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_merge11.q b/ql/src/test/queries/clientpositive/orc_merge11.q index d5add84..208c5b7 100644 --- a/ql/src/test/queries/clientpositive/orc_merge11.q +++ b/ql/src/test/queries/clientpositive/orc_merge11.q @@ -3,15 +3,15 @@ set hive.vectorized.execution.enabled=false; DROP TABLE orcfile_merge1_n2; DROP TABLE orc_split_elim_n0; -create table orc_split_elim_n0 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; +create table orc_split_elim_n0 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc; load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim_n0; load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim_n0; -create table orcfile_merge1_n2 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc tblproperties("orc.compress.size"="4096"); +create table orcfile_merge1_n2 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc tblproperties("orc.compress.size"="4096"); -insert overwrite 
table orcfile_merge1_n2 select * from orc_split_elim_n0; -insert into table orcfile_merge1_n2 select * from orc_split_elim_n0; +insert overwrite table orcfile_merge1_n2 select * from orc_split_elim_n0 order by userid; +insert into table orcfile_merge1_n2 select * from orc_split_elim_n0 order by userid; dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1_n2/; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_merge5.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_merge5.q b/ql/src/test/queries/clientpositive/orc_merge5.q index 190c6e0..4ae5ba6 100644 --- a/ql/src/test/queries/clientpositive/orc_merge5.q +++ b/ql/src/test/queries/clientpositive/orc_merge5.q @@ -3,8 +3,8 @@ set hive.explain.user=false; -- SORT_QUERY_RESULTS -create table orc_merge5_n5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; -create table orc_merge5b_n0 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; +create table orc_merge5_n5 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc; +create table orc_merge5b_n0 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc; load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5_n5; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_merge6.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_merge6.q b/ql/src/test/queries/clientpositive/orc_merge6.q index fabe656..1c7ab08 100644 --- a/ql/src/test/queries/clientpositive/orc_merge6.q +++ b/ql/src/test/queries/clientpositive/orc_merge6.q @@ -4,8 +4,8 @@ set hive.explain.user=false; -- SORT_QUERY_RESULTS -- orc file merge tests for static partitions 
-create table orc_merge5_n4 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; -create table orc_merge5a_n1 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (year string, hour int) stored as orc; +create table orc_merge5_n4 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc; +create table orc_merge5a_n1 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) partitioned by (year string, hour int) stored as orc; load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5_n4; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_merge7.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_merge7.q b/ql/src/test/queries/clientpositive/orc_merge7.q index 2558797..6504989 100644 --- a/ql/src/test/queries/clientpositive/orc_merge7.q +++ b/ql/src/test/queries/clientpositive/orc_merge7.q @@ -5,8 +5,8 @@ set hive.explain.user=false; -- orc merge file tests for dynamic partition case -create table orc_merge5_n2 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; -create table orc_merge5a_n0 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (st double) stored as orc; +create table orc_merge5_n2 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc; +create table orc_merge5a_n0 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) partitioned by (st double) stored as orc; load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5_n2; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_merge_incompat1.q 
---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_merge_incompat1.q b/ql/src/test/queries/clientpositive/orc_merge_incompat1.q index aba4617..2b768ea 100644 --- a/ql/src/test/queries/clientpositive/orc_merge_incompat1.q +++ b/ql/src/test/queries/clientpositive/orc_merge_incompat1.q @@ -3,8 +3,8 @@ set hive.explain.user=false; -- SORT_QUERY_RESULTS -create table orc_merge5_n3 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; -create table orc_merge5b (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; +create table orc_merge5_n3 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc; +create table orc_merge5b (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc; load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5_n3; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_merge_incompat2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_merge_incompat2.q b/ql/src/test/queries/clientpositive/orc_merge_incompat2.q index ef66522..6281c96 100644 --- a/ql/src/test/queries/clientpositive/orc_merge_incompat2.q +++ b/ql/src/test/queries/clientpositive/orc_merge_incompat2.q @@ -6,8 +6,8 @@ set hive.explain.user=false; -- orc merge file tests for dynamic partition case -create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; -create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (st double) stored as orc; +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc; +create table orc_merge5a 
(userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) partitioned by (st double) stored as orc; load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_ppd_basic.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_ppd_basic.q b/ql/src/test/queries/clientpositive/orc_ppd_basic.q index f0b0b96..b3f166a 100644 --- a/ql/src/test/queries/clientpositive/orc_ppd_basic.q +++ b/ql/src/test/queries/clientpositive/orc_ppd_basic.q @@ -40,7 +40,8 @@ CREATE TABLE orc_ppd_staging_n1(t tinyint, bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd_staging_n1 select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), `dec`, bin from staging_n7 order by t, s; +insert overwrite table orc_ppd_staging_n1 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c, +cast(s as varchar(50)) as v, cast(ts as date) as da, `dec`, bin from staging_n7 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin; -- just to introduce a gap in min/max range for bloom filters. 
The dataset has contiguous values -- which makes it hard to test bloom filters @@ -62,7 +63,9 @@ CREATE TABLE orc_ppd_n2(t tinyint, bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd_n2 select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), da, `dec`, bin from orc_ppd_staging_n1 order by t, s; +insert overwrite table orc_ppd_n2 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c, +cast(s as varchar(50)) as v, da, `dec`, bin from orc_ppd_staging_n1 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin; + SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q b/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q index 4235c2c..9d79b11 100644 --- a/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q +++ b/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q @@ -37,7 +37,8 @@ CREATE TABLE orc_ppd_staging_n2(t tinyint, bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd_staging_n2 select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), `dec`, bin from staging_n8 order by t, s; +insert overwrite table orc_ppd_staging_n2 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c, +cast(s as varchar(50)) as v, cast(ts as date) as da, `dec`, bin from staging_n8 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin; -- just to introduce a gap in min/max range for bloom filters. 
The dataset has contiguous values -- which makes it hard to test bloom filters @@ -59,7 +60,8 @@ CREATE TABLE orc_ppd_n3(t tinyint, bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd_n3 select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), da, `dec`, bin from orc_ppd_staging_n2 order by t, s; +insert overwrite table orc_ppd_n3 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c, +cast(s as varchar(50)) as v, da, `dec`, bin from orc_ppd_staging_n2 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin; SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; SET hive.optimize.index.filter=false; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_schema_evolution_float.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_schema_evolution_float.q b/ql/src/test/queries/clientpositive/orc_schema_evolution_float.q index c2d9840..ca5dc6f 100644 --- a/ql/src/test/queries/clientpositive/orc_schema_evolution_float.q +++ b/ql/src/test/queries/clientpositive/orc_schema_evolution_float.q @@ -1,6 +1,8 @@ set hive.vectorized.execution.enabled=false; set hive.optimize.index.filter=false; set hive.metastore.disallow.incompatible.col.type.changes=false; +-- set this to 'decimal_64' after resolving HIVE-19792 +set hive.vectorized.input.format.supports.enabled=none; drop table float_text; create table float_text(f float); http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/orc_split_elimination.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_split_elimination.q b/ql/src/test/queries/clientpositive/orc_split_elimination.q index 719b21c..03e0e73 100644 --- 
a/ql/src/test/queries/clientpositive/orc_split_elimination.q +++ b/ql/src/test/queries/clientpositive/orc_split_elimination.q @@ -2,7 +2,7 @@ set hive.vectorized.execution.enabled=false; -- SORT_QUERY_RESULTS -create table orc_split_elim (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; +create table orc_split_elim (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc; load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim; @@ -105,7 +105,7 @@ select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70; SET hive.optimize.index.filter=false; -- partitioned table -create table orc_split_elim_part (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (country string, year int) stored as orc; +create table orc_split_elim_part (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) partitioned by (country string, year int) stored as orc; alter table orc_split_elim_part add partition(country='us', year=2000); alter table orc_split_elim_part add partition(country='us', year=2001); http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_part_all_primitive.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_part_all_primitive.q b/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_part_all_primitive.q index 427734f..53c16e0 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_part_all_primitive.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_part_all_primitive.q @@ -12,6 +12,8 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.metastore.disallow.incompatible.col.type.changes=false; set hive.default.fileformat=orc; set hive.llap.io.enabled=false; +-- set this to 
'decimal_64' after resolving HIVE-19792 +set hive.vectorized.input.format.supports.enabled=none; -- SORT_QUERY_RESULTS -- http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_part_all_primitive_llap_io.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_part_all_primitive_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_part_all_primitive_llap_io.q index 1eca9e3..f2fb2f0 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_part_all_primitive_llap_io.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_part_all_primitive_llap_io.q @@ -13,6 +13,8 @@ set hive.metastore.disallow.incompatible.col.type.changes=false; set hive.default.fileformat=orc; set hive.llap.io.enabled=true; set hive.llap.io.encode.enabled=true; +-- set this to 'decimal_64' after resolving HIVE-19792 +set hive.vectorized.input.format.supports.enabled=none; -- SORT_QUERY_RESULTS -- http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q b/ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q index 6e35f5a..e811f1d 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive.q @@ -12,6 +12,8 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.metastore.disallow.incompatible.col.type.changes=false; set hive.default.fileformat=orc; set hive.llap.io.enabled=false; +-- set this to 'decimal_64' after resolving HIVE-19792 +set hive.vectorized.input.format.supports.enabled=none; -- SORT_QUERY_RESULTS -- 
http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive_llap_io.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive_llap_io.q index 576f994..bae6cc8 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive_llap_io.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_vec_part_all_primitive_llap_io.q @@ -13,6 +13,8 @@ set hive.metastore.disallow.incompatible.col.type.changes=false; set hive.default.fileformat=orc; set hive.llap.io.enabled=true; set hive.llap.io.encode.enabled=true; +-- set this to 'decimal_64' after resolving HIVE-19792 +set hive.vectorized.input.format.supports.enabled=none; -- SORT_QUERY_RESULTS -- http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/type_change_test_int.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/type_change_test_int.q b/ql/src/test/queries/clientpositive/type_change_test_int.q index 112a674..2a49871 100644 --- a/ql/src/test/queries/clientpositive/type_change_test_int.q +++ b/ql/src/test/queries/clientpositive/type_change_test_int.q @@ -1,3 +1,6 @@ +-- set this to 'decimal_64' after resolving HIVE-19792 +set hive.vectorized.input.format.supports.enabled=none; + -- Create a base table to be used for loading data: Begin drop table if exists testAltCol_n1; create table testAltCol_n1 http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/type_change_test_int_vectorized.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/type_change_test_int_vectorized.q 
b/ql/src/test/queries/clientpositive/type_change_test_int_vectorized.q index 9e93a2f..6a940ac 100644 --- a/ql/src/test/queries/clientpositive/type_change_test_int_vectorized.q +++ b/ql/src/test/queries/clientpositive/type_change_test_int_vectorized.q @@ -1,3 +1,5 @@ +-- set this to 'decimal_64' after resolving HIVE-19792 +set hive.vectorized.input.format.supports.enabled=none; -- Create a base table to be used for loading data: Begin drop table if exists testAltCol; create table testAltCol http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/vector_case_when_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_case_when_1.q b/ql/src/test/queries/clientpositive/vector_case_when_1.q index 0ba17da..8614087 100644 --- a/ql/src/test/queries/clientpositive/vector_case_when_1.q +++ b/ql/src/test/queries/clientpositive/vector_case_when_1.q @@ -5,6 +5,8 @@ set hive.explain.user=false; set hive.fetch.task.conversion=none; set hive.vectorized.execution.enabled=true; +-- SORT_QUERY_RESULTS + CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, @@ -69,8 +71,7 @@ SELECT IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 -FROM lineitem_test -ORDER BY Quantity; +FROM lineitem_test; SELECT L_QUANTITY as Quantity, CASE @@ -109,8 +110,7 @@ SELECT IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 -FROM lineitem_test -ORDER BY Quantity; +FROM lineitem_test; SET hive.vectorized.if.expr.mode=good; @@ -153,8 +153,7 @@ SELECT IF(L_SUPPKEY > 10000, 
DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 -FROM lineitem_test -ORDER BY Quantity; +FROM lineitem_test; SELECT L_QUANTITY as Quantity, CASE @@ -193,8 +192,7 @@ SELECT IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 -FROM lineitem_test -ORDER BY Quantity; +FROM lineitem_test; SET hive.vectorized.if.expr.mode=better; @@ -237,8 +235,7 @@ SELECT IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 -FROM lineitem_test -ORDER BY Quantity; +FROM lineitem_test; SELECT L_QUANTITY as Quantity, CASE @@ -277,6 +274,5 @@ SELECT IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 -FROM lineitem_test -ORDER BY Quantity; - \ No newline at end of file +FROM lineitem_test; + http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/vector_decimal_5.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_decimal_5.q b/ql/src/test/queries/clientpositive/vector_decimal_5.q index f5de13b..e0956e4 100644 --- a/ql/src/test/queries/clientpositive/vector_decimal_5.q +++ b/ql/src/test/queries/clientpositive/vector_decimal_5.q @@ -21,9 +21,10 @@ SELECT key FROM DECIMAL_5 ORDER BY key; SELECT DISTINCT key 
FROM DECIMAL_5 ORDER BY key; +explain SELECT cast(key as decimal) FROM DECIMAL_5; SELECT cast(key as decimal) FROM DECIMAL_5; SELECT cast(key as decimal(6,3)) FROM DECIMAL_5; DROP TABLE DECIMAL_5_txt; -DROP TABLE DECIMAL_5; \ No newline at end of file +DROP TABLE DECIMAL_5; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q b/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q index 6e5b5b6..ef769fb 100644 --- a/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q +++ b/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q @@ -35,11 +35,13 @@ select t1_n48.`dec`, t2_n29.`dec` from t1_n48 join t2_n29 on (t1_n48.`dec`=t2_n2 -- SORT_QUERY_RESULTS select t1_n48.`dec`, t2_n29.`dec` from t1_n48 join t2_n29 on (t1_n48.`dec`=t2_n29.`dec`); +select count(*) from (select t1_n48.`dec`, t2_n29.`dec` from t1_n48 join t2_n29 on (t1_n48.`dec`=t2_n29.`dec`)) as t; explain vectorization detail select t1_n48.`dec`, t1_n48.value_dec, t2_n29.`dec`, t2_n29.value_dec from t1_n48 join t2_n29 on (t1_n48.`dec`=t2_n29.`dec`); select t1_n48.`dec`, t1_n48.value_dec, t2_n29.`dec`, t2_n29.value_dec from t1_n48 join t2_n29 on (t1_n48.`dec`=t2_n29.`dec`); +select count(*) from (select t1_n48.`dec`, t1_n48.value_dec, t2_n29.`dec`, t2_n29.value_dec from t1_n48 join t2_n29 on (t1_n48.`dec`=t2_n29.`dec`)) as t; @@ -72,11 +74,13 @@ select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.` -- SORT_QUERY_RESULTS select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); +select count(*) from (select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)) as t; explain vectorization detail select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from 
t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); +select count(*) from (select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)) as t; set hive.vectorized.input.format.supports.enabled=none; @@ -87,9 +91,11 @@ select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.` -- SORT_QUERY_RESULTS select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); +select count(*) from (select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)) as t; explain vectorization detail select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`); +select count(*) from (select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)) as t; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q index 7998035..743e8db 100644 --- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q +++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q @@ -39,7 +39,7 @@ EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_bigint select count(*) from dsrv2_big a join dsrv2_small b on 
(a.partkey_bigint = b.partkey_bigint); -- single key (decimal) -EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_decimal = b.partkey_decimal); +EXPLAIN VECTORIZATION DETAIL select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_decimal = b.partkey_decimal); select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_decimal = b.partkey_decimal); -- single key (double) http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index eb4a8cb..80bbba4 100644 --- a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1144,8 +1144,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: true usesVectorUDFAdaptor: false @@ -1342,8 +1342,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: true usesVectorUDFAdaptor: false @@ -1511,8 +1511,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false @@ -1541,8 +1541,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: true usesVectorUDFAdaptor: false @@ -1929,8 +1929,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: true usesVectorUDFAdaptor: false @@ -2128,8 +2128,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: true usesVectorUDFAdaptor: false @@ -2298,8 +2298,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false @@ -2328,8 +2328,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 
allNative: true usesVectorUDFAdaptor: false http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out index 957dfd8..66bb2db 100644 --- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out @@ -665,22 +665,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_orc_bucketed - Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1237 Data size: 707880 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) outputColumnNames: ROW__ID - Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1237 Data size: 707880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 618 Data size: 51912 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) - Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 618 Data size: 51912 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: may be used (ACID table) 
@@ -692,13 +692,13 @@ STAGE PLANS: keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 618 Data size: 51912 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index 84477c3..7a880dd 100644 --- a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -3233,19 +3233,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami_n1 - Statistics: Num rows: 267 Data size: 83640 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 281 Data size: 87904 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((de = 109.23) or (de = 119.23)) and enforce_constraint(vc is not null)) (type: boolean) - Statistics: Num rows: 5 Data size: 1566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1564 Basic 
stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), i (type: int), vc (type: varchar(128)) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 5 Data size: 1566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 1566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: varchar(128)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -3255,10 +3255,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 1566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -3326,7 +3326,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami_n1 - Statistics: Num rows: 305 Data size: 95448 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 320 Data size: 100040 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((de = 3.14) and enforce_constraint((i is not null and vc is not null))) (type: 
boolean) Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE