HIVE-19317: Handle schema evolution from int-like types to decimal (Janaki Lahorani, reviewed by Vihang Karajgaonkar)
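In brief: after this change the Parquet readers return NULL, rather than a wrong value, when a stored integer does not fit the Hive column type the table has been evolved to (including evolution to DECIMAL). A minimal standalone sketch of the range rule follows; the names RangeCheckSketch and readAsSmallint are illustrative only and not part of the patch — the real logic lives in the getMinValue()/getMaxValue() helpers added to ETypeConverter in the diff below.

public class RangeCheckSketch {
  // Mirrors the getMinValue()/getMaxValue() helpers added in ETypeConverter:
  // a value is kept only when it lies inside the Hive target type's range, and
  // unsigned Parquet source types (UINT_8..UINT_64) raise the lower bound to 0.
  static Long readAsSmallint(long value, boolean unsignedSource) {
    long min = unsignedSource ? 0 : Short.MIN_VALUE; // getMinValue analog
    long max = Short.MAX_VALUE;                      // getMaxValue analog
    return (value >= min && value <= max) ? Long.valueOf(value) : null; // null => SQL NULL
  }

  public static void main(String[] args) {
    System.out.println(readAsSmallint(1234L, false));  // 1234
    System.out.println(readAsSmallint(70000L, false)); // null: overflows smallint
    System.out.println(readAsSmallint(-5L, true));     // null: negative value from an unsigned column
  }
}
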
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9cd62587 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9cd62587 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9cd62587 Branch: refs/heads/branch-3 Commit: 9cd62587fb1122be08abf6e36c7bf6db0d5f6bb5 Parents: 3c44a38 Author: Janaki Lahorani <jan...@cloudera.com> Authored: Wed May 16 13:14:32 2018 -0700 Committer: Vihang Karajgaonkar <vih...@cloudera.com> Committed: Wed May 16 14:47:01 2018 -0700 ---------------------------------------------------------------------- .../ql/io/parquet/convert/ETypeConverter.java | 486 +++---- .../parquet/vector/ParquetDataColumnReader.java | 4 +- .../vector/ParquetDataColumnReaderFactory.java | 189 ++- .../vector/VectorizedPrimitiveColumnReader.java | 99 +- .../queries/clientpositive/read_uint_parquet.q | 126 ++ .../read_uint_parquet_vectorized.q | 127 ++ .../clientpositive/type_change_test_int.q | 450 +++++++ .../type_change_test_int_vectorized.q | 450 +++++++ .../clientpositive/read_uint_parquet.q.out | 686 ++++++++++ .../read_uint_parquet_vectorized.q.out | 686 ++++++++++ .../clientpositive/type_change_test_int.q.out | 1268 +++++++++++++++++- .../type_change_test_int_vectorized.q.out | 1268 +++++++++++++++++- 12 files changed, 5535 insertions(+), 304 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/9cd62587/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java index 21762cd..8be8d13 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ -26,8 +26,11 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -95,307 +98,213 @@ public enum ETypeConverter { @Override PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) { - if (OriginalType.UINT_8 == type.getOriginalType() || - OriginalType.UINT_16 == type.getOriginalType() || - OriginalType.UINT_32 == type.getOriginalType() || - OriginalType.UINT_64 == type.getOriginalType()) { - if (hiveTypeInfo != null) { - switch (hiveTypeInfo.getTypeName()) { - case serdeConstants.BIGINT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { - if (value >= 0) { - parent.set(index, new LongWritable((long) value)); - } else { - parent.set(index, null); - } - } - }; - case serdeConstants.FLOAT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { - if (value >= 0) { - 
parent.set(index, new FloatWritable((float) value)); - } else { - parent.set(index, null); - } - } - }; - case serdeConstants.DOUBLE_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { - if (value >= 0) { - parent.set(index, new DoubleWritable((float) value)); - } else { - parent.set(index, null); - } - } - }; - case serdeConstants.SMALLINT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { - if ((value >= 0) && (value <= Short.MAX_VALUE)) { - parent.set(index, new IntWritable((int) value)); - } else { - parent.set(index, null); - } - } - }; - case serdeConstants.TINYINT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { - if ((value >= 0) && (value <= Byte.MAX_VALUE)) { - parent.set(index, new IntWritable((int) value)); - } else { - parent.set(index, null); - } - } - }; - default: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { - if (value >= 0) { - parent.set(index, new IntWritable(value)); - } else { - parent.set(index, null); - } - } - }; - } - } - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { - if (value >= 0) { - parent.set(index, new IntWritable(value)); - } else { - parent.set(index, null); - } - } - }; - } else { - if (hiveTypeInfo != null) { - switch (hiveTypeInfo.getTypeName()) { - case serdeConstants.BIGINT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { + if (hiveTypeInfo != null) { + String typeName = TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName()); + final long minValue = getMinValue(type, typeName, Integer.MIN_VALUE); + final long maxValue = getMaxValue(typeName, Integer.MAX_VALUE); + + switch (typeName) { + case serdeConstants.BIGINT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + if (value >= minValue) { parent.set(index, new LongWritable((long) value)); + } else { + parent.set(index, null); } - }; - case serdeConstants.FLOAT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { + } + }; + case serdeConstants.FLOAT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + if (value >= minValue) { parent.set(index, new FloatWritable((float) value)); + } else { + parent.set(index, null); } - }; - case serdeConstants.DOUBLE_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { + } + }; + case serdeConstants.DOUBLE_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + if (value >= minValue) { parent.set(index, new DoubleWritable((float) value)); + } else { + parent.set(index, null); } - }; - case serdeConstants.SMALLINT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { - if ((value >= Short.MIN_VALUE) && (value <= Short.MAX_VALUE)) { - parent.set(index, new IntWritable((int) value)); - } else { - parent.set(index, null); - } + } + }; + case serdeConstants.DECIMAL_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + if (value >= minValue) { + parent.set(index, HiveDecimalUtils + .enforcePrecisionScale(new HiveDecimalWritable(value), + (DecimalTypeInfo) hiveTypeInfo)); + } else { + parent.set(index, null); } - }; - case serdeConstants.TINYINT_TYPE_NAME: - return new 
PrimitiveConverter() { - @Override - public void addInt(final int value) { - if ((value >= Byte.MIN_VALUE) && (value <= Byte.MAX_VALUE)) { - parent.set(index, new IntWritable((int) value)); - } else { - parent.set(index, null); - } + } + }; + case serdeConstants.SMALLINT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + if ((value >= minValue) && (value <= maxValue)) { + parent.set(index, new IntWritable((int) value)); + } else { + parent.set(index, null); } - }; - default: - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { + } + }; + case serdeConstants.TINYINT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + if ((value >= minValue) && (value <= maxValue)) { + parent.set(index, new IntWritable((int) value)); + } else { + parent.set(index, null); + } + } + }; + default: + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + if ((value >= minValue) && (value <= maxValue)) { parent.set(index, new IntWritable(value)); + } else { + parent.set(index, null); } - }; - } + } + }; } - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { + } + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + if (value >= ((OriginalType.UINT_8 == type.getOriginalType() || + OriginalType.UINT_16 == type.getOriginalType() || + OriginalType.UINT_32 == type.getOriginalType() || + OriginalType.UINT_64 == type.getOriginalType()) ? 0 : + Integer.MIN_VALUE)) { parent.set(index, new IntWritable(value)); + } else { + parent.set(index, null); } - }; - } + } + }; } }, EINT64_CONVERTER(Long.TYPE) { @Override - PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) { - if (OriginalType.UINT_8 == type.getOriginalType() || - OriginalType.UINT_16 == type.getOriginalType() || - OriginalType.UINT_32 == type.getOriginalType() || - OriginalType.UINT_64 == type.getOriginalType()) { - if (hiveTypeInfo != null) { - switch (hiveTypeInfo.getTypeName()) { - case serdeConstants.FLOAT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addLong(final long value) { - if (value >= 0) { - parent.set(index, new FloatWritable(value)); - } else { - parent.set(index, null); - } - } - }; - case serdeConstants.DOUBLE_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addLong(final long value) { - if (value >= 0) { - parent.set(index, new DoubleWritable(value)); - } else { - parent.set(index, null); - } - } - }; - case serdeConstants.INT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addLong(long value) { - if ((value >= 0) && (value <= Integer.MAX_VALUE)) { - parent.set(index, new IntWritable((int) value)); - } else { - parent.set(index, null); - } - } - }; - case serdeConstants.SMALLINT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addLong(long value) { - if ((value >= 0) && (value <= Short.MAX_VALUE)) { - parent.set(index, new IntWritable((int) value)); - } else { - parent.set(index, null); - } - } - }; - case serdeConstants.TINYINT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addLong(long value) { - if ((value >= 0) && (value <= Byte.MAX_VALUE)) { - parent.set(index, new IntWritable((int) value)); - } else { - parent.set(index, null); - } - } - }; - default: - return new PrimitiveConverter() { - @Override - public 
void addLong(final long value) { - if (value >= 0) { - parent.set(index, new LongWritable(value)); - } else { - parent.set(index, null); - } - } - }; - } - } - return new PrimitiveConverter() { - @Override - public void addLong(final long value) { - if (value >= 0) { - parent.set(index, new LongWritable(value)); - } else { - parent.set(index, null); - } - } - }; - } else { - if (hiveTypeInfo != null) { - switch (hiveTypeInfo.getTypeName()) { - case serdeConstants.FLOAT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addLong(final long value) { + PrimitiveConverter getConverter(final PrimitiveType type, final int index, + final ConverterParent parent, TypeInfo hiveTypeInfo) { + if (hiveTypeInfo != null) { + String typeName = TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName()); + final long minValue = getMinValue(type, typeName, Long.MIN_VALUE); + final long maxValue = getMaxValue(typeName, Long.MAX_VALUE); + + switch (typeName) { + case serdeConstants.FLOAT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addLong(final long value) { + if (value >= minValue) { parent.set(index, new FloatWritable(value)); + } else { + parent.set(index, null); } - }; - case serdeConstants.DOUBLE_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addLong(final long value) { + } + }; + case serdeConstants.DOUBLE_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addLong(final long value) { + if (value >= minValue) { parent.set(index, new DoubleWritable(value)); + } else { + parent.set(index, null); } - }; - case serdeConstants.INT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addLong(long value) { - if ((value >= Integer.MIN_VALUE) && (value <= Integer.MAX_VALUE)) { - parent.set(index, new IntWritable((int) value)); - } else { - parent.set(index, null); - } + } + }; + case serdeConstants.DECIMAL_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addLong(long value) { + if (value >= minValue) { + parent.set(index, HiveDecimalUtils + .enforcePrecisionScale(new HiveDecimalWritable(value), + (DecimalTypeInfo) hiveTypeInfo)); + } else { + parent.set(index, null); } - }; - case serdeConstants.SMALLINT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addLong(long value) { - if ((value >= Short.MIN_VALUE) && (value <= Short.MAX_VALUE)) { - parent.set(index, new IntWritable((int) value)); - } else { - parent.set(index, null); - } + } + }; + case serdeConstants.INT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addLong(long value) { + if ((value >= minValue) && (value <= maxValue)) { + parent.set(index, new IntWritable((int) value)); + } else { + parent.set(index, null); + } + } + }; + case serdeConstants.SMALLINT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addLong(long value) { + if ((value >= minValue) && (value <= maxValue)) { + parent.set(index, new IntWritable((int) value)); + } else { + parent.set(index, null); } - }; - case serdeConstants.TINYINT_TYPE_NAME: - return new PrimitiveConverter() { - @Override - public void addLong(long value) { - if ((value >= Byte.MIN_VALUE) && (value <= Byte.MAX_VALUE)) { - parent.set(index, new IntWritable((int) value)); - } else { - parent.set(index, null); - } + } + }; + case serdeConstants.TINYINT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addLong(long value) { + if ((value >= minValue) && (value <= maxValue)) { + parent.set(index, 
new IntWritable((int) value)); + } else { + parent.set(index, null); } - }; - default: - return new PrimitiveConverter() { - @Override - public void addLong(final long value) { + } + }; + default: + return new PrimitiveConverter() { + @Override + public void addLong(final long value) { + if (value >= minValue) { parent.set(index, new LongWritable(value)); + } else { + parent.set(index, null); } - }; - } + } + }; } - return new PrimitiveConverter() { - @Override - public void addLong(final long value) { + } + return new PrimitiveConverter() { + @Override + public void addLong(final long value) { + if (value >= ((OriginalType.UINT_8 == type.getOriginalType() || + OriginalType.UINT_16 == type.getOriginalType() || + OriginalType.UINT_32 == type.getOriginalType() || + OriginalType.UINT_64 == type.getOriginalType()) ? 0 : Long.MIN_VALUE)) { parent.set(index, new LongWritable(value)); + } else { + parent.set(index, null); } - }; - } + } + }; } }, EBINARY_CONVERTER(Binary.class) { @@ -497,6 +406,39 @@ public enum ETypeConverter { throw new IllegalArgumentException("Converter not found ... for type : " + type); } + private static long getMinValue(final PrimitiveType type, String typeName, long defaultValue) { + if (OriginalType.UINT_8 == type.getOriginalType() || + OriginalType.UINT_16 == type.getOriginalType() || + OriginalType.UINT_32 == type.getOriginalType() || + OriginalType.UINT_64 == type.getOriginalType()) { + return 0; + } else { + switch (typeName) { + case serdeConstants.INT_TYPE_NAME: + return Integer.MIN_VALUE; + case serdeConstants.SMALLINT_TYPE_NAME: + return Short.MIN_VALUE; + case serdeConstants.TINYINT_TYPE_NAME: + return Byte.MIN_VALUE; + default: + return defaultValue; + } + } + } + + private static long getMaxValue(String typeName, long defaultValue) { + switch (typeName) { + case serdeConstants.INT_TYPE_NAME: + return Integer.MAX_VALUE; + case serdeConstants.SMALLINT_TYPE_NAME: + return Short.MAX_VALUE; + case serdeConstants.TINYINT_TYPE_NAME: + return Byte.MAX_VALUE; + default: + return defaultValue; + } + } + public abstract static class BinaryConverter<T extends Writable> extends PrimitiveConverter { protected final PrimitiveType type; private final ConverterParent parent; http://git-wip-us.apache.org/repos/asf/hive/blob/9cd62587/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java index 5e68022..954e29b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java @@ -110,9 +110,11 @@ public interface ParquetDataColumnReader { * The type of the data in Parquet files need not match the type in HMS. In that case * the value returned to the user will depend on the data. If the data value is within the valid * range accommodated by the HMS type, the data will be returned as is. When data is not within - * the valid range, a NULL will be returned. This function will do the appropriate check. + * the valid range, a NULL will be returned. These functions will do the appropriate check. 
*/ boolean isValid(long value); + boolean isValid(float value); + boolean isValid(double value); /** * @return the underlying dictionary if current reader is dictionary encoded http://git-wip-us.apache.org/repos/asf/hive/blob/9cd62587/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java index 17d6e33..0406308 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java @@ -25,9 +25,11 @@ import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.parquet.column.Dictionary; import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.io.api.Binary; @@ -188,6 +190,16 @@ public final class ParquetDataColumnReaderFactory { } @Override + public boolean isValid(float value) { + return true; + } + + @Override + public boolean isValid(double value) { + return true; + } + + @Override public long readLong(int id) { return dict.decodeToLong(id); } @@ -476,6 +488,35 @@ public final class ParquetDataColumnReaderFactory { } /** + * The reader who reads long data using Decimal type. + */ + public static class Types64Int2DecimalPageReader extends TypesFromInt64PageReader { + private int precision = 0; + private int scale = 0; + private final HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable(0L); + + public Types64Int2DecimalPageReader(ValuesReader realReader, int length, int precision, + int scale) { + super(realReader, length); + this.precision = precision; + this.scale = scale; + } + + public Types64Int2DecimalPageReader(Dictionary dict, int length, int precision, int scale) { + super(dict, length); + this.precision = precision; + this.scale = scale; + } + + @Override + public boolean isValid(long value) { + hiveDecimalWritable.setFromLong(value); + hiveDecimalWritable.mutateEnforcePrecisionScale(precision, scale); + return hiveDecimalWritable.isSet(); + } + } + + /** * The reader who reads unsigned long data. */ public static class TypesFromUInt64PageReader extends TypesFromInt64PageReader { @@ -492,6 +533,16 @@ public final class ParquetDataColumnReaderFactory { public boolean isValid(long value) { return (value >= 0); } + + @Override + public boolean isValid(float value) { + return (value >= 0); + } + + @Override + public boolean isValid(double value) { + return (value >= 0); + } } /** @@ -550,6 +601,35 @@ public final class ParquetDataColumnReaderFactory { } /** + * The reader who reads unsigned long data using Decimal type. 
+ */ + public static class Types64UInt2DecimalPageReader extends TypesFromInt64PageReader { + private int precision = 0; + private int scale = 0; + private final HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable(0L); + + public Types64UInt2DecimalPageReader(ValuesReader realReader, int length, int precision, + int scale) { + super(realReader, length); + this.precision = precision; + this.scale = scale; + } + + public Types64UInt2DecimalPageReader(Dictionary dict, int length, int precision, int scale) { + super(dict, length); + this.precision = precision; + this.scale = scale; + } + + @Override + public boolean isValid(long value) { + hiveDecimalWritable.setFromLong(value); + hiveDecimalWritable.mutateEnforcePrecisionScale(precision, scale); + return ((value >= 0) && hiveDecimalWritable.isSet()); + } + } + + /** * The reader who reads int data using smallint type. */ public static class Types32Int2SmallintPageReader extends TypesFromInt32PageReader { @@ -586,6 +666,35 @@ public final class ParquetDataColumnReaderFactory { } /** + * The reader who reads int data using Decimal type. + */ + public static class Types32Int2DecimalPageReader extends TypesFromInt32PageReader { + private int precision = 0; + private int scale = 0; + private final HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable(0L); + + public Types32Int2DecimalPageReader(ValuesReader realReader, int length, int precision, + int scale) { + super(realReader, length); + this.precision = precision; + this.scale = scale; + } + + public Types32Int2DecimalPageReader(Dictionary dict, int length, int precision, int scale) { + super(dict, length); + this.precision = precision; + this.scale = scale; + } + + @Override + public boolean isValid(long value) { + hiveDecimalWritable.setFromLong(value); + hiveDecimalWritable.mutateEnforcePrecisionScale(precision, scale); + return hiveDecimalWritable.isSet(); + } + } + + /** * The reader who reads unsigned int data. */ public static class TypesFromUInt32PageReader extends TypesFromInt32PageReader { @@ -601,6 +710,16 @@ public final class ParquetDataColumnReaderFactory { public boolean isValid(long value) { return (value >= 0); } + + @Override + public boolean isValid(float value) { + return (value >= 0); + } + + @Override + public boolean isValid(double value) { + return (value >= 0); + } } /** @@ -640,6 +759,35 @@ public final class ParquetDataColumnReaderFactory { } /** + * The reader who reads unsigned int data using Decimal type. + */ + public static class Types32UInt2DecimalPageReader extends TypesFromInt32PageReader { + private int precision = 0; + private int scale = 0; + private final HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable(0L); + + public Types32UInt2DecimalPageReader(ValuesReader realReader, int length, int precision, + int scale) { + super(realReader, length); + this.precision = precision; + this.scale = scale; + } + + public Types32UInt2DecimalPageReader(Dictionary dict, int length, int precision, int scale) { + super(dict, length); + this.precision = precision; + this.scale = scale; + } + + @Override + public boolean isValid(long value) { + hiveDecimalWritable.setFromLong(value); + hiveDecimalWritable.mutateEnforcePrecisionScale(precision, scale); + return ((value >= 0) && hiveDecimalWritable.isSet()); + } + } + + /** * The reader who reads from the underlying float value value. 
Implementation is in consist with * ETypeConverter EFLOAT_CONVERTER */ @@ -1044,6 +1192,7 @@ public final class ParquetDataColumnReaderFactory { throws IOException { // max length for varchar and char cases int length = getVarcharLength(hiveType); + String typeName = TypeInfoUtils.getBaseName(hiveType.getTypeName()); switch (parquetType.getPrimitiveTypeName()) { case INT32: @@ -1051,25 +1200,41 @@ public final class ParquetDataColumnReaderFactory { OriginalType.UINT_16 == parquetType.getOriginalType() || OriginalType.UINT_32 == parquetType.getOriginalType() || OriginalType.UINT_64 == parquetType.getOriginalType()) { - switch (hiveType.getTypeName()) { + switch (typeName) { case serdeConstants.SMALLINT_TYPE_NAME: return isDictionary ? new Types32UInt2SmallintPageReader(dictionary, length) : new Types32UInt2SmallintPageReader(valuesReader, length); case serdeConstants.TINYINT_TYPE_NAME: return isDictionary ? new Types32UInt2TinyintPageReader(dictionary, length) : new Types32UInt2TinyintPageReader(valuesReader, length); + case serdeConstants.DECIMAL_TYPE_NAME: + return isDictionary ? + new Types32UInt2DecimalPageReader(dictionary, length, + ((DecimalTypeInfo) hiveType).getPrecision(), + ((DecimalTypeInfo) hiveType).getScale()) : + new Types32UInt2DecimalPageReader(valuesReader, length, + ((DecimalTypeInfo) hiveType).getPrecision(), + ((DecimalTypeInfo) hiveType).getScale()); default: return isDictionary ? new TypesFromUInt32PageReader(dictionary, length) : new TypesFromUInt32PageReader(valuesReader, length); } } else { - switch (hiveType.getTypeName()) { + switch (typeName) { case serdeConstants.SMALLINT_TYPE_NAME: return isDictionary ? new Types32Int2SmallintPageReader(dictionary, length) : new Types32Int2SmallintPageReader(valuesReader, length); case serdeConstants.TINYINT_TYPE_NAME: return isDictionary ? new Types32Int2TinyintPageReader(dictionary, length) : new Types32Int2TinyintPageReader(valuesReader, length); + case serdeConstants.DECIMAL_TYPE_NAME: + return isDictionary ? + new Types32Int2DecimalPageReader(dictionary, length, + ((DecimalTypeInfo) hiveType).getPrecision(), + ((DecimalTypeInfo) hiveType).getScale()) : + new Types32Int2DecimalPageReader(valuesReader, length, + ((DecimalTypeInfo) hiveType).getPrecision(), + ((DecimalTypeInfo) hiveType).getScale()); default: return isDictionary ? new TypesFromInt32PageReader(dictionary, length) : new TypesFromInt32PageReader(valuesReader, length); @@ -1080,7 +1245,7 @@ public final class ParquetDataColumnReaderFactory { OriginalType.UINT_16 == parquetType.getOriginalType() || OriginalType.UINT_32 == parquetType.getOriginalType() || OriginalType.UINT_64 == parquetType.getOriginalType()) { - switch (hiveType.getTypeName()) { + switch (typeName) { case serdeConstants.INT_TYPE_NAME: return isDictionary ? new Types64UInt2IntPageReader(dictionary, length) : new Types64UInt2IntPageReader(valuesReader, length); @@ -1090,12 +1255,20 @@ public final class ParquetDataColumnReaderFactory { case serdeConstants.TINYINT_TYPE_NAME: return isDictionary ? new Types64UInt2TinyintPageReader(dictionary, length) : new Types64UInt2TinyintPageReader(valuesReader, length); + case serdeConstants.DECIMAL_TYPE_NAME: + return isDictionary ? + new Types64UInt2DecimalPageReader(dictionary, length, + ((DecimalTypeInfo) hiveType).getPrecision(), + ((DecimalTypeInfo) hiveType).getScale()) : + new Types64UInt2DecimalPageReader(valuesReader, length, + ((DecimalTypeInfo) hiveType).getPrecision(), + ((DecimalTypeInfo) hiveType).getScale()); default: return isDictionary ? 
new TypesFromUInt64PageReader(dictionary, length) : new TypesFromUInt64PageReader(valuesReader, length); } } else { - switch (hiveType.getTypeName()) { + switch (typeName) { case serdeConstants.INT_TYPE_NAME: return isDictionary ? new Types64Int2IntPageReader(dictionary, length) : new Types64Int2IntPageReader(valuesReader, length); @@ -1105,6 +1278,14 @@ public final class ParquetDataColumnReaderFactory { case serdeConstants.TINYINT_TYPE_NAME: return isDictionary ? new Types64Int2TinyintPageReader(dictionary, length) : new Types64Int2TinyintPageReader(valuesReader, length); + case serdeConstants.DECIMAL_TYPE_NAME: + return isDictionary ? + new Types64Int2DecimalPageReader(dictionary, length, + ((DecimalTypeInfo) hiveType).getPrecision(), + ((DecimalTypeInfo) hiveType).getScale()) : + new Types64Int2DecimalPageReader(valuesReader, length, + ((DecimalTypeInfo) hiveType).getPrecision(), + ((DecimalTypeInfo) hiveType).getScale()); default: return isDictionary ? new TypesFromInt64PageReader(dictionary, length) : new TypesFromInt64PageReader(valuesReader, length); http://git-wip-us.apache.org/repos/asf/hive/blob/9cd62587/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java index 4e6993b..e89a736 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java @@ -23,10 +23,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.schema.DecimalMetadata; import org.apache.parquet.schema.Type; import java.io.IOException; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; + /** * It's column level Parquet reader which is used to read a batch of records for a column, * part of the code is referred from Apache Spark and Apache Parquet. 
@@ -179,8 +183,13 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader readRepetitionAndDefinitionLevels(); if (definitionLevel >= maxDefLevel) { c.vector[rowId] = dataColumn.readDouble(); - c.isNull[rowId] = false; - c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + if (dataColumn.isValid(c.vector[rowId])) { + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.vector[rowId] = 0; + setNullValue(c, rowId); + } } else { setNullValue(c, rowId); } @@ -217,8 +226,13 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader readRepetitionAndDefinitionLevels(); if (definitionLevel >= maxDefLevel) { c.vector[rowId] = dataColumn.readLong(); - c.isNull[rowId] = false; - c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + if (dataColumn.isValid(c.vector[rowId])) { + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.vector[rowId] = 0; + setNullValue(c, rowId); + } } else { setNullValue(c, rowId); } @@ -236,8 +250,13 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader readRepetitionAndDefinitionLevels(); if (definitionLevel >= maxDefLevel) { c.vector[rowId] = dataColumn.readFloat(); - c.isNull[rowId] = false; - c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + if (dataColumn.isValid(c.vector[rowId])) { + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.vector[rowId] = 0; + setNullValue(c, rowId); + } } else { setNullValue(c, rowId); } @@ -250,16 +269,28 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader int total, DecimalColumnVector c, int rowId) throws IOException { - decimalTypeCheck(type); + + DecimalMetadata decimalMetadata = type.asPrimitiveType().getDecimalMetadata(); + fillDecimalPrecisionScale(decimalMetadata, c); + int left = total; - c.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision(); - c.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale(); while (left > 0) { readRepetitionAndDefinitionLevels(); if (definitionLevel >= maxDefLevel) { - c.vector[rowId].set(dataColumn.readDecimal(), c.scale); - c.isNull[rowId] = false; - c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + if (decimalMetadata != null) { + c.vector[rowId].set(dataColumn.readDecimal(), c.scale); + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + long value = dataColumn.readLong(); + if (dataColumn.isValid(value)) { + c.vector[rowId].setFromLong(value); + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + setNullValue(c, rowId); + } + } } else { setNullValue(c, rowId); } @@ -454,14 +485,26 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader } break; case DECIMAL: - decimalTypeCheck(type); + DecimalMetadata decimalMetadata = type.asPrimitiveType().getDecimalMetadata(); DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column); - decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision(); - decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale(); - for (int i = rowId; i < rowId + num; ++i) { - decimalColumnVector.vector[i] - .set(dictionary.readDecimal((int) dictionaryIds.vector[i]), - 
decimalColumnVector.scale); + + fillDecimalPrecisionScale(decimalMetadata, decimalColumnVector); + + if (decimalMetadata != null) { + for (int i = rowId; i < rowId + num; ++i) { + decimalColumnVector.vector[i].set(dictionary.readDecimal((int) dictionaryIds.vector[i]), + decimalColumnVector.scale); + } + } else { + for (int i = rowId; i < rowId + num; ++i) { + long value = dictionary.readLong((int) dictionaryIds.vector[i]); + if (dictionary.isValid(value)) { + decimalColumnVector.vector[i] + .setFromLong(dictionary.readLong((int) dictionaryIds.vector[i])); + } else { + setNullValue(column, i); + } + } } break; case TIMESTAMP: @@ -475,4 +518,22 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader throw new UnsupportedOperationException("Unsupported type: " + type); } } + + private void fillDecimalPrecisionScale(DecimalMetadata decimalMetadata, + DecimalColumnVector decimalColumnVector) { + if (decimalMetadata != null) { + decimalColumnVector.precision = + (short) type.asPrimitiveType().getDecimalMetadata().getPrecision(); + decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale(); + } else if (type.asPrimitiveType().getPrimitiveTypeName() == INT32) { + decimalColumnVector.precision = 10; + decimalColumnVector.scale = 0; + } else if (type.asPrimitiveType().getPrimitiveTypeName() == INT64) { + decimalColumnVector.precision = 19; + decimalColumnVector.scale = 0; + } else { + throw new UnsupportedOperationException( + "The underlying Parquet type cannot be converted to Hive Decimal type: " + type); + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/9cd62587/ql/src/test/queries/clientpositive/read_uint_parquet.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/read_uint_parquet.q b/ql/src/test/queries/clientpositive/read_uint_parquet.q index 09b0b76..baf328e 100644 --- a/ql/src/test/queries/clientpositive/read_uint_parquet.q +++ b/ql/src/test/queries/clientpositive/read_uint_parquet.q @@ -42,6 +42,69 @@ load data local inpath '../../data/files/data_including_invalid_values.parquet' select * from testtinyintinv; drop table testtinyintinv; +create table testfloatinv +(col_INT32_UINT_8 float, + col_INT32_UINT_16 float, + col_INT32_UINT_32 float, + col_INT64_UINT_64 float) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testfloatinv; +select * from testfloatinv; +drop table testfloatinv; + +create table testdoubleinv +(col_INT32_UINT_8 double, + col_INT32_UINT_16 double, + col_INT32_UINT_32 double, + col_INT64_UINT_64 double) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdoubleinv; +select * from testdoubleinv; +drop table testdoubleinv; + +create table testdecimal22_2inv +(col_INT32_UINT_8 decimal(22,2), + col_INT32_UINT_16 decimal(22,2), + col_INT32_UINT_32 decimal(22,2), + col_INT64_UINT_64 decimal(22,2)) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdecimal22_2inv; +select * from testdecimal22_2inv; +drop table testdecimal22_2inv; + +create table testdecimal13_2inv +(col_INT32_UINT_8 decimal(13,2), + col_INT32_UINT_16 decimal(13,2), + col_INT32_UINT_32 decimal(13,2), + col_INT64_UINT_64 decimal(13,2)) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdecimal13_2inv; +select * from 
testdecimal13_2inv; +drop table testdecimal13_2inv; + +create table testdecimal8_2inv +(col_INT32_UINT_8 decimal(8,2), + col_INT32_UINT_16 decimal(8,2), + col_INT32_UINT_32 decimal(8,2), + col_INT64_UINT_64 decimal(8,2)) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdecimal8_2inv; +select * from testdecimal8_2inv; +drop table testdecimal8_2inv; + +create table testdecimal6_2inv +(col_INT32_UINT_8 decimal(6,2), + col_INT32_UINT_16 decimal(6,2), + col_INT32_UINT_32 decimal(6,2), + col_INT64_UINT_64 decimal(6,2)) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdecimal6_2inv; +select * from testdecimal6_2inv; +drop table testdecimal6_2inv; + +create table testdecimal3_2inv +(col_INT32_UINT_8 decimal(3,2), + col_INT32_UINT_16 decimal(3,2), + col_INT32_UINT_32 decimal(3,2), + col_INT64_UINT_64 decimal(3,2)) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdecimal3_2inv; +select * from testdecimal3_2inv; +drop table testdecimal3_2inv; + create table testbigintvalid (col_INT32_UINT_8 bigint, col_INT32_UINT_16 bigint, @@ -77,3 +140,66 @@ create table testtinyintvalid load data local inpath '../../data/files/data_with_valid_values.parquet' into table testtinyintvalid; select * from testtinyintvalid; drop table testtinyintvalid; + +create table testfloatvalid +(col_INT32_UINT_8 float, + col_INT32_UINT_16 float, + col_INT32_UINT_32 float, + col_INT64_UINT_64 float) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testfloatvalid; +select * from testfloatvalid; +drop table testfloatvalid; + +create table testdoublevalid +(col_INT32_UINT_8 double, + col_INT32_UINT_16 double, + col_INT32_UINT_32 double, + col_INT64_UINT_64 double) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdoublevalid; +select * from testdoublevalid; +drop table testdoublevalid; + +create table testdecimal22_2valid +(col_INT32_UINT_8 decimal(22,2), + col_INT32_UINT_16 decimal(22,2), + col_INT32_UINT_32 decimal(22,2), + col_INT64_UINT_64 decimal(22,2)) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdecimal22_2valid; +select * from testdecimal22_2valid; +drop table testdecimal22_2valid; + +create table testdecimal13_2valid +(col_INT32_UINT_8 decimal(13,2), + col_INT32_UINT_16 decimal(13,2), + col_INT32_UINT_32 decimal(13,2), + col_INT64_UINT_64 decimal(13,2)) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdecimal13_2valid; +select * from testdecimal13_2valid; +drop table testdecimal13_2valid; + +create table testdecimal8_2valid +(col_INT32_UINT_8 decimal(8,2), + col_INT32_UINT_16 decimal(8,2), + col_INT32_UINT_32 decimal(8,2), + col_INT64_UINT_64 decimal(8,2)) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdecimal8_2valid; +select * from testdecimal8_2valid; +drop table testdecimal8_2valid; + +create table testdecimal6_2valid +(col_INT32_UINT_8 decimal(6,2), + col_INT32_UINT_16 decimal(6,2), + col_INT32_UINT_32 decimal(6,2), + col_INT64_UINT_64 decimal(6,2)) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdecimal6_2valid; +select * from testdecimal6_2valid; +drop table 
testdecimal6_2valid; + +create table testdecimal3_2valid +(col_INT32_UINT_8 decimal(3,2), + col_INT32_UINT_16 decimal(3,2), + col_INT32_UINT_32 decimal(3,2), + col_INT64_UINT_64 decimal(3,2)) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdecimal3_2valid; +select * from testdecimal3_2valid; +drop table testdecimal3_2valid; http://git-wip-us.apache.org/repos/asf/hive/blob/9cd62587/ql/src/test/queries/clientpositive/read_uint_parquet_vectorized.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/read_uint_parquet_vectorized.q b/ql/src/test/queries/clientpositive/read_uint_parquet_vectorized.q index 6fcc802..f6b067b 100644 --- a/ql/src/test/queries/clientpositive/read_uint_parquet_vectorized.q +++ b/ql/src/test/queries/clientpositive/read_uint_parquet_vectorized.q @@ -1,5 +1,6 @@ -- Enable vectorization SET hive.vectorized.execution.enabled=true; +SET hive.fetch.task.conversion=none; create table testbasicint (uint_32_col int) stored as parquet; load data local inpath '../../data/files/test_uint.parquet' into table testbasicint; @@ -42,6 +43,69 @@ load data local inpath '../../data/files/data_including_invalid_values.parquet' select * from testtinyintinv; drop table testtinyintinv; +create table testfloatinv +(col_INT32_UINT_8 float, + col_INT32_UINT_16 float, + col_INT32_UINT_32 float, + col_INT64_UINT_64 float) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testfloatinv; +select * from testfloatinv; +drop table testfloatinv; + +create table testdoubleinv +(col_INT32_UINT_8 double, + col_INT32_UINT_16 double, + col_INT32_UINT_32 double, + col_INT64_UINT_64 double) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdoubleinv; +select * from testdoubleinv; +drop table testdoubleinv; + +create table testdecimal22_2inv +(col_INT32_UINT_8 decimal(22,2), + col_INT32_UINT_16 decimal(22,2), + col_INT32_UINT_32 decimal(22,2), + col_INT64_UINT_64 decimal(22,2)) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdecimal22_2inv; +select * from testdecimal22_2inv; +drop table testdecimal22_2inv; + +create table testdecimal13_2inv +(col_INT32_UINT_8 decimal(13,2), + col_INT32_UINT_16 decimal(13,2), + col_INT32_UINT_32 decimal(13,2), + col_INT64_UINT_64 decimal(13,2)) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdecimal13_2inv; +select * from testdecimal13_2inv; +drop table testdecimal13_2inv; + +create table testdecimal8_2inv +(col_INT32_UINT_8 decimal(8,2), + col_INT32_UINT_16 decimal(8,2), + col_INT32_UINT_32 decimal(8,2), + col_INT64_UINT_64 decimal(8,2)) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdecimal8_2inv; +select * from testdecimal8_2inv; +drop table testdecimal8_2inv; + +create table testdecimal6_2inv +(col_INT32_UINT_8 decimal(6,2), + col_INT32_UINT_16 decimal(6,2), + col_INT32_UINT_32 decimal(6,2), + col_INT64_UINT_64 decimal(6,2)) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdecimal6_2inv; +select * from testdecimal6_2inv; +drop table testdecimal6_2inv; + +create table testdecimal3_2inv +(col_INT32_UINT_8 decimal(3,2), + col_INT32_UINT_16 decimal(3,2), + 
col_INT32_UINT_32 decimal(3,2), + col_INT64_UINT_64 decimal(3,2)) stored as parquet; +load data local inpath '../../data/files/data_including_invalid_values.parquet' into table testdecimal3_2inv; +select * from testdecimal3_2inv; +drop table testdecimal3_2inv; + create table testbigintvalid (col_INT32_UINT_8 bigint, col_INT32_UINT_16 bigint, @@ -77,3 +141,66 @@ create table testtinyintvalid load data local inpath '../../data/files/data_with_valid_values.parquet' into table testtinyintvalid; select * from testtinyintvalid; drop table testtinyintvalid; + +create table testfloatvalid +(col_INT32_UINT_8 float, + col_INT32_UINT_16 float, + col_INT32_UINT_32 float, + col_INT64_UINT_64 float) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testfloatvalid; +select * from testfloatvalid; +drop table testfloatvalid; + +create table testdoublevalid +(col_INT32_UINT_8 double, + col_INT32_UINT_16 double, + col_INT32_UINT_32 double, + col_INT64_UINT_64 double) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdoublevalid; +select * from testdoublevalid; +drop table testdoublevalid; + +create table testdecimal22_2valid +(col_INT32_UINT_8 decimal(22,2), + col_INT32_UINT_16 decimal(22,2), + col_INT32_UINT_32 decimal(22,2), + col_INT64_UINT_64 decimal(22,2)) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdecimal22_2valid; +select * from testdecimal22_2valid; +drop table testdecimal22_2valid; + +create table testdecimal13_2valid +(col_INT32_UINT_8 decimal(13,2), + col_INT32_UINT_16 decimal(13,2), + col_INT32_UINT_32 decimal(13,2), + col_INT64_UINT_64 decimal(13,2)) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdecimal13_2valid; +select * from testdecimal13_2valid; +drop table testdecimal13_2valid; + +create table testdecimal8_2valid +(col_INT32_UINT_8 decimal(8,2), + col_INT32_UINT_16 decimal(8,2), + col_INT32_UINT_32 decimal(8,2), + col_INT64_UINT_64 decimal(8,2)) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdecimal8_2valid; +select * from testdecimal8_2valid; +drop table testdecimal8_2valid; + +create table testdecimal6_2valid +(col_INT32_UINT_8 decimal(6,2), + col_INT32_UINT_16 decimal(6,2), + col_INT32_UINT_32 decimal(6,2), + col_INT64_UINT_64 decimal(6,2)) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdecimal6_2valid; +select * from testdecimal6_2valid; +drop table testdecimal6_2valid; + +create table testdecimal3_2valid +(col_INT32_UINT_8 decimal(3,2), + col_INT32_UINT_16 decimal(3,2), + col_INT32_UINT_32 decimal(3,2), + col_INT64_UINT_64 decimal(3,2)) stored as parquet; +load data local inpath '../../data/files/data_with_valid_values.parquet' into table testdecimal3_2valid; +select * from testdecimal3_2valid; +drop table testdecimal3_2valid; http://git-wip-us.apache.org/repos/asf/hive/blob/9cd62587/ql/src/test/queries/clientpositive/type_change_test_int.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/type_change_test_int.q b/ql/src/test/queries/clientpositive/type_change_test_int.q index 02ecc3d..0e98242 100644 --- a/ql/src/test/queries/clientpositive/type_change_test_int.q +++ b/ql/src/test/queries/clientpositive/type_change_test_int.q @@ -91,6 +91,81 @@ alter table 
testAltColT replace columns select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColT order by cId; +-- bigint, int, smallint, and tinyint: type changed to float +alter table testAltColT replace columns +(cId TINYINT, + cBigInt FLOAT, + cInt FLOAT, + cSmallInt FLOAT, + cTinyint FLOAT); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColT order by cId; + +-- bigint, int, smallint, and tinyint: type changed to double +alter table testAltColT replace columns +(cId TINYINT, + cBigInt DOUBLE, + cInt DOUBLE, + cSmallInt DOUBLE, + cTinyint DOUBLE); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColT order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- all values fit and should return all values +alter table testAltColT replace columns +(cId TINYINT, + cBigInt DECIMAL(22,2), + cInt DECIMAL(22,2), + cSmallInt DECIMAL(22,2), + cTinyint DECIMAL(22,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColT order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int doesn't fit and should return null where it didn't fit +alter table testAltColT replace columns +(cId TINYINT, + cBigInt DECIMAL(13,2), + cInt DECIMAL(13,2), + cSmallInt DECIMAL(13,2), + cTinyint DECIMAL(13,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColT order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int and int doesn't fit and should return null where it didn't fit +alter table testAltColT replace columns +(cId TINYINT, + cBigInt DECIMAL(8,2), + cInt DECIMAL(8,2), + cSmallInt DECIMAL(8,2), + cTinyint DECIMAL(8,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColT order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int, int and small int doesn't fit and should return null where it didn't fit +alter table testAltColT replace columns +(cId TINYINT, + cBigInt DECIMAL(6,2), + cInt DECIMAL(6,2), + cSmallInt DECIMAL(6,2), + cTinyint DECIMAL(6,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColT order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- only single digit fits and should return null where it didn't fit +alter table testAltColT replace columns +(cId TINYINT, + cBigInt DECIMAL(3,2), + cInt DECIMAL(3,2), + cSmallInt DECIMAL(3,2), + cTinyint DECIMAL(3,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColT order by cId; + drop table if exists testAltColT; -- Text type: End @@ -141,6 +216,81 @@ alter table testAltColSF replace columns select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColSF order by cId; +-- bigint, int, smallint, and tinyint: type changed to float +alter table testAltColSF replace columns +(cId TINYINT, + cBigInt FLOAT, + cInt FLOAT, + cSmallInt FLOAT, + cTinyint FLOAT); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColSF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to double +alter table testAltColSF replace columns +(cId TINYINT, + cBigInt DOUBLE, + cInt DOUBLE, + cSmallInt DOUBLE, + cTinyint DOUBLE); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColSF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- all values fit and should return all values +alter table testAltColSF replace columns +(cId TINYINT, + cBigInt DECIMAL(22,2), + cInt DECIMAL(22,2), + cSmallInt DECIMAL(22,2), + cTinyint DECIMAL(22,2)); + +select 
cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColSF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int doesn't fit and should return null where it didn't fit +alter table testAltColSF replace columns +(cId TINYINT, + cBigInt DECIMAL(13,2), + cInt DECIMAL(13,2), + cSmallInt DECIMAL(13,2), + cTinyint DECIMAL(13,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColSF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int and int doesn't fit and should return null where it didn't fit +alter table testAltColSF replace columns +(cId TINYINT, + cBigInt DECIMAL(8,2), + cInt DECIMAL(8,2), + cSmallInt DECIMAL(8,2), + cTinyint DECIMAL(8,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColSF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int, int and small int doesn't fit and should return null where it didn't fit +alter table testAltColSF replace columns +(cId TINYINT, + cBigInt DECIMAL(6,2), + cInt DECIMAL(6,2), + cSmallInt DECIMAL(6,2), + cTinyint DECIMAL(6,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColSF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- only single digit fits and should return null where it didn't fit +alter table testAltColSF replace columns +(cId TINYINT, + cBigInt DECIMAL(3,2), + cInt DECIMAL(3,2), + cSmallInt DECIMAL(3,2), + cTinyint DECIMAL(3,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColSF order by cId; + drop table if exists testAltColSF; -- Sequence File type: End @@ -191,6 +341,81 @@ alter table testAltColRCF replace columns select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColRCF order by cId; +-- bigint, int, smallint, and tinyint: type changed to float +alter table testAltColRCF replace columns +(cId TINYINT, + cBigInt FLOAT, + cInt FLOAT, + cSmallInt FLOAT, + cTinyint FLOAT); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColRCF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to double +alter table testAltColRCF replace columns +(cId TINYINT, + cBigInt DOUBLE, + cInt DOUBLE, + cSmallInt DOUBLE, + cTinyint DOUBLE); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColRCF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- all values fit and should return all values +alter table testAltColRCF replace columns +(cId TINYINT, + cBigInt DECIMAL(22,2), + cInt DECIMAL(22,2), + cSmallInt DECIMAL(22,2), + cTinyint DECIMAL(22,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColRCF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int doesn't fit and should return null where it didn't fit +alter table testAltColRCF replace columns +(cId TINYINT, + cBigInt DECIMAL(13,2), + cInt DECIMAL(13,2), + cSmallInt DECIMAL(13,2), + cTinyint DECIMAL(13,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColRCF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int and int doesn't fit and should return null where it didn't fit +alter table testAltColRCF replace columns +(cId TINYINT, + cBigInt DECIMAL(8,2), + cInt DECIMAL(8,2), + cSmallInt DECIMAL(8,2), + cTinyint DECIMAL(8,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColRCF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to 
decimal +-- some of big int, int and small int doesn't fit and should return null where it didn't fit +alter table testAltColRCF replace columns +(cId TINYINT, + cBigInt DECIMAL(6,2), + cInt DECIMAL(6,2), + cSmallInt DECIMAL(6,2), + cTinyint DECIMAL(6,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColRCF order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- only single digit fits and should return null where it didn't fit +alter table testAltColRCF replace columns +(cId TINYINT, + cBigInt DECIMAL(3,2), + cInt DECIMAL(3,2), + cSmallInt DECIMAL(3,2), + cTinyint DECIMAL(3,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColRCF order by cId; + drop table if exists testAltColRCF; -- RCFile type: End @@ -241,6 +466,81 @@ alter table testAltColORC replace columns select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColORC order by cId; +-- bigint, int, smallint, and tinyint: type changed to float +alter table testAltColORC replace columns +(cId TINYINT, + cBigInt FLOAT, + cInt FLOAT, + cSmallInt FLOAT, + cTinyint FLOAT); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColORC order by cId; + +-- bigint, int, smallint, and tinyint: type changed to double +alter table testAltColORC replace columns +(cId TINYINT, + cBigInt DOUBLE, + cInt DOUBLE, + cSmallInt DOUBLE, + cTinyint DOUBLE); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColORC order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- all values fit and should return all values +alter table testAltColORC replace columns +(cId TINYINT, + cBigInt DECIMAL(22,2), + cInt DECIMAL(22,2), + cSmallInt DECIMAL(22,2), + cTinyint DECIMAL(22,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColORC order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int doesn't fit and should return null where it didn't fit +alter table testAltColORC replace columns +(cId TINYINT, + cBigInt DECIMAL(13,2), + cInt DECIMAL(13,2), + cSmallInt DECIMAL(13,2), + cTinyint DECIMAL(13,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColORC order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int and int doesn't fit and should return null where it didn't fit +alter table testAltColORC replace columns +(cId TINYINT, + cBigInt DECIMAL(8,2), + cInt DECIMAL(8,2), + cSmallInt DECIMAL(8,2), + cTinyint DECIMAL(8,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColORC order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- some of big int, int and small int doesn't fit and should return null where it didn't fit +alter table testAltColORC replace columns +(cId TINYINT, + cBigInt DECIMAL(6,2), + cInt DECIMAL(6,2), + cSmallInt DECIMAL(6,2), + cTinyint DECIMAL(6,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColORC order by cId; + +-- bigint, int, smallint, and tinyint: type changed to decimal +-- only single digit fits and should return null where it didn't fit +alter table testAltColORC replace columns +(cId TINYINT, + cBigInt DECIMAL(3,2), + cInt DECIMAL(3,2), + cSmallInt DECIMAL(3,2), + cTinyint DECIMAL(3,2)); + +select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColORC order by cId; + drop table if exists testAltColORC; -- ORC type: End @@ -291,6 +591,81 @@ alter table testAltColPDE replace columns select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDE order 
@@ -291,6 +591,81 @@ alter table testAltColPDE replace columns
 select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDE order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to float
+alter table testAltColPDE replace columns
+(cId TINYINT,
+ cBigInt FLOAT,
+ cInt FLOAT,
+ cSmallInt FLOAT,
+ cTinyint FLOAT);
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDE order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to double
+alter table testAltColPDE replace columns
+(cId TINYINT,
+ cBigInt DOUBLE,
+ cInt DOUBLE,
+ cSmallInt DOUBLE,
+ cTinyint DOUBLE);
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDE order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to decimal
+-- all values fit and should be returned
+alter table testAltColPDE replace columns
+(cId TINYINT,
+ cBigInt DECIMAL(22,2),
+ cInt DECIMAL(22,2),
+ cSmallInt DECIMAL(22,2),
+ cTinyint DECIMAL(22,2));
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDE order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to decimal
+-- some bigint values don't fit and should return null where they don't fit
+alter table testAltColPDE replace columns
+(cId TINYINT,
+ cBigInt DECIMAL(13,2),
+ cInt DECIMAL(13,2),
+ cSmallInt DECIMAL(13,2),
+ cTinyint DECIMAL(13,2));
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDE order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to decimal
+-- some bigint and int values don't fit and should return null where they don't fit
+alter table testAltColPDE replace columns
+(cId TINYINT,
+ cBigInt DECIMAL(8,2),
+ cInt DECIMAL(8,2),
+ cSmallInt DECIMAL(8,2),
+ cTinyint DECIMAL(8,2));
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDE order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to decimal
+-- some bigint, int, and smallint values don't fit and should return null where they don't fit
+alter table testAltColPDE replace columns
+(cId TINYINT,
+ cBigInt DECIMAL(6,2),
+ cInt DECIMAL(6,2),
+ cSmallInt DECIMAL(6,2),
+ cTinyint DECIMAL(6,2));
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDE order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to decimal
+-- only single-digit values fit and should return null where they don't fit
+alter table testAltColPDE replace columns
+(cId TINYINT,
+ cBigInt DECIMAL(3,2),
+ cInt DECIMAL(3,2),
+ cSmallInt DECIMAL(3,2),
+ cTinyint DECIMAL(3,2));
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDE order by cId;
+
 drop table if exists testAltColPDE;
 -- Parquet type with Dictionary encoding enabled: End
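
The PDE and PDD sections run the same statements; they differ only in whether the underlying Parquet files were written with dictionary encoding, so the dictionary and plain decode paths in VectorizedPrimitiveColumnReader are each exercised. A sketch of how the writer side can be pinned, assuming the stock Parquet writer option parquet.enable.dictionary (the exact setup lines in the patch's q-files may differ, and the table names below are hypothetical):

-- sketch only: write one copy with dictionary pages and one without
set parquet.enable.dictionary=true;
create table demoPDE (cInt INT) stored as parquet;
insert into demoPDE values (100), (2000000);

set parquet.enable.dictionary=false;
create table demoPDD (cInt INT) stored as parquet;
insert into demoPDD values (100), (2000000);

-- both layouts must evolve to decimal identically
alter table demoPDE replace columns (cInt DECIMAL(9,2));
alter table demoPDD replace columns (cInt DECIMAL(9,2));

select cInt from demoPDE;  -- expected: 100.00 and 2000000.00
select cInt from demoPDD;  -- expected: 100.00 and 2000000.00
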
@@ -342,5 +717,80 @@ alter table testAltColPDD replace columns
 select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDD order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to float
+alter table testAltColPDD replace columns
+(cId TINYINT,
+ cBigInt FLOAT,
+ cInt FLOAT,
+ cSmallInt FLOAT,
+ cTinyint FLOAT);
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDD order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to double
+alter table testAltColPDD replace columns
+(cId TINYINT,
+ cBigInt DOUBLE,
+ cInt DOUBLE,
+ cSmallInt DOUBLE,
+ cTinyint DOUBLE);
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDD order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to decimal
+-- all values fit and should be returned
+alter table testAltColPDD replace columns
+(cId TINYINT,
+ cBigInt DECIMAL(22,2),
+ cInt DECIMAL(22,2),
+ cSmallInt DECIMAL(22,2),
+ cTinyint DECIMAL(22,2));
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDD order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to decimal
+-- some bigint values don't fit and should return null where they don't fit
+alter table testAltColPDD replace columns
+(cId TINYINT,
+ cBigInt DECIMAL(13,2),
+ cInt DECIMAL(13,2),
+ cSmallInt DECIMAL(13,2),
+ cTinyint DECIMAL(13,2));
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDD order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to decimal
+-- some bigint and int values don't fit and should return null where they don't fit
+alter table testAltColPDD replace columns
+(cId TINYINT,
+ cBigInt DECIMAL(8,2),
+ cInt DECIMAL(8,2),
+ cSmallInt DECIMAL(8,2),
+ cTinyint DECIMAL(8,2));
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDD order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to decimal
+-- some bigint, int, and smallint values don't fit and should return null where they don't fit
+alter table testAltColPDD replace columns
+(cId TINYINT,
+ cBigInt DECIMAL(6,2),
+ cInt DECIMAL(6,2),
+ cSmallInt DECIMAL(6,2),
+ cTinyint DECIMAL(6,2));
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDD order by cId;
+
+-- bigint, int, smallint, and tinyint: type changed to decimal
+-- only single-digit values fit and should return null where they don't fit
+alter table testAltColPDD replace columns
+(cId TINYINT,
+ cBigInt DECIMAL(3,2),
+ cInt DECIMAL(3,2),
+ cSmallInt DECIMAL(3,2),
+ cTinyint DECIMAL(3,2));
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColPDD order by cId;
+
 drop table if exists testAltColPDD;
 -- Parquet type with Dictionary encoding disabled: End
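
As a closing sanity check on the tightest case above: DECIMAL(3,2) leaves a single integer digit (at most 9.99), so after the final replace columns only values in the range -9 to 9 survive the read and everything else comes back NULL. A minimal illustration, with a hypothetical table and rows rather than the patch's fixture data:

-- minimal sketch; testDec32 and its rows are hypothetical
drop table if exists testDec32;
create table testDec32 (c TINYINT) stored as parquet;
insert into testDec32 values (7), (10), (127);

alter table testDec32 replace columns (c DECIMAL(3,2));

select c from testDec32;
-- expected: 7.00, NULL, NULL (only the single-digit value fits in 9.99)
drop table testDec32;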