http://git-wip-us.apache.org/repos/asf/hive/blob/d467e172/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index defaf90..1201499 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -18,10 +18,21 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.Map; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; @@ -51,6 +62,8 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Charsets; @@ -73,28 +86,16 @@ public class VectorExtractRow { // Extraction can be a subset of columns, so this is the projection -- // the batch column numbers. - Category[] categories; - // The data type category of each column being extracted. - - PrimitiveCategory[] primitiveCategories; - // The data type primitive category of each column being assigned. - - int[] maxLengths; - // For the CHAR and VARCHAR data types, the maximum character length of - // the columns. Otherwise, 0. - - Writable[] primitiveWritables; - // The extracted values will be placed in these writables. + TypeInfo[] typeInfos; + ObjectInspector[] objectInspectors; /* * Allocate the various arrays. */ private void allocateArrays(int count) { projectionColumnNums = new int[count]; - categories = new Category[count]; - primitiveCategories = new PrimitiveCategory[count]; - maxLengths = new int[count]; - primitiveWritables = new Writable[count]; + typeInfos = new TypeInfo[count]; + objectInspectors = new ObjectInspector[count]; } /* @@ -102,28 +103,8 @@ public class VectorExtractRow { */ private void initEntry(int logicalColumnIndex, int projectionColumnNum, TypeInfo typeInfo) { projectionColumnNums[logicalColumnIndex] = projectionColumnNum; - Category category = typeInfo.getCategory(); - categories[logicalColumnIndex] = category; - if (category == Category.PRIMITIVE) { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); - primitiveCategories[logicalColumnIndex] = primitiveCategory; - - switch (primitiveCategory) { - case CHAR: - maxLengths[logicalColumnIndex] = ((CharTypeInfo) primitiveTypeInfo).getLength(); - break; - case VARCHAR: - maxLengths[logicalColumnIndex] = ((VarcharTypeInfo) primitiveTypeInfo).getLength(); - break; - default: - // No additional data type specific setting. - break; - } - - primitiveWritables[logicalColumnIndex] = - VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory); - } + typeInfos[logicalColumnIndex] = typeInfo; + objectInspectors[logicalColumnIndex] = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo); } /* @@ -159,12 +140,7 @@ public class VectorExtractRow { allocateArrays(count); for (int i = 0; i < count; i++) { - - int projectionColumnNum = projectedColumns[i]; - - TypeInfo typeInfo = typeInfos[i]; - - initEntry(i, projectionColumnNum, typeInfo); + initEntry(i, projectedColumns[i], typeInfos[i]); } } @@ -178,11 +154,17 @@ public class VectorExtractRow { allocateArrays(count); for (int i = 0; i < count; i++) { + initEntry(i, i, TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i))); + } + } - TypeInfo typeInfo = - TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i)); + public void init(TypeInfo[] typeInfos) throws HiveException { - initEntry(i, i, typeInfo); + final int count = typeInfos.length; + allocateArrays(count); + + for (int i = 0; i < count; i++) { + initEntry(i, i, typeInfos[i]); } } @@ -198,76 +180,85 @@ public class VectorExtractRow { * @param logicalColumnIndex * @return */ - public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) { + private Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) { + final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; - ColumnVector colVector = batch.cols[projectionColumnNum]; + final ColumnVector colVector = batch.cols[projectionColumnNum]; + return extractRowColumn( + colVector, typeInfos[logicalColumnIndex], objectInspectors[logicalColumnIndex], batchIndex); + } + + Object extractRowColumn( + ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex) { + if (colVector == null) { // The planner will not include unneeded columns for reading but other parts of execution // may ask for them.. return null; } - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); if (!colVector.noNulls && colVector.isNull[adjustedIndex]) { return null; } - Category category = categories[logicalColumnIndex]; + final Category category = typeInfo.getCategory(); switch (category) { case PRIMITIVE: { - Writable primitiveWritable = - primitiveWritables[logicalColumnIndex]; - PrimitiveCategory primitiveCategory = primitiveCategories[logicalColumnIndex]; + final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + final Writable primitiveWritable = + VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory); switch (primitiveCategory) { case VOID: return null; case BOOLEAN: ((BooleanWritable) primitiveWritable).set( - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex] == 0 ? + ((LongColumnVector) colVector).vector[adjustedIndex] == 0 ? false : true); return primitiveWritable; case BYTE: ((ByteWritable) primitiveWritable).set( - (byte) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (byte) ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case SHORT: ((ShortWritable) primitiveWritable).set( - (short) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (short) ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case INT: ((IntWritable) primitiveWritable).set( - (int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (int) ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case LONG: ((LongWritable) primitiveWritable).set( - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case TIMESTAMP: ((TimestampWritable) primitiveWritable).set( - ((TimestampColumnVector) batch.cols[projectionColumnNum]).asScratchTimestamp(adjustedIndex)); + ((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex)); return primitiveWritable; case DATE: ((DateWritable) primitiveWritable).set( - (int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (int) ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case FLOAT: ((FloatWritable) primitiveWritable).set( - (float) ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (float) ((DoubleColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case DOUBLE: ((DoubleWritable) primitiveWritable).set( - ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + ((DoubleColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case BINARY: { - BytesColumnVector bytesColVector = - ((BytesColumnVector) batch.cols[projectionColumnNum]); - byte[] bytes = bytesColVector.vector[adjustedIndex]; - int start = bytesColVector.start[adjustedIndex]; - int length = bytesColVector.length[adjustedIndex]; + final BytesColumnVector bytesColVector = + ((BytesColumnVector) colVector); + final byte[] bytes = bytesColVector.vector[adjustedIndex]; + final int start = bytesColVector.start[adjustedIndex]; + final int length = bytesColVector.length[adjustedIndex]; if (bytes == null) { - LOG.info("null binary entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum); + LOG.info("null binary entry: batchIndex " + batchIndex); } BytesWritable bytesWritable = (BytesWritable) primitiveWritable; @@ -276,14 +267,14 @@ public class VectorExtractRow { } case STRING: { - BytesColumnVector bytesColVector = - ((BytesColumnVector) batch.cols[projectionColumnNum]); - byte[] bytes = bytesColVector.vector[adjustedIndex]; - int start = bytesColVector.start[adjustedIndex]; - int length = bytesColVector.length[adjustedIndex]; + final BytesColumnVector bytesColVector = + ((BytesColumnVector) colVector); + final byte[] bytes = bytesColVector.vector[adjustedIndex]; + final int start = bytesColVector.start[adjustedIndex]; + final int length = bytesColVector.length[adjustedIndex]; if (bytes == null) { - nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum); + nullBytesReadError(primitiveCategory, batchIndex); } // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. @@ -292,61 +283,147 @@ public class VectorExtractRow { } case VARCHAR: { - BytesColumnVector bytesColVector = - ((BytesColumnVector) batch.cols[projectionColumnNum]); - byte[] bytes = bytesColVector.vector[adjustedIndex]; - int start = bytesColVector.start[adjustedIndex]; - int length = bytesColVector.length[adjustedIndex]; + final BytesColumnVector bytesColVector = + ((BytesColumnVector) colVector); + final byte[] bytes = bytesColVector.vector[adjustedIndex]; + final int start = bytesColVector.start[adjustedIndex]; + final int length = bytesColVector.length[adjustedIndex]; if (bytes == null) { - nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum); + nullBytesReadError(primitiveCategory, batchIndex); } - int adjustedLength = StringExpr.truncate(bytes, start, length, - maxLengths[logicalColumnIndex]); + final int adjustedLength = StringExpr.truncate(bytes, start, length, + ((VarcharTypeInfo) primitiveTypeInfo).getLength()); - HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable; + final HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable; hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1); return primitiveWritable; } case CHAR: { - BytesColumnVector bytesColVector = - ((BytesColumnVector) batch.cols[projectionColumnNum]); - byte[] bytes = bytesColVector.vector[adjustedIndex]; - int start = bytesColVector.start[adjustedIndex]; - int length = bytesColVector.length[adjustedIndex]; + final BytesColumnVector bytesColVector = + ((BytesColumnVector) colVector); + final byte[] bytes = bytesColVector.vector[adjustedIndex]; + final int start = bytesColVector.start[adjustedIndex]; + final int length = bytesColVector.length[adjustedIndex]; if (bytes == null) { - nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum); + nullBytesReadError(primitiveCategory, batchIndex); } - int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, - maxLengths[logicalColumnIndex]); + final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, + ((CharTypeInfo) primitiveTypeInfo).getLength()); - HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable; + final HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable; hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), - maxLengths[logicalColumnIndex]); + ((CharTypeInfo) primitiveTypeInfo).getLength()); return primitiveWritable; } case DECIMAL: // The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields. ((HiveDecimalWritable) primitiveWritable).set( - ((DecimalColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + ((DecimalColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case INTERVAL_YEAR_MONTH: ((HiveIntervalYearMonthWritable) primitiveWritable).set( - (int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (int) ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case INTERVAL_DAY_TIME: ((HiveIntervalDayTimeWritable) primitiveWritable).set( - ((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).asScratchIntervalDayTime(adjustedIndex)); + ((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedIndex)); return primitiveWritable; default: throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported"); } } + case LIST: + { + final ListColumnVector listColumnVector = (ListColumnVector) colVector; + final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; + final ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector; + final int offset = (int) listColumnVector.offsets[adjustedIndex]; + final int size = (int) listColumnVector.lengths[adjustedIndex]; + + final List list = new ArrayList(); + for (int i = 0; i < size; i++) { + list.add( + extractRowColumn( + listColumnVector.child, + listTypeInfo.getListElementTypeInfo(), + listObjectInspector.getListElementObjectInspector(), + offset + i)); + } + return list; + } + case MAP: + { + final MapColumnVector mapColumnVector = (MapColumnVector) colVector; + final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + final MapObjectInspector mapObjectInspector = (MapObjectInspector) objectInspector; + final int offset = (int) mapColumnVector.offsets[adjustedIndex]; + final int size = (int) mapColumnVector.lengths[adjustedIndex]; + + final Map map = new HashMap(); + for (int i = 0; i < size; i++) { + final Object key = extractRowColumn( + mapColumnVector.keys, + mapTypeInfo.getMapKeyTypeInfo(), + mapObjectInspector.getMapKeyObjectInspector(), + offset + i); + final Object value = extractRowColumn( + mapColumnVector.values, + mapTypeInfo.getMapValueTypeInfo(), + mapObjectInspector.getMapValueObjectInspector(), + offset + i); + map.put(key, value); + } + return map; + } + case STRUCT: + { + final StructColumnVector structColumnVector = (StructColumnVector) colVector; + final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + final StandardStructObjectInspector structInspector = + (StandardStructObjectInspector) objectInspector; + final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + final int size = fieldTypeInfos.size(); + final List<? extends StructField> structFields = + structInspector.getAllStructFieldRefs(); + + final Object struct = structInspector.create(); + for (int i = 0; i < size; i++) { + final StructField structField = structFields.get(i); + final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i); + final Object value = extractRowColumn( + structColumnVector.fields[i], + fieldTypeInfo, + structField.getFieldObjectInspector(), + adjustedIndex); + structInspector.setStructFieldData(struct, structField, value); + } + return struct; + } + case UNION: + { + final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos(); + final UnionObjectInspector unionInspector = (UnionObjectInspector) objectInspector; + final List<ObjectInspector> unionInspectors = unionInspector.getObjectInspectors(); + final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector; + final byte tag = (byte) unionColumnVector.tags[adjustedIndex]; + final Object object = extractRowColumn( + unionColumnVector.fields[tag], + objectTypeInfos.get(tag), + unionInspectors.get(tag), + adjustedIndex); + + final StandardUnion standardUnion = new StandardUnion(); + standardUnion.setTag(tag); + standardUnion.setObject(object); + return standardUnion; + } default: throw new RuntimeException("Category " + category.name() + " not supported"); } @@ -365,9 +442,8 @@ public class VectorExtractRow { } } - private void nullBytesReadError(PrimitiveCategory primitiveCategory, int batchIndex, - int projectionColumnNum) { + private void nullBytesReadError(PrimitiveCategory primitiveCategory, int batchIndex) { throw new RuntimeException("null " + primitiveCategory.name() + - " entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum); + " entry: batchIndex " + batchIndex); } }
http://git-wip-us.apache.org/repos/asf/hive/blob/d467e172/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java index 319b4a8..be471c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java @@ -19,20 +19,28 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.io.IOException; -import java.sql.Timestamp; import java.util.Arrays; import java.util.List; +import java.util.Map; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.ByteStream.Output; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; /** * This class serializes columns from a row in a VectorizedRowBatch into a serialization format. @@ -50,14 +58,18 @@ public final class VectorSerializeRow<T extends SerializeWrite> { private T serializeWrite; - private Category[] categories; - private PrimitiveCategory[] primitiveCategories; + private TypeInfo[] typeInfos; + + private ObjectInspector[] objectInspectors; private int[] outputColumnNums; + private VectorExtractRow vectorExtractRow; + public VectorSerializeRow(T serializeWrite) { this(); this.serializeWrite = serializeWrite; + vectorExtractRow = new VectorExtractRow(); } // Not public since we must have the serialize write object. @@ -67,55 +79,55 @@ public final class VectorSerializeRow<T extends SerializeWrite> { public void init(List<String> typeNames, int[] columnMap) throws HiveException { final int size = typeNames.size(); - categories = new Category[size]; - primitiveCategories = new PrimitiveCategory[size]; + typeInfos = new TypeInfo[size]; outputColumnNums = Arrays.copyOf(columnMap, size); - TypeInfo typeInfo; + objectInspectors = new ObjectInspector[size]; for (int i = 0; i < size; i++) { - typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i)); - categories[i] = typeInfo.getCategory(); - if (categories[i] == Category.PRIMITIVE) { - primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); - } + final TypeInfo typeInfo = + TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i)); + typeInfos[i] = typeInfo; + objectInspectors[i] = + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo); } + + vectorExtractRow.init(typeInfos, outputColumnNums); } public void init(List<String> typeNames) throws HiveException { final int size = typeNames.size(); - categories = new Category[size]; - primitiveCategories = new PrimitiveCategory[size]; + typeInfos = new TypeInfo[size]; outputColumnNums = new int[size]; - TypeInfo typeInfo; + objectInspectors = new ObjectInspector[size]; for (int i = 0; i < size; i++) { - typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i)); - categories[i] = typeInfo.getCategory(); - if (categories[i] == Category.PRIMITIVE) { - primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); - } + final TypeInfo typeInfo = + TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i)); + typeInfos[i] = typeInfo; + objectInspectors[i] = + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo); outputColumnNums[i] = i; } + + vectorExtractRow.init(typeInfos); } public void init(TypeInfo[] typeInfos, int[] columnMap) throws HiveException { final int size = typeInfos.length; - categories = new Category[size]; - primitiveCategories = new PrimitiveCategory[size]; + this.typeInfos = Arrays.copyOf(typeInfos, size); outputColumnNums = Arrays.copyOf(columnMap, size); - TypeInfo typeInfo; - for (int i = 0; i < typeInfos.length; i++) { - typeInfo = typeInfos[i]; - categories[i] = typeInfo.getCategory(); - if (categories[i] == Category.PRIMITIVE) { - primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); - } + objectInspectors = new ObjectInspector[size]; + for (int i = 0; i < size; i++) { + objectInspectors[i] = + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfos[i]); } + + vectorExtractRow.init(this.typeInfos, outputColumnNums); } public int getCount() { - return categories.length; + return typeInfos.length; } public void setOutput(Output output) { @@ -137,92 +149,229 @@ public final class VectorSerializeRow<T extends SerializeWrite> { hasAnyNulls = false; isAllNulls = true; - ColumnVector colVector; + for (int i = 0; i < typeInfos.length; i++) { + final ColumnVector colVector = batch.cols[outputColumnNums[i]]; + serializeWrite(colVector, typeInfos[i], objectInspectors[i], batchIndex); + } + } + + private void serializeWrite( + ColumnVector colVector, TypeInfo typeInfo, + ObjectInspector objectInspector, int batchIndex) throws IOException { + int adjustedBatchIndex; - final int size = categories.length; + if (colVector.isRepeating) { + adjustedBatchIndex = 0; + } else { + adjustedBatchIndex = batchIndex; + } + if (!colVector.noNulls && colVector.isNull[adjustedBatchIndex]) { + serializeWrite.writeNull(); + hasAnyNulls = true; + return; + } + isAllNulls = false; + + final Category category = typeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + serializePrimitiveWrite(colVector, (PrimitiveTypeInfo) typeInfo, adjustedBatchIndex); + break; + case LIST: + serializeListWrite( + (ListColumnVector) colVector, + (ListTypeInfo) typeInfo, + (ListObjectInspector) objectInspector, + adjustedBatchIndex); + break; + case MAP: + serializeMapWrite( + (MapColumnVector) colVector, + (MapTypeInfo) typeInfo, + (MapObjectInspector) objectInspector, + adjustedBatchIndex); + break; + case STRUCT: + serializeStructWrite( + (StructColumnVector) colVector, + (StructTypeInfo) typeInfo, + (StructObjectInspector) objectInspector, + adjustedBatchIndex); + break; + case UNION: + serializeUnionWrite( + (UnionColumnVector) colVector, + (UnionTypeInfo) typeInfo, + (UnionObjectInspector) objectInspector, + adjustedBatchIndex); + break; + default: + throw new RuntimeException("Unexpected category " + category); + } + } + + private void serializeUnionWrite( + UnionColumnVector colVector, UnionTypeInfo typeInfo, + UnionObjectInspector objectInspector, int adjustedBatchIndex) throws IOException { + + final byte tag = (byte) colVector.tags[adjustedBatchIndex]; + final ColumnVector fieldColumnVector = colVector.fields[tag]; + final TypeInfo objectTypeInfo = typeInfo.getAllUnionObjectTypeInfos().get(tag); + + serializeWrite.beginUnion(tag); + serializeWrite( + fieldColumnVector, + objectTypeInfo, + objectInspector.getObjectInspectors().get(tag), + adjustedBatchIndex); + serializeWrite.finishUnion(); + } + + private void serializeStructWrite( + StructColumnVector colVector, StructTypeInfo typeInfo, + StructObjectInspector objectInspector, int adjustedBatchIndex) throws IOException { + + final ColumnVector[] fieldColumnVectors = colVector.fields; + final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos(); + final List<? extends StructField> structFields = objectInspector.getAllStructFieldRefs(); + final int size = fieldTypeInfos.size(); + + final List list = (List) vectorExtractRow.extractRowColumn( + colVector, typeInfo, objectInspector, adjustedBatchIndex); + + serializeWrite.beginStruct(list); + for (int i = 0; i < size; i++) { + if (i > 0) { + serializeWrite.separateStruct(); + } + serializeWrite( + fieldColumnVectors[i], + fieldTypeInfos.get(i), + structFields.get(i).getFieldObjectInspector(), + adjustedBatchIndex); + } + serializeWrite.finishStruct(); + } + + private void serializeMapWrite( + MapColumnVector colVector, MapTypeInfo typeInfo, + MapObjectInspector objectInspector, int adjustedBatchIndex) throws IOException { + + final ColumnVector keyColumnVector = colVector.keys; + final ColumnVector valueColumnVector = colVector.values; + final TypeInfo keyTypeInfo = typeInfo.getMapKeyTypeInfo(); + final TypeInfo valueTypeInfo = typeInfo.getMapValueTypeInfo(); + final int offset = (int) colVector.offsets[adjustedBatchIndex]; + final int size = (int) colVector.lengths[adjustedBatchIndex]; + + final Map map = (Map) vectorExtractRow.extractRowColumn( + colVector, typeInfo, objectInspector, adjustedBatchIndex); + + serializeWrite.beginMap(map); + for (int i = 0; i < size; i++) { + if (i > 0) { + serializeWrite.separateKeyValuePair(); + } + serializeWrite(keyColumnVector, keyTypeInfo, + objectInspector.getMapKeyObjectInspector(), offset + i); + serializeWrite.separateKey(); + serializeWrite(valueColumnVector, valueTypeInfo, + objectInspector.getMapValueObjectInspector(), offset + i); + } + serializeWrite.finishMap(); + } + + private void serializeListWrite( + ListColumnVector colVector, ListTypeInfo typeInfo, + ListObjectInspector objectInspector, int adjustedBatchIndex) throws IOException { + + final ColumnVector childColumnVector = colVector.child; + final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo(); + final int offset = (int) colVector.offsets[adjustedBatchIndex]; + final int size = (int) colVector.lengths[adjustedBatchIndex]; + + final ObjectInspector elementObjectInspector = objectInspector.getListElementObjectInspector(); + final List list = (List) vectorExtractRow.extractRowColumn( + colVector, typeInfo, objectInspector, adjustedBatchIndex); + + serializeWrite.beginList(list); for (int i = 0; i < size; i++) { - colVector = batch.cols[outputColumnNums[i]]; - if (colVector.isRepeating) { - adjustedBatchIndex = 0; - } else { - adjustedBatchIndex = batchIndex; + if (i > 0) { + serializeWrite.separateList(); + } + serializeWrite( + childColumnVector, elementTypeInfo, elementObjectInspector, offset + i); + } + serializeWrite.finishList(); + } + + private void serializePrimitiveWrite( + ColumnVector colVector, PrimitiveTypeInfo typeInfo, int adjustedBatchIndex) throws IOException { + + final PrimitiveCategory primitiveCategory = typeInfo.getPrimitiveCategory(); + switch (primitiveCategory) { + case BOOLEAN: + serializeWrite.writeBoolean(((LongColumnVector) colVector).vector[adjustedBatchIndex] != 0); + break; + case BYTE: + serializeWrite.writeByte((byte) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); + break; + case SHORT: + serializeWrite.writeShort((short) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); + break; + case INT: + serializeWrite.writeInt((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); + break; + case LONG: + serializeWrite.writeLong(((LongColumnVector) colVector).vector[adjustedBatchIndex]); + break; + case DATE: + serializeWrite.writeDate((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); + break; + case TIMESTAMP: + serializeWrite.writeTimestamp(((TimestampColumnVector) colVector).asScratchTimestamp(adjustedBatchIndex)); + break; + case FLOAT: + serializeWrite.writeFloat((float) ((DoubleColumnVector) colVector).vector[adjustedBatchIndex]); + break; + case DOUBLE: + serializeWrite.writeDouble(((DoubleColumnVector) colVector).vector[adjustedBatchIndex]); + break; + case STRING: + case CHAR: + case VARCHAR: + { + // We store CHAR and VARCHAR without pads, so write with STRING. + final BytesColumnVector bytesColVector = (BytesColumnVector) colVector; + serializeWrite.writeString( + bytesColVector.vector[adjustedBatchIndex], + bytesColVector.start[adjustedBatchIndex], + bytesColVector.length[adjustedBatchIndex]); } - if (!colVector.noNulls && colVector.isNull[adjustedBatchIndex]) { - serializeWrite.writeNull(); - hasAnyNulls = true; - continue; + break; + case BINARY: + { + final BytesColumnVector bytesColVector = (BytesColumnVector) colVector; + serializeWrite.writeBinary( + bytesColVector.vector[adjustedBatchIndex], + bytesColVector.start[adjustedBatchIndex], + bytesColVector.length[adjustedBatchIndex]); } - isAllNulls = false; - switch (categories[i]) { - case PRIMITIVE: - switch (primitiveCategories[i]) { - case BOOLEAN: - serializeWrite.writeBoolean(((LongColumnVector) colVector).vector[adjustedBatchIndex] != 0); - break; - case BYTE: - serializeWrite.writeByte((byte) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); - break; - case SHORT: - serializeWrite.writeShort((short) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); - break; - case INT: - serializeWrite.writeInt((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); - break; - case LONG: - serializeWrite.writeLong(((LongColumnVector) colVector).vector[adjustedBatchIndex]); - break; - case DATE: - serializeWrite.writeDate((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); - break; - case TIMESTAMP: - serializeWrite.writeTimestamp(((TimestampColumnVector) colVector).asScratchTimestamp(adjustedBatchIndex)); - break; - case FLOAT: - serializeWrite.writeFloat((float) ((DoubleColumnVector) colVector).vector[adjustedBatchIndex]); - break; - case DOUBLE: - serializeWrite.writeDouble(((DoubleColumnVector) colVector).vector[adjustedBatchIndex]); - break; - case STRING: - case CHAR: - case VARCHAR: - { - // We store CHAR and VARCHAR without pads, so write with STRING. - BytesColumnVector bytesColVector = (BytesColumnVector) colVector; - serializeWrite.writeString( - bytesColVector.vector[adjustedBatchIndex], - bytesColVector.start[adjustedBatchIndex], - bytesColVector.length[adjustedBatchIndex]); - } - break; - case BINARY: - { - BytesColumnVector bytesColVector = (BytesColumnVector) colVector; - serializeWrite.writeBinary( - bytesColVector.vector[adjustedBatchIndex], - bytesColVector.start[adjustedBatchIndex], - bytesColVector.length[adjustedBatchIndex]); - } - break; - case DECIMAL: - { - DecimalColumnVector decimalColVector = (DecimalColumnVector) colVector; - serializeWrite.writeHiveDecimal(decimalColVector.vector[adjustedBatchIndex], decimalColVector.scale); - } - break; - case INTERVAL_YEAR_MONTH: - serializeWrite.writeHiveIntervalYearMonth((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); - break; - case INTERVAL_DAY_TIME: - serializeWrite.writeHiveIntervalDayTime(((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedBatchIndex)); - break; - default: - throw new RuntimeException("Unexpected primitive category " + primitiveCategories[i]); - } - break; - default: - throw new RuntimeException("Unexpected category " + categories[i]); + break; + case DECIMAL: + { + final DecimalColumnVector decimalColVector = (DecimalColumnVector) colVector; + serializeWrite.writeHiveDecimal(decimalColVector.vector[adjustedBatchIndex], decimalColVector.scale); } + break; + case INTERVAL_YEAR_MONTH: + serializeWrite.writeHiveIntervalYearMonth((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); + break; + case INTERVAL_DAY_TIME: + serializeWrite.writeHiveIntervalDayTime(((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedBatchIndex)); + break; + default: + throw new RuntimeException("Unexpected primitive category " + primitiveCategory); } } http://git-wip-us.apache.org/repos/asf/hive/blob/d467e172/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java index e9ce8e8..a8748d042 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java @@ -39,7 +39,13 @@ public class TestVectorRowObject extends TestCase { vectorExtractRow.extractRow(batch, i, row); Object[] expectedRow = randomRows[firstRandomRowIndex + i]; for (int c = 0; c < rowSize; c++) { - if (!row[c].equals(expectedRow[c])) { + Object actualValue = row[c]; + Object expectedValue = expectedRow[c]; + if (actualValue == null || expectedValue == null) { + if (actualValue != expectedValue) { + fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch"); + } + } else if (!actualValue.equals(expectedValue)) { fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch"); } } @@ -51,7 +57,8 @@ public class TestVectorRowObject extends TestCase { String[] emptyScratchTypeNames = new String[0]; VectorRandomRowSource source = new VectorRandomRowSource(); - source.init(r); + + source.init(r, VectorRandomRowSource.SupportedTypes.ALL, 4); VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames); @@ -69,7 +76,7 @@ public class TestVectorRowObject extends TestCase { VectorExtractRow vectorExtractRow = new VectorExtractRow(); vectorExtractRow.init(source.typeNames()); - Object[][] randomRows = source.randomRows(10000); + Object[][] randomRows = source.randomRows(1000); if (sort) { source.sort(randomRows); } http://git-wip-us.apache.org/repos/asf/hive/blob/d467e172/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java index 822fff2..5520cde 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java @@ -19,31 +19,14 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.io.IOException; -import java.sql.Date; -import java.sql.Timestamp; +import java.util.ArrayList; import java.util.Arrays; import java.util.Properties; import java.util.Random; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.OpenCSVSerde; import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.io.ByteWritable; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; @@ -52,26 +35,18 @@ import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerialize import org.apache.hadoop.hive.serde2.fast.DeserializeRead; import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazy.VerifyLazy; import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite; +import org.apache.hadoop.hive.serde2.lazy.fast.StringToDouble; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObject; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.fast.SerializeWrite; -import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; - -import com.google.common.base.Charsets; import junit.framework.TestCase; @@ -87,209 +62,56 @@ public class TestVectorSerDeRow extends TestCase { LAZY_SIMPLE } - void deserializeAndVerify(Output output, DeserializeRead deserializeRead, - VectorRandomRowSource source, Object[] expectedRow) - throws HiveException, IOException { - deserializeRead.set(output.getData(), 0, output.getLength()); - PrimitiveCategory[] primitiveCategories = source.primitiveCategories(); - for (int i = 0; i < primitiveCategories.length; i++) { - Object expected = expectedRow[i]; - PrimitiveCategory primitiveCategory = primitiveCategories[i]; - PrimitiveTypeInfo primitiveTypeInfo = source.primitiveTypeInfos()[i]; - if (!deserializeRead.readNextField()) { - throw new HiveException("Unexpected NULL when reading primitiveCategory " + primitiveCategory + - " expected (" + expected.getClass().getName() + ", " + expected.toString() + ") " + - " deserializeRead " + deserializeRead.getClass().getName()); - } - switch (primitiveCategory) { - case BOOLEAN: - { - Boolean value = deserializeRead.currentBoolean; - BooleanWritable expectedWritable = (BooleanWritable) expected; - if (!value.equals(expectedWritable.get())) { - TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")"); - } - } - break; - case BYTE: - { - Byte value = deserializeRead.currentByte; - ByteWritable expectedWritable = (ByteWritable) expected; - if (!value.equals(expectedWritable.get())) { - TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")"); - } - } - break; - case SHORT: - { - Short value = deserializeRead.currentShort; - ShortWritable expectedWritable = (ShortWritable) expected; - if (!value.equals(expectedWritable.get())) { - TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")"); - } - } - break; - case INT: - { - Integer value = deserializeRead.currentInt; - IntWritable expectedWritable = (IntWritable) expected; - if (!value.equals(expectedWritable.get())) { - TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")"); - } - } - break; - case LONG: - { - Long value = deserializeRead.currentLong; - LongWritable expectedWritable = (LongWritable) expected; - if (!value.equals(expectedWritable.get())) { - TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")"); - } - } - break; - case DATE: - { - DateWritable value = deserializeRead.currentDateWritable; - DateWritable expectedWritable = (DateWritable) expected; - if (!value.equals(expectedWritable)) { - TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); - } - } - break; - case FLOAT: - { - Float value = deserializeRead.currentFloat; - FloatWritable expectedWritable = (FloatWritable) expected; - if (!value.equals(expectedWritable.get())) { - TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")"); - } - } - break; - case DOUBLE: - { - Double value = deserializeRead.currentDouble; - DoubleWritable expectedWritable = (DoubleWritable) expected; - if (!value.equals(expectedWritable.get())) { - TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")"); - } + private void verifyRead( + DeserializeRead deserializeRead, TypeInfo typeInfo, Object expectedObject) throws IOException { + + if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) { + VectorVerifyFast.verifyDeserializeRead(deserializeRead, typeInfo, expectedObject); + } else { + Object complexFieldObj = VectorVerifyFast.deserializeReadComplexType(deserializeRead, typeInfo); + if (expectedObject == null) { + if (complexFieldObj != null) { + TestCase.fail("Field reports not null but object is null (class " + complexFieldObj.getClass().getName() + + ", " + complexFieldObj.toString() + ")"); } - break; - case STRING: - case CHAR: - case VARCHAR: - case BINARY: - { - byte[] stringBytes = - Arrays.copyOfRange( - deserializeRead.currentBytes, - deserializeRead.currentBytesStart, - deserializeRead.currentBytesStart + deserializeRead.currentBytesLength); - - Text text = new Text(stringBytes); - String string = text.toString(); - - switch (primitiveCategory) { - case STRING: - { - Text expectedWritable = (Text) expected; - if (!string.equals(expectedWritable.toString())) { - TestCase.fail("String field mismatch (expected '" + expectedWritable.toString() + "' found '" + string + "')"); - } - } - break; - case CHAR: - { - HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength()); - - HiveCharWritable expectedWritable = (HiveCharWritable) expected; - if (!hiveChar.equals(expectedWritable.getHiveChar())) { - TestCase.fail("Char field mismatch (expected '" + expectedWritable.getHiveChar() + "' found '" + hiveChar + "')"); - } - } - break; - case VARCHAR: - { - HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength()); - HiveVarcharWritable expectedWritable = (HiveVarcharWritable) expected; - if (!hiveVarchar.equals(expectedWritable.getHiveVarchar())) { - TestCase.fail("Varchar field mismatch (expected '" + expectedWritable.getHiveVarchar() + "' found '" + hiveVarchar + "')"); - } - } - break; - case BINARY: - { - BytesWritable expectedWritable = (BytesWritable) expected; - if (stringBytes.length != expectedWritable.getLength()){ - TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + stringBytes + ")"); - } - byte[] expectedBytes = expectedWritable.getBytes(); - for (int b = 0; b < stringBytes.length; b++) { - if (stringBytes[b] != expectedBytes[b]) { - TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + stringBytes + ")"); - } - } + } else { + if (complexFieldObj == null) { + // It's hard to distinguish a union with null from a null union. + if (expectedObject instanceof UnionObject) { + UnionObject expectedUnion = (UnionObject) expectedObject; + if (expectedUnion.getObject() == null) { + return; } - break; - default: - throw new HiveException("Unexpected primitive category " + primitiveCategory); - } - } - break; - case DECIMAL: - { - HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal(); - if (value == null) { - TestCase.fail("Decimal field evaluated to NULL"); - } - HiveDecimalWritable expectedWritable = (HiveDecimalWritable) expected; - if (!value.equals(expectedWritable.getHiveDecimal())) { - DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; - int precision = decimalTypeInfo.getPrecision(); - int scale = decimalTypeInfo.getScale(); - TestCase.fail("Decimal field mismatch (expected " + expectedWritable.getHiveDecimal() + " found " + value.toString() + ") precision " + precision + ", scale " + scale); } - } - break; - case TIMESTAMP: - { - Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp(); - TimestampWritable expectedWritable = (TimestampWritable) expected; - if (!value.equals(expectedWritable.getTimestamp())) { - TestCase.fail("Timestamp field mismatch (expected " + expectedWritable.getTimestamp() + " found " + value.toString() + ")"); + TestCase.fail("Field reports null but object is not null (class " + expectedObject.getClass().getName() + + ", " + expectedObject.toString() + ")"); } } - break; - case INTERVAL_YEAR_MONTH: - { - HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth(); - HiveIntervalYearMonthWritable expectedWritable = (HiveIntervalYearMonthWritable) expected; - HiveIntervalYearMonth expectedValue = expectedWritable.getHiveIntervalYearMonth(); - if (!value.equals(expectedValue)) { - TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expectedValue + " found " + value.toString() + ")"); - } + if (!VerifyLazy.lazyCompare(typeInfo, complexFieldObj, expectedObject)) { + TestCase.fail("Comparision failed typeInfo " + typeInfo.toString()); } - break; - case INTERVAL_DAY_TIME: - { - HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime(); - HiveIntervalDayTimeWritable expectedWritable = (HiveIntervalDayTimeWritable) expected; - HiveIntervalDayTime expectedValue = expectedWritable.getHiveIntervalDayTime(); - if (!value.equals(expectedValue)) { - TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expectedValue + " found " + value.toString() + ")"); - } - } - break; - - default: - throw new HiveException("Unexpected primitive category " + primitiveCategory); } + } + + void deserializeAndVerify( + Output output, DeserializeRead deserializeRead, + VectorRandomRowSource source, Object[] expectedRow) + throws HiveException, IOException { + + deserializeRead.set(output.getData(), 0, output.getLength()); + TypeInfo[] typeInfos = source.typeInfos(); + for (int i = 0; i < typeInfos.length; i++) { + Object expected = expectedRow[i]; + TypeInfo typeInfo = typeInfos[i]; + verifyRead(deserializeRead, typeInfo, expected); } TestCase.assertTrue(deserializeRead.isEndOfInputReached()); } - void serializeBatch(VectorizedRowBatch batch, VectorSerializeRow vectorSerializeRow, - DeserializeRead deserializeRead, VectorRandomRowSource source, Object[][] randomRows, - int firstRandomRowIndex) throws HiveException, IOException { + void serializeBatch( + VectorizedRowBatch batch, VectorSerializeRow vectorSerializeRow, + DeserializeRead deserializeRead, VectorRandomRowSource source, Object[][] randomRows, + int firstRandomRowIndex) throws HiveException, IOException { Output output = new Output(); for (int i = 0; i < batch.size; i++) { @@ -312,10 +134,20 @@ public class TestVectorSerDeRow extends TestCase { void testVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException { + for (int i = 0; i < 20; i++) { + innerTestVectorSerializeRow(r, serializationType); + } + } + + void innerTestVectorSerializeRow( + Random r, SerializationType serializationType) + throws HiveException, IOException, SerDeException { + String[] emptyScratchTypeNames = new String[0]; VectorRandomRowSource source = new VectorRandomRowSource(); - source.init(r); + + source.init(r, VectorRandomRowSource.SupportedTypes.ALL, 4, false); VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames); @@ -329,22 +161,25 @@ public class TestVectorSerDeRow extends TestCase { SerializeWrite serializeWrite; switch (serializationType) { case BINARY_SORTABLE: - deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false); + deserializeRead = new BinarySortableDeserializeRead(source.typeInfos(), /* useExternalBuffer */ false); serializeWrite = new BinarySortableSerializeWrite(fieldCount); break; case LAZY_BINARY: - deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false); + deserializeRead = new LazyBinaryDeserializeRead(source.typeInfos(), /* useExternalBuffer */ false); serializeWrite = new LazyBinarySerializeWrite(fieldCount); break; case LAZY_SIMPLE: { StructObjectInspector rowObjectInspector = source.rowStructObjectInspector(); - LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector); - byte separator = (byte) '\t'; - deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false, - separator, lazySerDeParams); - serializeWrite = new LazySimpleSerializeWrite(fieldCount, - separator, lazySerDeParams); + // Use different separator values. + byte[] separators = new byte[] {(byte) 9, (byte) 2, (byte) 3, (byte) 4, (byte) 5, (byte) 6, (byte) 7, (byte) 8}; + LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector, separators); + deserializeRead = + new LazySimpleDeserializeRead( + source.typeInfos(), + /* useExternalBuffer */ false, + lazySerDeParams); + serializeWrite = new LazySimpleSerializeWrite(fieldCount, lazySerDeParams); } break; default: @@ -353,7 +188,7 @@ public class TestVectorSerDeRow extends TestCase { VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite); vectorSerializeRow.init(source.typeNames()); - Object[][] randomRows = source.randomRows(100000); + Object[][] randomRows = source.randomRows(2000); int firstRandomRowIndex = 0; for (int i = 0; i < randomRows.length; i++) { Object[] row = randomRows[i]; @@ -372,7 +207,7 @@ public class TestVectorSerDeRow extends TestCase { } void examineBatch(VectorizedRowBatch batch, VectorExtractRow vectorExtractRow, - PrimitiveTypeInfo[] primitiveTypeInfos, Object[][] randomRows, int firstRandomRowIndex ) { + TypeInfo[] typeInfos, Object[][] randomRows, int firstRandomRowIndex ) { int rowSize = vectorExtractRow.getCount(); Object[] row = new Object[rowSize]; @@ -385,12 +220,17 @@ public class TestVectorSerDeRow extends TestCase { Object rowObj = row[c]; Object expectedObj = expectedRow[c]; if (rowObj == null) { + if (expectedObj == null) { + continue; + } fail("Unexpected NULL from extractRow. Expected class " + - expectedObj.getClass().getName() + " value " + expectedObj.toString() + + typeInfos[c].getCategory() + " value " + expectedObj + " batch index " + i + " firstRandomRowIndex " + firstRandomRowIndex); } if (!rowObj.equals(expectedObj)) { - fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch (" + primitiveTypeInfos[c].getPrimitiveCategory() + " actual value " + rowObj + " and expected value " + expectedObj + ")"); + fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch (" + + typeInfos[c].getCategory() + " actual value " + rowObj + + " and expected value " + expectedObj + ")"); } } } @@ -400,126 +240,10 @@ public class TestVectorSerDeRow extends TestCase { SerializeWrite serializeWrite) throws HiveException, IOException { Output output = new Output(); serializeWrite.set(output); - PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos(); - for (int i = 0; i < primitiveTypeInfos.length; i++) { - Object object = row[i]; - PrimitiveCategory primitiveCategory = primitiveTypeInfos[i].getPrimitiveCategory(); - switch (primitiveCategory) { - case BOOLEAN: - { - BooleanWritable expectedWritable = (BooleanWritable) object; - boolean value = expectedWritable.get(); - serializeWrite.writeBoolean(value); - } - break; - case BYTE: - { - ByteWritable expectedWritable = (ByteWritable) object; - byte value = expectedWritable.get(); - serializeWrite.writeByte(value); - } - break; - case SHORT: - { - ShortWritable expectedWritable = (ShortWritable) object; - short value = expectedWritable.get(); - serializeWrite.writeShort(value); - } - break; - case INT: - { - IntWritable expectedWritable = (IntWritable) object; - int value = expectedWritable.get(); - serializeWrite.writeInt(value); - } - break; - case LONG: - { - LongWritable expectedWritable = (LongWritable) object; - long value = expectedWritable.get(); - serializeWrite.writeLong(value); - } - break; - case DATE: - { - DateWritable expectedWritable = (DateWritable) object; - Date value = expectedWritable.get(); - serializeWrite.writeDate(value); - } - break; - case FLOAT: - { - FloatWritable expectedWritable = (FloatWritable) object; - float value = expectedWritable.get(); - serializeWrite.writeFloat(value); - } - break; - case DOUBLE: - { - DoubleWritable expectedWritable = (DoubleWritable) object; - double value = expectedWritable.get(); - serializeWrite.writeDouble(value); - } - break; - case STRING: - { - Text text = (Text) object; - serializeWrite.writeString(text.getBytes(), 0, text.getLength()); - } - break; - case CHAR: - { - HiveCharWritable expectedWritable = (HiveCharWritable) object; - HiveChar value = expectedWritable.getHiveChar(); - serializeWrite.writeHiveChar(value); - } - break; - case VARCHAR: - { - HiveVarcharWritable expectedWritable = (HiveVarcharWritable) object; - HiveVarchar value = expectedWritable.getHiveVarchar(); - serializeWrite.writeHiveVarchar(value); - } - break; - case BINARY: - { - BytesWritable expectedWritable = (BytesWritable) object; - byte[] bytes = expectedWritable.getBytes(); - int length = expectedWritable.getLength(); - serializeWrite.writeBinary(bytes, 0, length); - } - break; - case TIMESTAMP: - { - TimestampWritable expectedWritable = (TimestampWritable) object; - Timestamp value = expectedWritable.getTimestamp(); - serializeWrite.writeTimestamp(value); - } - break; - case INTERVAL_YEAR_MONTH: - { - HiveIntervalYearMonthWritable expectedWritable = (HiveIntervalYearMonthWritable) object; - HiveIntervalYearMonth value = expectedWritable.getHiveIntervalYearMonth(); - serializeWrite.writeHiveIntervalYearMonth(value); - } - break; - case INTERVAL_DAY_TIME: - { - HiveIntervalDayTimeWritable expectedWritable = (HiveIntervalDayTimeWritable) object; - HiveIntervalDayTime value = expectedWritable.getHiveIntervalDayTime(); - serializeWrite.writeHiveIntervalDayTime(value); - } - break; - case DECIMAL: - { - HiveDecimalWritable expectedWritable = (HiveDecimalWritable) object; - HiveDecimal value = expectedWritable.getHiveDecimal(); - serializeWrite.writeHiveDecimal(value, ((DecimalTypeInfo)primitiveTypeInfos[i]).scale()); - } - break; - default: - throw new HiveException("Unexpected primitive category " + primitiveCategory); - } + TypeInfo[] typeInfos = source.typeInfos(); + + for (int i = 0; i < typeInfos.length; i++) { + VectorVerifyFast.serializeWrite(serializeWrite, typeInfos[i], row[i]); } return output; } @@ -531,29 +255,47 @@ public class TestVectorSerDeRow extends TestCase { tbl.setProperty("columns", fieldNames); tbl.setProperty("columns.types", fieldTypes); - tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); + tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N"); } - private LazySerDeParameters getSerDeParams( StructObjectInspector rowObjectInspector) throws SerDeException { - return getSerDeParams(new Configuration(), new Properties(), rowObjectInspector); + private LazySerDeParameters getSerDeParams( + StructObjectInspector rowObjectInspector, byte[] separators) throws SerDeException { + return getSerDeParams(new Configuration(), new Properties(), rowObjectInspector, separators); } - private LazySerDeParameters getSerDeParams(Configuration conf, Properties tbl, StructObjectInspector rowObjectInspector) throws SerDeException { + private LazySerDeParameters getSerDeParams( + Configuration conf, Properties tbl, StructObjectInspector rowObjectInspector, + byte[] separators) throws SerDeException { + String fieldNames = ObjectInspectorUtils.getFieldNames(rowObjectInspector); String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowObjectInspector); addToProperties(tbl, fieldNames, fieldTypes); - return new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); + LazySerDeParameters lazySerDeParams = new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); + for (int i = 0; i < separators.length; i++) { + lazySerDeParams.setSeparator(i, separators[i]); + } + return lazySerDeParams; } - void testVectorDeserializeRow(Random r, SerializationType serializationType, - boolean alternate1, boolean alternate2, - boolean useExternalBuffer) - throws HiveException, IOException, SerDeException { + void testVectorDeserializeRow( + Random r, SerializationType serializationType, + boolean alternate1, boolean alternate2, boolean useExternalBuffer) + throws HiveException, IOException, SerDeException { + + for (int i = 0; i < 20; i++) { + innerTestVectorDeserializeRow(r, serializationType, alternate1, alternate2, useExternalBuffer); + } + } + + void innerTestVectorDeserializeRow( + Random r, SerializationType serializationType, + boolean alternate1, boolean alternate2, boolean useExternalBuffer) + throws HiveException, IOException, SerDeException { String[] emptyScratchTypeNames = new String[0]; VectorRandomRowSource source = new VectorRandomRowSource(); - source.init(r); + source.init(r, VectorRandomRowSource.SupportedTypes.ALL, 4, false); VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames); @@ -564,7 +306,7 @@ public class TestVectorSerDeRow extends TestCase { Arrays.fill(cv.isNull, true); } - PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos(); + TypeInfo[] typeInfos = source.typeInfos(); int fieldCount = source.typeNames().size(); DeserializeRead deserializeRead; SerializeWrite serializeWrite; @@ -572,7 +314,7 @@ public class TestVectorSerDeRow extends TestCase { case BINARY_SORTABLE: boolean useColumnSortOrderIsDesc = alternate1; if (!useColumnSortOrderIsDesc) { - deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer); + deserializeRead = new BinarySortableDeserializeRead(source.typeInfos(), useExternalBuffer); serializeWrite = new BinarySortableSerializeWrite(fieldCount); } else { boolean[] columnSortOrderIsDesc = new boolean[fieldCount]; @@ -596,7 +338,7 @@ public class TestVectorSerDeRow extends TestCase { } } serializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); - deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer, + deserializeRead = new BinarySortableDeserializeRead(source.typeInfos(), useExternalBuffer, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); } @@ -606,7 +348,7 @@ public class TestVectorSerDeRow extends TestCase { } break; case LAZY_BINARY: - deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer); + deserializeRead = new LazyBinaryDeserializeRead(source.typeInfos(), useExternalBuffer); serializeWrite = new LazyBinarySerializeWrite(fieldCount); break; case LAZY_SIMPLE: @@ -624,7 +366,8 @@ public class TestVectorSerDeRow extends TestCase { tbl.setProperty(serdeConstants.ESCAPE_CHAR, escapeString); } - LazySerDeParameters lazySerDeParams = getSerDeParams(conf, tbl, rowObjectInspector); + LazySerDeParameters lazySerDeParams = + getSerDeParams(conf, tbl, rowObjectInspector, new byte[] { separator }); if (useLazySimpleEscapes) { // LazySimple seems to throw away everything but \n and \r. @@ -646,10 +389,9 @@ public class TestVectorSerDeRow extends TestCase { source.addEscapables(needsEscapeStr); } } - deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer, - separator, lazySerDeParams); - serializeWrite = new LazySimpleSerializeWrite(fieldCount, - separator, lazySerDeParams); + deserializeRead = + new LazySimpleDeserializeRead(source.typeInfos(), useExternalBuffer, lazySerDeParams); + serializeWrite = new LazySimpleSerializeWrite(fieldCount, lazySerDeParams); } break; default: @@ -667,7 +409,7 @@ public class TestVectorSerDeRow extends TestCase { VectorExtractRow vectorExtractRow = new VectorExtractRow(); vectorExtractRow.init(source.typeNames()); - Object[][] randomRows = source.randomRows(100000); + Object[][] randomRows = source.randomRows(2000); int firstRandomRowIndex = 0; for (int i = 0; i < randomRows.length; i++) { Object[] row = randomRows[i]; @@ -684,13 +426,13 @@ public class TestVectorSerDeRow extends TestCase { } batch.size++; if (batch.size == batch.DEFAULT_SIZE) { - examineBatch(batch, vectorExtractRow, primitiveTypeInfos, randomRows, firstRandomRowIndex); + examineBatch(batch, vectorExtractRow, typeInfos, randomRows, firstRandomRowIndex); firstRandomRowIndex = i + 1; batch.reset(); } } if (batch.size > 0) { - examineBatch(batch, vectorExtractRow, primitiveTypeInfos, randomRows, firstRandomRowIndex); + examineBatch(batch, vectorExtractRow, typeInfos, randomRows, firstRandomRowIndex); } }
