Repository: hive Updated Branches: refs/heads/branch-3 b1c8b2ad3 -> eb842b75b
http://git-wip-us.apache.org/repos/asf/hive/blob/eb842b75/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinarySerde.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinarySerde.java b/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinarySerde.java new file mode 100644 index 0000000..ccf5f44 --- /dev/null +++ b/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinarySerde.java @@ -0,0 +1,597 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2.teradata; + +import com.google.common.collect.ImmutableMap; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeSpec; +import org.apache.hadoop.hive.serde2.SerDeStats; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.hive.common.type.Date; + +import javax.annotation.Nullable; +import java.io.ByteArrayInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import static java.lang.String.format; + +/** + * https://cwiki.apache.org/confluence/display/Hive/TeradataBinarySerde. + * TeradataBinarySerde handles the serialization and deserialization of Teradata Binary Record + * passed from TeradataBinaryRecordReader. + * + * The Teradata Binary Record uses little-endian to handle the SHORT, INT, LONG, DOUBLE... + * We extend SwappedDataInputStream to handle these types and extend to handle the Teradata + * specific types like VARCHAR, CHAR, TIMESTAMP, DATE... + * + * Currently we support 11 Teradata data types: VARCHAR ,INTEGER, TIMESTAMP, FLOAT, DATE, + * BYTEINT, BIGINT, CHARACTER, DECIMAL, SMALLINT, VARBYTE. 
+ * The mapping between Teradata data type and Hive data type is + * Teradata Data Type: Hive Data Type + * VARCHAR: VARCHAR, + * INTEGER: INT, + * TIMESTAMP: TIMESTAMP, + * FLOAT: DOUBLE, + * DATE: DATE, + * BYTEINT: TINYINTÂ , + * BIGINT: BIGINT, + * CHAR: CHAR, + * DECIMAL: DECIMAL, + * SMALLINT: SMALLINT, + * VARBYTE: BINARY. + * + * TeradataBinarySerde currently doesn't support complex types like MAP, ARRAY and STRUCT. + */ +@SerDeSpec(schemaProps = { serdeConstants.LIST_COLUMNS, + serdeConstants.LIST_COLUMN_TYPES }) public class TeradataBinarySerde extends AbstractSerDe { + private static final Log LOG = LogFactory.getLog(TeradataBinarySerde.class); + + public static final String TD_SCHEMA_LITERAL = "teradata.schema.literal"; + + private StructObjectInspector rowOI; + private ArrayList<Object> row; + private byte[] inForNull; + + private int numCols; + private List<String> columnNames; + private List<TypeInfo> columnTypes; + + private TeradataBinaryDataOutputStream out; + private BytesWritable serializeBytesWritable; + private byte[] outForNull; + + public static final String TD_TIMESTAMP_PRECISION = "teradata.timestamp.precision"; + private int timestampPrecision; + private static final int DEFAULT_TIMESTAMP_BYTE_NUM = 19; + private static final String DEFAULT_TIMESTAMP_PRECISION = "6"; + + public static final String TD_CHAR_SET = "teradata.char.charset"; + private String charCharset; + private static final String DEFAULT_CHAR_CHARSET = "UNICODE"; + private static final Map<String, Integer> CHARSET_TO_BYTE_NUM = ImmutableMap.of("LATIN", 2, "UNICODE", 3); + + /** + * Initialize the HiveSerializer. + * + * @param conf + * System properties. 
Can be null in compile time + * @param tbl + * table properties + * @throws SerDeException + */ + @Override public void initialize(@Nullable Configuration conf, Properties tbl) throws SerDeException { + columnNames = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(",")); + + String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); + LOG.debug(serdeConstants.LIST_COLUMN_TYPES + ": " + columnTypeProperty); + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList<TypeInfo>(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } + + assert columnNames.size() == columnTypes.size(); + numCols = columnNames.size(); + + // get the configured teradata timestamp precision + // you can configure to generate timestamp of different precision in the binary file generated by TPT/BTEQ + timestampPrecision = Integer.parseInt(tbl.getProperty(TD_TIMESTAMP_PRECISION, DEFAULT_TIMESTAMP_PRECISION)); + + // get the configured teradata char charset + // in TD, latin charset will have 2 bytes per char and unicode will have 3 bytes per char + charCharset = tbl.getProperty(TD_CHAR_SET, DEFAULT_CHAR_CHARSET); + if (!CHARSET_TO_BYTE_NUM.containsKey(charCharset)) { + throw new SerDeException( + format("%s isn't supported in Teradata Char Charset %s", charCharset, CHARSET_TO_BYTE_NUM.keySet())); + } + + // All columns have to be primitive. 
+ // Constructing the row ObjectInspector: + List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(numCols); + for (int i = 0; i < numCols; i++) { + if (columnTypes.get(i).getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new SerDeException( + getClass().getName() + " only accepts primitive columns, but column[" + i + "] named " + columnNames.get(i) + + " has category " + columnTypes.get(i).getCategory()); + } + columnOIs.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(columnTypes.get(i))); + } + + rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs); + + // Constructing the row object and will be reused for all rows + row = new ArrayList<Object>(numCols); + for (int i = 0; i < numCols; i++) { + row.add(null); + } + + // Initialize vars related to Null Array which represents the null bitmap + int byteNumForNullArray = (numCols / 8) + ((numCols % 8 == 0) ? 0 : 1); + LOG.debug(format("The Null Bytes for each record will have %s bytes", byteNumForNullArray)); + inForNull = new byte[byteNumForNullArray]; + + out = new TeradataBinaryDataOutputStream(); + serializeBytesWritable = new BytesWritable(); + outForNull = new byte[byteNumForNullArray]; + } + + /** + * Returns the Writable class that would be returned by the serialize method. + * This is used to initialize SequenceFile header. + */ + @Override public Class<? extends Writable> getSerializedClass() { + return ByteWritable.class; + } + + /** + * Serialize an object by navigating inside the Object with the + * ObjectInspector. In most cases, the return value of this function will be + * constant since the function will reuse the Writable object. If the client + * wants to keep a copy of the Writable, the client needs to clone the + * returned value. 
+ + * @param obj + * @param objInspector + */ + @Override public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { + try { + out.reset(); + final StructObjectInspector outputRowOI = (StructObjectInspector) objInspector; + final List<? extends StructField> fieldRefs = outputRowOI.getAllStructFieldRefs(); + + if (fieldRefs.size() != numCols) { + throw new SerDeException( + "Cannot serialize the object because there are " + fieldRefs.size() + " fieldRefs but the table defined " + + numCols + " columns."); + } + + // Fully refresh the Null Array to write into the out + for (int i = 0; i < numCols; i++) { + Object objectForField = outputRowOI.getStructFieldData(obj, fieldRefs.get(i)); + if (objectForField == null) { + outForNull[i / 8] = (byte) (outForNull[i / 8] | (0x01 << (7 - (i % 8)))); + } else { + outForNull[i / 8] = (byte) (outForNull[i / 8] & ~(0x01 << (7 - (i % 8)))); + } + } + out.write(outForNull); + + // serialize each field using FieldObjectInspector + for (int i = 0; i < numCols; i++) { + Object objectForField = outputRowOI.getStructFieldData(obj, fieldRefs.get(i)); + serializeField(objectForField, fieldRefs.get(i).getFieldObjectInspector(), columnTypes.get(i)); + } + + serializeBytesWritable.set(out.toByteArray(), 0, out.size()); + return serializeBytesWritable; + } catch (IOException e) { + throw new SerDeException(e); + } + } + + private void serializeField(Object objectForField, ObjectInspector oi, TypeInfo ti) + throws IOException, SerDeException { + switch (oi.getCategory()) { + case PRIMITIVE: + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + switch (poi.getPrimitiveCategory()) { + // Teradata Type: BYTEINT + case BYTE: + ByteObjectInspector boi = (ByteObjectInspector) poi; + byte b = 0; + if (objectForField != null) { + b = boi.get(objectForField); + } + out.write(b); + return; + // Teradata Type: SMALLINT + case SHORT: + ShortObjectInspector spoi = (ShortObjectInspector) poi; + short s = 0; + if 
(objectForField != null) { + s = spoi.get(objectForField); + } + out.writeShort(s); + return; + // Teradata Type: INT + case INT: + IntObjectInspector ioi = (IntObjectInspector) poi; + int i = 0; + if (objectForField != null) { + i = ioi.get(objectForField); + } + out.writeInt(i); + return; + // Teradata Type: BIGINT + case LONG: + LongObjectInspector loi = (LongObjectInspector) poi; + long l = 0; + if (objectForField != null) { + l = loi.get(objectForField); + } + out.writeLong(l); + return; + // Teradata Type: FLOAT + case DOUBLE: + DoubleObjectInspector doi = (DoubleObjectInspector) poi; + double d = 0; + if (objectForField != null) { + d = doi.get(objectForField); + } + out.writeDouble(d); + return; + // Teradata Type: VARCHAR + case VARCHAR: + HiveVarcharObjectInspector hvoi = (HiveVarcharObjectInspector) poi; + HiveVarcharWritable hv = hvoi.getPrimitiveWritableObject(objectForField); + // assert the length of varchar record fits into the table definition + if (hv != null) { + assert ((VarcharTypeInfo) ti).getLength() >= hv.getHiveVarchar().getCharacterLength(); + } + out.writeVarChar(hv); + return; + // Teradata Type: TIMESTAMP + case TIMESTAMP: + TimestampObjectInspector tsoi = (TimestampObjectInspector) poi; + TimestampWritableV2 ts = tsoi.getPrimitiveWritableObject(objectForField); + out.writeTimestamp(ts, getTimeStampByteNum(timestampPrecision)); + return; + // Teradata Type: DATE + case DATE: + DateObjectInspector dtoi = (DateObjectInspector) poi; + DateWritableV2 dw = dtoi.getPrimitiveWritableObject(objectForField); + out.writeDate(dw); + return; + // Teradata Type: CHAR + case CHAR: + HiveCharObjectInspector coi = (HiveCharObjectInspector) poi; + HiveCharWritable hc = coi.getPrimitiveWritableObject(objectForField); + // assert the length of char record fits into the table definition + if (hc != null) { + assert ((CharTypeInfo) ti).getLength() >= hc.getHiveChar().getCharacterLength(); + } + out.writeChar(hc, getCharByteNum(charCharset) * ((CharTypeInfo) 
ti).getLength()); + return; + // Teradata Type: DECIMAL + case DECIMAL: + DecimalTypeInfo dtype = (DecimalTypeInfo) ti; + int precision = dtype.precision(); + int scale = dtype.scale(); + HiveDecimalObjectInspector hdoi = (HiveDecimalObjectInspector) poi; + HiveDecimalWritable hd = hdoi.getPrimitiveWritableObject(objectForField); + // assert the precision of decimal record fits into the table definition + if (hd != null) { + assert (dtype.getPrecision() >= hd.precision()); + } + out.writeDecimal(hd, getDecimalByteNum(precision), scale); + return; + // Teradata Type: VARBYTE + case BINARY: + BinaryObjectInspector bnoi = (BinaryObjectInspector) poi; + BytesWritable byw = bnoi.getPrimitiveWritableObject(objectForField); + out.writeVarByte(byw); + return; + default: + throw new SerDeException("Unrecognized type: " + poi.getPrimitiveCategory()); + } + // Currently, serialization of complex types is not supported + case LIST: + case MAP: + case STRUCT: + default: + throw new SerDeException("Unrecognized type: " + oi.getCategory()); + } + } + + @Override public SerDeStats getSerDeStats() { + // no support for statistics + return null; + } + + /** + * Deserialize an object out of a Writable blob. In most cases, the return + * value of this function will be constant since the function will reuse the + * returned object. If the client wants to keep a copy of the object, the + * client needs to clone the returned value by calling + * ObjectInspectorUtils.getStandardObject(). + * + * @param blob + * The Writable object containing a serialized object + * @return A Java object representing the contents in the blob. 
+ */ + @Override public Object deserialize(Writable blob) throws SerDeException { + try { + BytesWritable data = (BytesWritable) blob; + + // initialize the data to be the input stream + TeradataBinaryDataInputStream in = + new TeradataBinaryDataInputStream(new ByteArrayInputStream(data.getBytes(), 0, data.getLength())); + + int numOfByteRead = in.read(inForNull); + + if (inForNull.length != 0 && numOfByteRead != inForNull.length) { + throw new EOFException("not enough bytes for one object"); + } + + boolean isNull; + for (int i = 0; i < numCols; i++) { + // get if the ith field is null or not + isNull = ((inForNull[i / 8] & (128 >> (i % 8))) != 0); + row.set(i, deserializeField(in, columnTypes.get(i), row.get(i), isNull)); + } + + //After deserializing all the fields, the input should be over in which case in.read will return -1 + if (in.read() != -1) { + throw new EOFException("The inputstream has more after we deserialize all the fields - this is unexpected"); + } + } catch (EOFException e) { + LOG.warn("Catch thrown exception", e); + LOG.warn("This record has been polluted. We have reset all the row fields to be null"); + for (int i = 0; i < numCols; i++) { + row.set(i, null); + } + } catch (IOException e) { + throw new SerDeException(e); + } catch (ParseException e) { + throw new SerDeException(e); + } + return row; + } + + private Object deserializeField(TeradataBinaryDataInputStream in, TypeInfo type, Object reuse, boolean isNull) + throws IOException, ParseException, SerDeException { + // isNull: + // In the Teradata Binary file, even the field is null (isNull=true), + // thd data still has some default values to pad the record. + // In this case, you cannot avoid reading the bytes even it is not used. 
+ switch (type.getCategory()) { + case PRIMITIVE: + PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type; + switch (ptype.getPrimitiveCategory()) { + case VARCHAR: // Teradata Type: VARCHAR + String st = in.readVarchar(); + if (isNull) { + return null; + } else { + HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse; + r.set(st, ((VarcharTypeInfo) type).getLength()); + return r; + } + case INT: // Teradata Type: INT + int i = in.readInt(); + if (isNull) { + return null; + } else { + IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse; + r.set(i); + return r; + } + case TIMESTAMP: // Teradata Type: TIMESTAMP + Timestamp ts = in.readTimestamp(getTimeStampByteNum(timestampPrecision)); + if (isNull) { + return null; + } else { + TimestampWritableV2 r = reuse == null ? new TimestampWritableV2() : (TimestampWritableV2) reuse; + r.set(ts); + return r; + } + case DOUBLE: // Teradata Type: FLOAT + double d = in.readDouble(); + if (isNull) { + return null; + } else { + DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse; + r.set(d); + return r; + } + case DATE: // Teradata Type: DATE + Date dt = in.readDate(); + if (isNull) { + return null; + } else { + DateWritableV2 r = reuse == null ? new DateWritableV2() : (DateWritableV2) reuse; + r.set(dt); + return r; + } + case BYTE: // Teradata Type: BYTEINT + byte bt = in.readByte(); + if (isNull) { + return null; + } else { + ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse; + r.set(bt); + return r; + } + case LONG: // Teradata Type: BIGINT + long l = in.readLong(); + if (isNull) { + return null; + } else { + LongWritable r = reuse == null ? 
new LongWritable() : (LongWritable) reuse; + r.set(l); + return r; + } + case CHAR: // Teradata Type: CHAR + CharTypeInfo ctype = (CharTypeInfo) type; + int length = ctype.getLength(); + String c = in.readChar(length * getCharByteNum(charCharset)); + if (isNull) { + return null; + } else { + HiveCharWritable r = reuse == null ? new HiveCharWritable() : (HiveCharWritable) reuse; + r.set(c, length); + return r; + } + case DECIMAL: // Teradata Type: DECIMAL + DecimalTypeInfo dtype = (DecimalTypeInfo) type; + int precision = dtype.precision(); + int scale = dtype.scale(); + HiveDecimal hd = in.readDecimal(scale, getDecimalByteNum(precision)); + if (isNull) { + return null; + } else { + HiveDecimalWritable r = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse); + r.set(hd); + return r; + } + case SHORT: // Teradata Type: SMALLINT + short s = in.readShort(); + if (isNull) { + return null; + } else { + ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse; + r.set(s); + return r; + } + case BINARY: // Teradata Type: VARBYTE + byte[] content = in.readVarbyte(); + if (isNull) { + return null; + } else { + BytesWritable r = new BytesWritable(); + r.set(content, 0, content.length); + return r; + } + default: + throw new SerDeException("Unrecognized type: " + ptype.getPrimitiveCategory()); + } + // Currently, deserialization of complex types is not supported + case LIST: + case MAP: + case STRUCT: + default: + throw new SerDeException("Unsupported category: " + type.getCategory()); + } + } + + /** + * Get the object inspector that can be used to navigate through the internal + * structure of the Object returned from deserialize(...). 
+ */ + @Override public ObjectInspector getObjectInspector() throws SerDeException { + return rowOI; + } + + private int getTimeStampByteNum(int precision) { + if (precision == 0) { + return DEFAULT_TIMESTAMP_BYTE_NUM; + } else { + return precision + 1 + DEFAULT_TIMESTAMP_BYTE_NUM; + } + } + + private int getCharByteNum(String charset) throws SerDeException { + if (!CHARSET_TO_BYTE_NUM.containsKey(charCharset)) { + throw new SerDeException( + format("%s isn't supported in Teradata Char Charset %s", charCharset, CHARSET_TO_BYTE_NUM.keySet())); + } else { + return CHARSET_TO_BYTE_NUM.get(charset); + } + } + + private int getDecimalByteNum(int precision) throws SerDeException { + if (precision <= 0) { + throw new SerDeException(format("the precision of Decimal should be bigger than 0. %d is illegal", precision)); + } + if (precision <= 2) { + return 1; + } + if (precision <= 4) { + return 2; + } + if (precision <= 9) { + return 4; + } + if (precision <= 18) { + return 8; + } + if (precision <= 38) { + return 16; + } + throw new IllegalArgumentException( + format("the precision of Decimal should be smaller than 39. %d is illegal", precision)); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/eb842b75/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDate.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDate.java b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDate.java new file mode 100644 index 0000000..af81fe3 --- /dev/null +++ b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDate.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.teradata; + +import com.google.common.io.BaseEncoding; +import junit.framework.TestCase; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.io.BytesWritable; +import org.junit.Assert; + +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +/** + * Test the data type DATE for Teradata binary format. 
+ */ +public class TestTeradataBinarySerdeForDate extends TestCase { + + private final TeradataBinarySerde serde = new TeradataBinarySerde(); + private final Properties props = new Properties(); + + protected void setUp() throws Exception { + props.setProperty(serdeConstants.LIST_COLUMNS, "TD_DATE"); + props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "date"); + serde.initialize(null, props); + } + + public void testTimestampBefore1900() throws Exception { + + //0060-01-01 + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("00653de7fe")); + + List<Object> row = (List<Object>) serde.deserialize(in); + Date ts = ((DateWritableV2) row.get(0)).get(); + Assert.assertEquals(ts.getYear(), 60); + Assert.assertEquals(ts.getMonth(), 1); + Assert.assertEquals(ts.getDay(), 1); + + BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } + + public void testTimestampAfter1900() throws Exception { + + //9999-01-01 + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("0095cfd304")); + + List<Object> row = (List<Object>) serde.deserialize(in); + Date ts = ((DateWritableV2) row.get(0)).get(); + Assert.assertEquals(ts.getYear(), 9999); + Assert.assertEquals(ts.getMonth(), 1); + Assert.assertEquals(ts.getDay(), 1); + + BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/eb842b75/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDecimal.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDecimal.java b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDecimal.java new file mode 100644 index 
0000000..6abdd3f --- /dev/null +++ b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDecimal.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.teradata; + +import com.google.common.io.BaseEncoding; +import junit.framework.TestCase; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.io.BytesWritable; +import org.junit.Assert; + +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +/** + * Test the data type DECIMAL for Teradata binary format. 
+ */ +public class TestTeradataBinarySerdeForDecimal extends TestCase { + + private final TeradataBinarySerde serde = new TeradataBinarySerde(); + private final Properties props = new Properties(); + + protected void setUp() throws Exception { + props.setProperty(serdeConstants.LIST_COLUMNS, "TD_DECIMAL"); + props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "decimal(9,5)"); + + serde.initialize(null, props); + } + + public void testPositiveFraction() throws Exception { + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("0064000000")); + + List<Object> row = (List<Object>) serde.deserialize(in); + Assert.assertTrue("0.001".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString())); + + BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } + + public void testNegativeFraction() throws Exception { + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("009cffffff")); + + List<Object> row = (List<Object>) serde.deserialize(in); + Assert.assertTrue("-0.001".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString())); + + BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } + + public void testPositiveNumber1() throws Exception { + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("00a0860100")); + + List<Object> row = (List<Object>) serde.deserialize(in); + Assert.assertTrue("1".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString())); + + BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } + + public void testNegativeNumber1() throws Exception { + BytesWritable in = new 
BytesWritable(BaseEncoding.base16().lowerCase().decode("006079feff")); + + List<Object> row = (List<Object>) serde.deserialize(in); + Assert.assertTrue("-1".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString())); + + BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } + + public void testPositiveNumber2() throws Exception { + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("0080969800")); + + List<Object> row = (List<Object>) serde.deserialize(in); + Assert.assertTrue("100".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString())); + + BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } + + public void testNegativeNumber2() throws Exception { + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("000065c4e0")); + + List<Object> row = (List<Object>) serde.deserialize(in); + Assert.assertTrue("-5240".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString())); + + BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/eb842b75/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForTimeStamp.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForTimeStamp.java b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForTimeStamp.java new file mode 100644 index 0000000..a6cf2c1 --- /dev/null +++ b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForTimeStamp.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.teradata; + +import com.google.common.io.BaseEncoding; +import junit.framework.TestCase; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.io.BytesWritable; +import org.junit.Assert; + +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +/** + * Test the data type TIMESTAMP for Teradata binary format. 
+ */
public class TestTeradataBinarySerdeForTimeStamp extends TestCase {

  private final TeradataBinarySerde serde = new TeradataBinarySerde();
  private final Properties props = new Properties();

  /**
   * Declares a single timestamp column. The serde is not initialized here because each test
   * first sets its own {@code TD_TIMESTAMP_PRECISION}.
   */
  @Override
  protected void setUp() throws Exception {
    props.setProperty(serdeConstants.LIST_COLUMNS, "TD_TIMESTAMP");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "timestamp");
  }

  public void testTimestampPrecision6() throws Exception {
    // 2012-10-01 12:00:00.110000
    assertTimestampRoundTrip("6", "00323031322d31302d30312031323a30303a30302e313130303030", 110000000);
  }

  public void testTimestampPrecision0() throws Exception {
    // 2012-10-01 12:00:00
    assertTimestampRoundTrip("0", "00323031322d31302d30312031323a30303a3030", 0);
  }

  public void testTimestampPrecision3() throws Exception {
    // 2012-10-01 12:00:00.345
    assertTimestampRoundTrip("3", "00323031322d31302d30312031323a30303a30302e333435", 345000000);
  }

  /**
   * Initializes the serde with the given timestamp precision, deserializes the hex-encoded record,
   * checks that it reads back as 2012-10-01 12:00:00 with the expected nanoseconds, and verifies
   * that serializing the row reproduces the input bytes.
   *
   * @param precision value for {@code TeradataBinarySerde.TD_TIMESTAMP_PRECISION}
   * @param hexRecord lower-case base16 encoding of the whole record
   * @param expectedNanos expected nanosecond field of the deserialized timestamp
   */
  private void assertTimestampRoundTrip(String precision, String hexRecord, int expectedNanos)
      throws Exception {
    props.setProperty(TeradataBinarySerde.TD_TIMESTAMP_PRECISION, precision);
    serde.initialize(null, props);

    BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode(hexRecord));

    @SuppressWarnings("unchecked")
    List<Object> row = (List<Object>) serde.deserialize(in);
    Timestamp ts = ((TimestampWritableV2) row.get(0)).getTimestamp();
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes
    // failure messages read correctly.
    Assert.assertEquals(2012, ts.getYear());
    Assert.assertEquals(10, ts.getMonth());
    Assert.assertEquals(1, ts.getDay());
    Assert.assertEquals(12, ts.getHours());
    Assert.assertEquals(0, ts.getMinutes());
    Assert.assertEquals(0, ts.getSeconds());
    Assert.assertEquals(expectedNanos, ts.getNanos());

    BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
    Assert.assertArrayEquals(in.copyBytes(), res.copyBytes());
  }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/eb842b75/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeGeneral.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeGeneral.java b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeGeneral.java new file mode 100644 index 0000000..c50ef70 --- /dev/null +++ b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeGeneral.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.teradata; + +import com.google.common.io.BaseEncoding; +import junit.framework.TestCase; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Assert; + +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +/** + * Test all the data types supported for Teradata Binary Format. 
+ */ +public class TestTeradataBinarySerdeGeneral extends TestCase { + + private final TeradataBinarySerde serde = new TeradataBinarySerde(); + private final Properties props = new Properties(); + + protected void setUp() throws Exception { + props.setProperty(serdeConstants.LIST_COLUMNS, + "TD_CHAR, TD_VARCHAR, TD_BIGINT, TD_INT, TD_SMALLINT, TD_BYTEINT, " + + "TD_FLOAT,TD_DECIMAL,TD_DATE, TD_TIMESTAMP, TD_VARBYTE"); + props.setProperty(serdeConstants.LIST_COLUMN_TYPES, + "char(3),varchar(100),bigint,int,smallint,tinyint,double,decimal(31,30),date,timestamp,binary"); + + serde.initialize(null, props); + } + + public void testDeserializeAndSerialize() throws Exception { + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode( + "00004e6f762020202020201b006120646179203d2031312f31312f31312020202020202020203435ec10000000000000c5feffff" + + "7707010000000000002a40ef2b3dab0d14e6531c8908a72700000007b20100313931312d31312d31312031393a32303a32312e34" + + "33333230301b00746573743a20202020202020343333322020202020202020333135")); + + List<Object> row = (List<Object>) serde.deserialize(in); + Assert.assertEquals("Nov", ((HiveCharWritable) row.get(0)).toString()); + Assert.assertEquals("a day = 11/11/11 45", ((HiveVarcharWritable) row.get(1)).toString()); + Assert.assertEquals(4332L, ((LongWritable) row.get(2)).get()); + Assert.assertEquals(-315, ((IntWritable) row.get(3)).get()); + Assert.assertEquals((short) 1911, ((ShortWritable) row.get(4)).get()); + Assert.assertEquals((byte) 1, ((ByteWritable) row.get(5)).get()); + Assert.assertEquals((double) 13, ((DoubleWritable) row.get(6)).get(), 0); + Assert.assertEquals(30, ((HiveDecimalWritable) row.get(7)).getScale()); + Assert.assertEquals((double) 3.141592653589793238462643383279, + ((HiveDecimalWritable) row.get(7)).getHiveDecimal().doubleValue(), 0); + Assert.assertEquals("1911-11-11", ((DateWritableV2) row.get(8)).toString()); + Assert.assertEquals("1911-11-11 19:20:21.4332", ((TimestampWritableV2) 
row.get(9)).toString()); + Assert.assertEquals(27, ((BytesWritable) row.get(10)).getLength()); + + BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } + + public void testDeserializeAndSerializeWithNull() throws Exception { + //null bitmap: 0160 -> 00000001 01100000, 7th, 9th, 10th is null + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode( + "01604d61722020202020201b006120646179203d2031332f30332f303820202020202020202020397ca10000000000004300000" + + "0dd0700000000000048834000000000000000000000000000000000443f110020202020202020202020202020202020202020202" + + "020202020200000")); + List<Object> row = (List<Object>) serde.deserialize(in); + + Assert.assertEquals("Mar", ((HiveCharWritable) row.get(0)).toString()); + Assert.assertEquals(null, row.get(7)); + Assert.assertEquals(null, row.get(9)); + Assert.assertEquals(null, row.get(10)); + + BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } + + public void testDeserializeAndSerializeAllNull() throws Exception { + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode( + "ffe0202020202020202020000000000000000000000000000000000000000000000000000000000000000000000000000000000" + + "00000000020202020202020202020202020202020202020202020202020200000")); + List<Object> row = (List<Object>) serde.deserialize(in); + + Assert.assertEquals(null, row.get(0)); + Assert.assertEquals(null, row.get(1)); + Assert.assertEquals(null, row.get(3)); + Assert.assertEquals(null, row.get(4)); + Assert.assertEquals(null, row.get(5)); + Assert.assertEquals(null, row.get(6)); + Assert.assertEquals(null, row.get(7)); + Assert.assertEquals(null, row.get(8)); + Assert.assertEquals(null, row.get(9)); + Assert.assertEquals(null, row.get(10)); + + BytesWritable res = (BytesWritable) 
serde.serialize(row, serde.getObjectInspector()); + Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes())); + } + + public void testDeserializeCorruptedRecord() throws Exception { + BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode( + "00004e6f762020202020201b006120646179203d2031312f31312f31312020202020202020203435ec10000000000000c5feff" + + "ff7707010000000000002a40ef2b3dab0d14e6531c8908a72700000007b20100313931312d31312d31312031393a32303a32312" + + "e3433333230301b00746573743a20202020202020343333322020202020202020333135ff")); + + List<Object> row = (List<Object>) serde.deserialize(in); + Assert.assertEquals(null, row.get(0)); + Assert.assertEquals(null, row.get(3)); + Assert.assertEquals(null, row.get(10)); + } +}