DRILL-687: Support selecting Date, Timestamp, Decimal data types from Hive into Drill
Add unit test Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/0493582f Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/0493582f Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/0493582f Branch: refs/heads/master Commit: 0493582fe4cdc7bc37a852aeb523ed5e1816ef09 Parents: c7fa50f Author: Mehant Baid <[email protected]> Authored: Sat May 10 19:19:09 2014 -0700 Committer: Jacques Nadeau <[email protected]> Committed: Sun May 11 15:44:32 2014 -0700 ---------------------------------------------------------------------- .../drill/exec/store/hive/HiveRecordReader.java | 54 ++++++++++++++++++-- .../exec/store/hive/HiveTestDataGenerator.java | 36 +++++++++++++ .../apache/drill/jdbc/test/TestJdbcQuery.java | 13 ++++- 3 files changed, 97 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/0493582f/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveRecordReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveRecordReader.java index 2544d2b..25931db 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveRecordReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveRecordReader.java @@ -18,6 +18,8 @@ package org.apache.drill.exec.store.hive; import java.io.IOException; +import java.sql.Timestamp; +import java.util.Date; import java.util.List; import java.util.Properties; @@ -49,11 +51,16 @@ import org.apache.drill.exec.vector.NullableSmallIntVector; import org.apache.drill.exec.vector.NullableTinyIntVector; import org.apache.drill.exec.vector.NullableVarBinaryVector; import org.apache.drill.exec.vector.NullableVarCharVector; +import org.apache.drill.exec.vector.NullableTimeStampVector; +import org.apache.drill.exec.vector.TimeStampVector; +import org.apache.drill.exec.vector.NullableDateVector; +import org.apache.drill.exec.vector.DateVector; import org.apache.drill.exec.vector.SmallIntVector; import org.apache.drill.exec.vector.TinyIntVector; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.VarCharVector; import org.apache.drill.exec.vector.allocator.VectorAllocator; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Partition; @@ -74,6 +81,9 @@ import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; + import com.google.common.collect.Lists; public class HiveRecordReader implements RecordReader { @@ -304,6 +314,32 @@ public class HiveRecordReader implements RecordReader { } break; } + case "timestamp": { + TimeStampVector v = (TimeStampVector) vector; + DateTime ts = new DateTime(((Timestamp) val).getTime()).withZoneRetainFields(DateTimeZone.UTC); + long value = ts.getMillis(); + for (int j = 0; j < recordCount; j++) { + v.getMutator().setSafe(j, value); + } + break; + } + case "date": { + DateVector v = (DateVector) vector; + DateTime date = new DateTime(((Date)val).getTime()).withZoneRetainFields(DateTimeZone.UTC); + long value = date.getMillis(); + for (int j = 0; j < recordCount; j++) { + v.getMutator().setSafe(j, value); + } + break; + } + case "decimal": { + VarCharVector v = (VarCharVector) vector; + byte[] value = ((HiveDecimal) val).toString().getBytes(); + for (int j = 0; j < recordCount; j++) { + v.getMutator().setSafe(j, value); + } + break; + } default: throw new UnsupportedOperationException("Could not determine type: " + selectedPartitionTypes.get(i)); } @@ -343,7 +379,7 @@ public class HiveRecordReader implements RecordReader { case BYTE: return Types.optional(TypeProtos.MinorType.TINYINT); case DECIMAL: - return Types.optional(TypeProtos.MinorType.DECIMAL38SPARSE); + return Types.optional(TypeProtos.MinorType.VARCHAR); case DOUBLE: return Types.optional(TypeProtos.MinorType.FLOAT8); case FLOAT: @@ -357,7 +393,9 @@ public class HiveRecordReader implements RecordReader { case STRING: return Types.optional(TypeProtos.MinorType.VARCHAR); case TIMESTAMP: - + return Types.optional(TypeProtos.MinorType.TIMESTAMP); + case DATE: + return Types.optional(TypeProtos.MinorType.DATE); default: throw new UnsupportedOperationException("Could not determine type: " + pCat); } @@ -373,7 +411,10 @@ public class HiveRecordReader implements RecordReader { case BYTE: return ((NullableTinyIntVector) vv).getMutator().setSafe(index, (byte) fieldValue); case DECIMAL: - throw new UnsupportedOperationException(); + String value = ((HiveDecimal) fieldValue).toString(); + int strLen = value.length(); + byte[] strBytes = value.getBytes(); + return ((NullableVarCharVector) vv).getMutator().setSafe(index, strBytes, 0, strLen); case DOUBLE: return ((NullableFloat8Vector) vv).getMutator().setSafe(index, (double) fieldValue); case FLOAT: @@ -389,8 +430,11 @@ public class HiveRecordReader implements RecordReader { byte[] bytes = ((Text) fieldValue).getBytes(); return ((NullableVarCharVector) vv).getMutator().setSafe(index, bytes, 0, len); case TIMESTAMP: - throw new UnsupportedOperationException(); - + DateTime ts = new DateTime(((Timestamp) fieldValue).getTime()).withZoneRetainFields(DateTimeZone.UTC); + return ((NullableTimeStampVector)vv).getMutator().setSafe(index, ts.getMillis()); + case DATE: + DateTime date = new DateTime(((Date) fieldValue).getTime()).withZoneRetainFields(DateTimeZone.UTC); + return ((NullableDateVector)vv).getMutator().setSafe(index, date.getMillis()); default: throw new UnsupportedOperationException("Could not determine type"); } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/0493582f/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java index 66d5b62..f1565d9 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java @@ -21,6 +21,8 @@ package org.apache.drill.exec.store.hive; import java.io.File; import java.io.IOException; import java.io.PrintWriter; +import java.sql.Date; +import java.sql.Timestamp; import org.apache.commons.io.FileUtils; import org.apache.hadoop.fs.FileSystem; @@ -74,6 +76,16 @@ public class HiveTestDataGenerator { executeQuery("CREATE DATABASE IF NOT EXISTS db1"); createTableAndLoadData("db1", "kv_db1", testDataFile); + // Generate data with date and timestamp data type + String testDateDataFile = generateTestDataFileWithDate(); + + // create table with date and timestamp data type + executeQuery("USE default"); + executeQuery("CREATE TABLE IF NOT EXISTS default.foodate(a DATE, b TIMESTAMP) "+ + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE"); + executeQuery(String.format("LOAD DATA LOCAL INPATH '%s' OVERWRITE INTO TABLE default.foodate", testDateDataFile)); + + ss.close(); } @@ -105,6 +117,30 @@ public class HiveTestDataGenerator { return file.getPath(); } + private String generateTestDataFileWithDate() throws Exception { + File file = null; + while (true) { + file = File.createTempFile("drill-hive-test-date", ".txt"); + if (file.exists()) { + boolean success = file.delete(); + if (success) { + break; + } + } + logger.debug("retry creating tmp file"); + } + + PrintWriter printWriter = new PrintWriter(file); + for (int i=1; i<=5; i++) { + Date date = new Date(System.currentTimeMillis()); + Timestamp ts = new Timestamp(System.currentTimeMillis()); + printWriter.println (String.format("%s,%s", date.toString(), ts.toString())); + } + printWriter.close(); + + return file.getPath(); + } + private void executeQuery(String query) { CommandProcessorResponse response = null; boolean failed = false; http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/0493582f/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java ---------------------------------------------------------------------- diff --git a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java index 75b1ae4..d087f7d 100644 --- a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java +++ b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java @@ -75,6 +75,13 @@ public class TestJdbcQuery extends JdbcTest{ testQuery("select * from hive.`default`.kv"); } + + @Test + public void testHiveWithDate() throws Exception { + testQuery("select * from hive.`default`.foodate"); + testQuery("select date_add(a, time '12:23:33'), b from hive.`default`.foodate"); + } + @Test @Ignore public void testJsonQuery() throws Exception{ @@ -276,8 +283,10 @@ public class TestJdbcQuery extends JdbcTest{ .sql("SHOW TABLES") .returns( "TABLE_SCHEMA=hive.default; TABLE_NAME=kv\n" + + "TABLE_SCHEMA=hive.default; TABLE_NAME=foodate\n" + "TABLE_SCHEMA=hive.db1; TABLE_NAME=kv_db1\n" + "TABLE_SCHEMA=hive; TABLE_NAME=kv\n" + + "TABLE_SCHEMA=hive; TABLE_NAME=foodate\n" + "TABLE_SCHEMA=sys; TABLE_NAME=drillbits\n" + "TABLE_SCHEMA=sys; TABLE_NAME=options\n" + "TABLE_SCHEMA=INFORMATION_SCHEMA; TABLE_NAME=VIEWS\n" + @@ -302,7 +311,9 @@ public class TestJdbcQuery extends JdbcTest{ JdbcAssert.withNoDefaultSchema() .sql("SHOW TABLES IN hive") - .returns("TABLE_SCHEMA=hive; TABLE_NAME=kv\n"); + .returns( + "TABLE_SCHEMA=hive; TABLE_NAME=kv\n" + + "TABLE_SCHEMA=hive; TABLE_NAME=foodate\n"); } @Test
