[ https://issues.apache.org/jira/browse/DRILL-4373?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15613668#comment-15613668 ]
ASF GitHub Bot commented on DRILL-4373: --------------------------------------- Github user parthchandra commented on a diff in the pull request: https://github.com/apache/drill/pull/600#discussion_r85449218 --- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java --- @@ -739,30 +741,76 @@ public void runTestAndValidate(String selection, String validationSelection, Str } /* - Test the reading of an int96 field. Impala encodes timestamps as int96 fields + Impala encodes timestamp values as int96 fields. Test the reading of an int96 field with two converters: + the first one converts parquet INT96 into drill VARBINARY and the second one (works while + store.parquet.reader.int96_as_timestamp option is enabled) converts parquet INT96 into drill TIMESTAMP. */ @Test public void testImpalaParquetInt96() throws Exception { compareParquetReadersColumnar("field_impala_ts", "cp.`parquet/int96_impala_1.parquet`"); + try { + test("alter session set %s = true", ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP); + compareParquetReadersColumnar("field_impala_ts", "cp.`parquet/int96_impala_1.parquet`"); + } finally { + test("alter session reset %s", ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP); + } } /* - Test the reading of a binary field where data is in dicationary _and_ non-dictionary encoded pages + Test the reading of a binary field as drill varbinary where data is in dicationary _and_ non-dictionary encoded pages */ @Test - public void testImpalaParquetVarBinary_DictChange() throws Exception { + public void testImpalaParquetBinaryAsVarBinary_DictChange() throws Exception { compareParquetReadersColumnar("field_impala_ts", "cp.`parquet/int96_dict_change.parquet`"); } /* + Test the reading of a binary field as drill timestamp where data is in dicationary _and_ non-dictionary encoded pages + */ + @Test + public void testImpalaParquetBinaryAsTimeStamp_DictChange() throws Exception { + final String WORKING_PATH = 
TestTools.getWorkingPath(); + final String TEST_RES_PATH = WORKING_PATH + "/src/test/resources"; + try { + testBuilder() + .sqlQuery("select int96_ts from dfs_test.`%s/parquet/int96_dict_change`", TEST_RES_PATH) + .optionSettingQueriesForTestQuery( + "alter session set `%s` = true", ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP) + .ordered() + .csvBaselineFile("testframework/testParquetReader/testInt96DictChange/q1.tsv") + .baselineTypes(TypeProtos.MinorType.TIMESTAMP) + .baselineColumns("int96_ts") + .build().run(); + } finally { + test("alter system reset `%s`", ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP); + } + } + + /* Test the conversion from int96 to impala timestamp */ @Test - public void testImpalaParquetTimestampAsInt96() throws Exception { + public void testTimestampImpalaConvertFrom() throws Exception { compareParquetReadersColumnar("convert_from(field_impala_ts, 'TIMESTAMP_IMPALA')", "cp.`parquet/int96_impala_1.parquet`"); } /* + Test reading parquet Int96 as TimeStamp and comparing obtained values with the + old results (reading the same values as VarBinary and convert_fromTIMESTAMP_IMPALA function using) + */ + @Test + public void testImpalaParquetTimestampInt96AsTimeStamp() throws Exception { --- End diff -- The test testImpalaParquetTimestampInt96AsTimeStamp fails when run in a different timezone. Can you mark this as @Ignore unless you can fix the test to run across different timezones? > Drill and Hive have incompatible timestamp representations in parquet > --------------------------------------------------------------------- > > Key: DRILL-4373 > URL: https://issues.apache.org/jira/browse/DRILL-4373 > Project: Apache Drill > Issue Type: Improvement > Components: Storage - Hive, Storage - Parquet > Affects Versions: 1.8.0 > Reporter: Rahul Challapalli > Assignee: Karthikeyan Manivannan > Labels: doc-impacting > Fix For: 1.9.0 > > > git.commit.id.abbrev=83d460c > I created a parquet file with a timestamp type using Drill. 
Now if I define a > hive table on top of the parquet file and use "timestamp" as the column type, > drill fails to read the hive table through the hive storage plugin. > Implementation: > Added an int96-to-timestamp converter for both parquet readers, controlled > by the system / session option "store.parquet.int96_as_timestamp". > The option is false by default so that old > query scripts that use the "convert_from TIMESTAMP_IMPALA" function keep working. > When the option is true, using that function is unnecessary and can cause > the query to fail. -- This message was sent by Atlassian JIRA (v6.3.4#6332)