DRILL-901: Fix Parquet read bug with VarBinary. Also now throw an exception if parquet reader is not passed any columns found in the file. Previously a NPE was thrown as the setup method exited early, skipping an object initialization that manifested in the first call to the next method.
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/163219c2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/163219c2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/163219c2 Branch: refs/heads/master Commit: 163219c2a802481cbd90171912540250d4059ea8 Parents: 393adee Author: Jason Altekruse <[email protected]> Authored: Wed Jun 4 11:21:32 2014 -0700 Committer: Jacques Nadeau <[email protected]> Committed: Thu Jun 5 09:41:39 2014 -0700 ---------------------------------------------------------------------- .../exec/store/parquet/PageReadStatus.java | 2 +- .../exec/store/parquet/ParquetRecordReader.java | 2 +- .../store/parquet/ParquetRecordReaderTest.java | 9 +++++++ .../test/resources/parquet/par_writer_test.json | 26 ++++++++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/163219c2/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/PageReadStatus.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/PageReadStatus.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/PageReadStatus.java index ba98f3c..3ad1d6c 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/PageReadStatus.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/PageReadStatus.java @@ -110,6 +110,7 @@ final class PageReadStatus { public boolean next() throws IOException { currentPage = null; + valuesRead = 0; // TODO - the metatdata for total size appears to be incorrect for impala generated files, need to find cause // and submit a bug report @@ -162,7 +163,6 @@ final class PageReadStatus { pageDataByteArray = currentPage.getBytes().toByteArray(); readPosInBytes = 0; - valuesRead = 0; if (parentColumnReader.columnDescriptor.getMaxDefinitionLevel() != 0){ parentColumnReader.currDefLevel = -1; if (!currentPage.getValueEncoding().usesDictionary()) { http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/163219c2/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordReader.java index 6754855..4c5f4bb 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordReader.java @@ -228,7 +228,7 @@ public class ParquetRecordReader implements RecordReader { // none of the columns in the parquet file matched the request columns from the query if (columnsToScan == 0){ - return; + throw new ExecutionSetupException("Error reading from parquet file. No columns requested were found in the file."); } if (allFieldsFixedLength) { recordsPerBatch = (int) Math.min(Math.min(batchSize / bitWidthAllFixedFields, http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/163219c2/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java index 82436a3..ad63dc9 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java @@ -352,6 +352,15 @@ public class ParquetRecordReaderTest extends BaseTestQuery{ "/tmp/test.parquet", i, props); } + @Test + public void testReadError_Drill_901() throws Exception { + // select cast( L_COMMENT as varchar) from dfs.`/tmp/drilltest/employee_parquet` + HashMap<String, FieldInfo> fields = new HashMap<>(); + ParquetTestProperties props = new ParquetTestProperties(1, 120350, DEFAULT_BYTES_PER_PAGE, fields); + testParquetFullEngineEventBased(false, false, "/parquet/par_writer_test.json", null, + "unused, no file is generated", 1, props, false); + } + @Ignore @Test http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/163219c2/exec/java-exec/src/test/resources/parquet/par_writer_test.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/parquet/par_writer_test.json b/exec/java-exec/src/test/resources/parquet/par_writer_test.json new file mode 100644 index 0000000..34f2ba6 --- /dev/null +++ b/exec/java-exec/src/test/resources/parquet/par_writer_test.json @@ -0,0 +1,26 @@ + { + head : { + version : 1, + generator : { + type : "manual", + info : "na" + }, + type : "APACHE_DRILL_PHYSICAL" + }, + graph : [ { + pop : "parquet-scan", + @id : 1, + entries : [ { + path : "/tpch/lineitem.parquet" + } ], + storage : { + type : "file", + connection : "classpath:///" + }, + columns: [ "L_COMMENT"] + }, { + pop : "screen", + @id : 2, + child : 1 + } ] + }
