DRILL-901: Fix Parquet read bug with VarBinary.

Also now throw an exception if parquet reader is not passed any columns found 
in the file. Previously a NPE was thrown as the setup method exited early, 
skipping an object initialization that manifested in the first call to the next 
method.


Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/163219c2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/163219c2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/163219c2

Branch: refs/heads/master
Commit: 163219c2a802481cbd90171912540250d4059ea8
Parents: 393adee
Author: Jason Altekruse <[email protected]>
Authored: Wed Jun 4 11:21:32 2014 -0700
Committer: Jacques Nadeau <[email protected]>
Committed: Thu Jun 5 09:41:39 2014 -0700

----------------------------------------------------------------------
 .../exec/store/parquet/PageReadStatus.java      |  2 +-
 .../exec/store/parquet/ParquetRecordReader.java |  2 +-
 .../store/parquet/ParquetRecordReaderTest.java  |  9 +++++++
 .../test/resources/parquet/par_writer_test.json | 26 ++++++++++++++++++++
 4 files changed, 37 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/163219c2/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/PageReadStatus.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/PageReadStatus.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/PageReadStatus.java
index ba98f3c..3ad1d6c 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/PageReadStatus.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/PageReadStatus.java
@@ -110,6 +110,7 @@ final class PageReadStatus {
   public boolean next() throws IOException {
 
     currentPage = null;
+    valuesRead = 0;
 
     // TODO - the metatdata for total size appears to be incorrect for impala 
generated files, need to find cause
     // and submit a bug report
@@ -162,7 +163,6 @@ final class PageReadStatus {
     pageDataByteArray = currentPage.getBytes().toByteArray();
 
     readPosInBytes = 0;
-    valuesRead = 0;
     if (parentColumnReader.columnDescriptor.getMaxDefinitionLevel() != 0){
       parentColumnReader.currDefLevel = -1;
       if (!currentPage.getValueEncoding().usesDictionary()) {

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/163219c2/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordReader.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordReader.java
index 6754855..4c5f4bb 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordReader.java
@@ -228,7 +228,7 @@ public class ParquetRecordReader implements RecordReader {
 
     // none of the columns in the parquet file matched the request columns 
from the query
     if (columnsToScan == 0){
-      return;
+      throw new ExecutionSetupException("Error reading from parquet file. No 
columns requested were found in the file.");
     }
     if (allFieldsFixedLength) {
       recordsPerBatch = (int) Math.min(Math.min(batchSize / 
bitWidthAllFixedFields,

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/163219c2/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
index 82436a3..ad63dc9 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
@@ -352,6 +352,15 @@ public class ParquetRecordReaderTest extends BaseTestQuery{
         "/tmp/test.parquet", i, props);
   }
 
+  @Test
+  public void testReadError_Drill_901() throws Exception {
+    // select cast( L_COMMENT as varchar) from  
dfs.`/tmp/drilltest/employee_parquet`
+    HashMap<String, FieldInfo> fields = new HashMap<>();
+    ParquetTestProperties props = new ParquetTestProperties(1, 120350, 
DEFAULT_BYTES_PER_PAGE, fields);
+    testParquetFullEngineEventBased(false, false, 
"/parquet/par_writer_test.json", null,
+        "unused, no file is generated", 1, props, false);
+  }
+
 
   @Ignore
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/163219c2/exec/java-exec/src/test/resources/parquet/par_writer_test.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/par_writer_test.json 
b/exec/java-exec/src/test/resources/parquet/par_writer_test.json
new file mode 100644
index 0000000..34f2ba6
--- /dev/null
+++ b/exec/java-exec/src/test/resources/parquet/par_writer_test.json
@@ -0,0 +1,26 @@
+  {
+    head : {
+      version : 1,
+          generator : {
+        type : "manual",
+            info : "na"
+      },
+      type : "APACHE_DRILL_PHYSICAL"
+    },
+    graph : [ {
+    pop : "parquet-scan",
+    @id : 1,
+        entries : [ {
+      path : "/tpch/lineitem.parquet"
+    } ],
+    storage : {
+      type : "file",
+      connection : "classpath:///"
+    },
+    columns: [ "L_COMMENT"]
+  }, {
+    pop : "screen",
+    @id : 2,
+        child : 1
+  } ]
+  }

Reply via email to