Drill-418: Fixes for Parquet nullable reader bug.

Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/7ad6de69
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/7ad6de69
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/7ad6de69

Branch: refs/heads/master
Commit: 7ad6de699303f5ba60f0d4e4034b8bbd7edd15c4
Parents: e753ade
Author: Jason Altekruse <[email protected]>
Authored: Mon Mar 17 09:12:19 2014 -0700
Committer: Jacques Nadeau <[email protected]>
Committed: Mon Mar 17 09:12:19 2014 -0700

----------------------------------------------------------------------
 .../drill/exec/store/parquet/ColumnReader.java     |  2 +-
 .../exec/store/parquet/NullableColumnReader.java   |  4 ++--
 .../store/parquet/ParquetRecordReaderTest.java     | 17 ++++++++++++-----
 .../exec/store/parquet/TestFileGenerator.java      | 12 ++++++++++++
 pom.xml                                            |  4 ++--
 5 files changed, 29 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java
index 2cc126c..d5c88ef 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java
@@ -46,7 +46,7 @@ abstract class ColumnReader {
   final boolean isFixedLength;
 
   // counter for the total number of values read from one or more pages
-  // when a batch is filled all of these values should be the same for each 
column
+  // when a batch is filled all of these values should be the same for all of 
the columns
   int totalValuesRead;
   
   // counter for the values that have been read in this pass (a single call to 
the next() method)

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java
index 4c33aeb..9be8266 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java
@@ -63,7 +63,7 @@ abstract class NullableColumnReader extends ColumnReader{
       long runStart = pageReadStatus.readPosInBytes;
       int runLength = 0;
       int currentDefinitionLevel = 0;
-      int currentValueIndexInVector = totalValuesRead;
+      int currentValueIndexInVector = (int) recordsReadInThisIteration;
       boolean lastValueWasNull = true;
       int definitionLevelsRead;
       // loop to find the longest run of defined values available, can be 
preceded by several nulls
@@ -77,7 +77,7 @@ abstract class NullableColumnReader extends ColumnReader{
         }
         while(currentValueIndexInVector - totalValuesRead < 
recordsToReadInThisPass
             && currentValueIndexInVector < 
valueVecHolder.getValueVector().getValueCapacity()
-            && definitionLevelsRead < 
pageReadStatus.currentPage.getValueCount()){
+            && pageReadStatus.valuesRead + definitionLevelsRead < 
pageReadStatus.currentPage.getValueCount()){
           currentDefinitionLevel = 
pageReadStatus.definitionLevels.readInteger();
           definitionLevelsRead++;
           if ( currentDefinitionLevel < 
columnDescriptor.getMaxDefinitionLevel()){

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
index 2b043aa..f6a8aa4 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
@@ -78,7 +78,6 @@ public class ParquetRecordReaderTest {
   static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(ParquetRecordReaderTest.class);
 
   static boolean VERBOSE_DEBUG = false;
-  private boolean checkValues = true;
 
   static final int numberRowGroups = 1;
   static final int recordsPerRowGroup = 300;
@@ -163,8 +162,6 @@ public class ParquetRecordReaderTest {
 
     RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
 
-    checkValues = false;
-
     DrillConfig config = DrillConfig.create();
 
     try(Drillbit bit1 = new Drillbit(config, serviceSet); DrillClient client = 
new DrillClient(config, serviceSet.getCoordinator());){
@@ -203,8 +200,6 @@ public class ParquetRecordReaderTest {
 
     DrillConfig config = DrillConfig.create();
 
-    checkValues = false;
-
     try(DrillClient client = new DrillClient(config);){
       client.connect();
       RecordBatchLoader batchLoader = new 
RecordBatchLoader(client.getAllocator());
@@ -327,6 +322,18 @@ public class ParquetRecordReaderTest {
         "/tmp/test.parquet", i, props);
   }
 
+
+  @Ignore
+  @Test
+  public void testReadBug_Drill_418() throws Exception {
+    HashMap<String, FieldInfo> fields = new HashMap<>();
+    ParquetTestProperties props = new ParquetTestProperties(5, 300000, 
DEFAULT_BYTES_PER_PAGE, fields);
+    TestFileGenerator.populateDrill_418_fields(props);
+    String readEntries = "\"/tmp/customer.plain.parquet\"";
+    testParquetFullEngineEventBased(false, false, 
"/parquet/parquet_scan_screen_read_entry_replace.json", readEntries,
+        "unused, no file is generated", 1, props, true);
+  }
+
   // requires binary file generated by pig from TPCH data, also have to 
disable assert where data is coming in
   @Ignore
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java
index 689b168..d8892dc 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java
@@ -57,6 +57,18 @@ public class TestFileGenerator {
   static final Object[] binVals = { varLen1, varLen2, varLen3 };
   static final Object[] bin2Vals = { varLen3, varLen2, varLen1 };
 
+  static void populateDrill_418_fields(ParquetTestProperties props){
+
+    props.fields.put("cust_key", new FieldInfo("int32", "integer", 32, 
intVals, TypeProtos.MinorType.INT, props));
+    props.fields.put("nation_key", new FieldInfo("int32", "integer", 32, 
intVals, TypeProtos.MinorType.INT, props));
+    props.fields.put("acctbal", new FieldInfo("int32", "integer", 32, intVals, 
TypeProtos.MinorType.INT, props));
+    props.fields.put("name", new FieldInfo("int32", "integer", 32, intVals, 
TypeProtos.MinorType.INT, props));
+    props.fields.put("address", new FieldInfo("int32", "integer", 32, intVals, 
TypeProtos.MinorType.INT, props));
+    props.fields.put("phone", new FieldInfo("int32", "integer", 32, intVals, 
TypeProtos.MinorType.INT, props));
+    props.fields.put("mktsegment", new FieldInfo("int32", "integer", 32, 
intVals, TypeProtos.MinorType.INT, props));
+    props.fields.put("comment_col", new FieldInfo("int32", "integer", 32, 
intVals, TypeProtos.MinorType.INT, props));
+  }
+
   static void populateFieldInfoMap(ParquetTestProperties props){
     props.fields.put("integer", new FieldInfo("int32", "integer", 32, intVals, 
TypeProtos.MinorType.INT, props));
     props.fields.put("bigInt", new FieldInfo("int64", "bigInt", 64, longVals, 
TypeProtos.MinorType.BIGINT, props));

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 37fcbf5..e0b96fa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -115,9 +115,9 @@
             <exclude>**/.buildpath</exclude>
             <exclude>**/*.proto</exclude>
             <exclude>**/*.fmpp</exclude>
-            <exclude>**/*.tdd</exclude>
-            <exclude>**/*.iml</exclude>
             <exclude>**/target/**</exclude>
+            <exclude>**/*.iml</exclude>
+            <exclude>**/*.tdd</exclude>
           </excludes>
         </configuration>
       </plugin>

Reply via email to