[ 
https://issues.apache.org/jira/browse/DRILL-7403?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16951304#comment-16951304
 ] 

ASF GitHub Bot commented on DRILL-7403:
---------------------------------------

arina-ielchiieva commented on pull request #1871: DRILL-7403: lidate batch 
checks, vector integretity in unit tests
URL: https://github.com/apache/drill/pull/1871#discussion_r334639886
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java
 ##########
 @@ -101,47 +290,90 @@ private void validateVector(ValueVector vector) {
     }
   }
 
-  private void validateVariableWidthVector(String name, VariableWidthVector 
vector, int entryCount) {
+  private void validateNullableVector(String name, NullableVector vector) {
+    int outerCount = vector.getAccessor().getValueCount();
+    ValueVector valuesVector = vector.getValuesVector();
+    int valueCount = valuesVector.getAccessor().getValueCount();
+    if (valueCount != outerCount) {
+      error(name, vector, String.format(
+          "Outer value count = %d, but inner value count = %d",
+          outerCount, valueCount));
+    }
+    verifyIsSetVector(vector, (UInt1Vector) vector.getBitsVector());
+    validateVector(name + "-values", valuesVector);
+  }
+
+  private void validateVariableWidthVector(String name, VariableWidthVector 
vector) {
 
     // Offsets are in the derived classes. Handle only VarChar for now.
 
     if (vector instanceof VarCharVector) {
-      validateVarCharVector(name, (VarCharVector) vector, entryCount);
+      validateVarCharVector(name, (VarCharVector) vector);
     } else {
       logger.debug("Don't know how to validate vector: " + name + " of class " 
+ vector.getClass().getSimpleName());
     }
   }
 
-  private void validateVarCharVector(String name, VarCharVector vector, int 
entryCount) {
-//    int dataLength = vector.getAllocatedByteCount(); // Includes offsets and 
data.
-    int dataLength = vector.getBuffer().capacity();
-    validateOffsetVector(name + "-offsets", vector.getOffsetVector(), 
entryCount, dataLength);
+  private void validateVarCharVector(String name, VarCharVector vector) {
+    int size = vector.getAccessor().getValueCount();
+
+    // Disabled because a large number of operators
+    // set up offset vectors wrongly.
+    if (size == 0) {
+      return;
+    }
+
+    int dataLength = vector.getBuffer().writerIndex();
+    validateOffsetVector(name + "-offsets", vector.getOffsetVector(), false, 
size, dataLength);
   }
 
   private void validateRepeatedVector(String name, BaseRepeatedValueVector 
vector) {
-
     int dataLength = Integer.MAX_VALUE;
     if (vector instanceof RepeatedVarCharVector) {
-      dataLength = ((RepeatedVarCharVector) 
vector).getOffsetVector().getValueCapacity();
+      dataLength = ((RepeatedVarCharVector) 
vector).getDataVector().getBuffer().writerIndex();
     } else if (vector instanceof RepeatedFixedWidthVectorLike) {
-      dataLength = ((BaseDataValueVector) 
vector.getDataVector()).getBuffer().capacity();
+      dataLength = ((BaseDataValueVector) 
vector.getDataVector()).getBuffer().writerIndex();
     }
-    int itemCount = validateOffsetVector(name + "-offsets", 
vector.getOffsetVector(), rowCount, dataLength);
+    int valueCount = vector.getAccessor().getValueCount();
+    int itemCount = validateOffsetVector(name + "-offsets", 
vector.getOffsetVector(), true, valueCount, dataLength);
 
     // Special handling of repeated VarChar vectors
     // The nested data vectors are not quite exactly like top-level vectors.
 
     ValueVector dataVector = vector.getDataVector();
+    if (dataVector.getAccessor().getValueCount() != itemCount) {
+      error(name, vector, String.format(
+          "Vector has %d values, but offset vector labels %d values",
+          valueCount, itemCount));
+    }
     if (dataVector instanceof VariableWidthVector) {
-      validateVariableWidthVector(name + "-data", (VariableWidthVector) 
dataVector, itemCount);
+      validateVariableWidthVector(name + "-data", (VariableWidthVector) 
dataVector);
     }
   }
 
-  private int validateOffsetVector(String name, UInt4Vector offsetVector, int 
valueCount, int maxOffset) {
+  private void validateFixedWidthVector(String name, FixedWidthVector vector) {
+    // Not much to do
 
 Review comment:
   Could you please expand why?
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Validate batch checks, vector integretity in unit tests
> -------------------------------------------------------
>
>                 Key: DRILL-7403
>                 URL: https://issues.apache.org/jira/browse/DRILL-7403
>             Project: Apache Drill
>          Issue Type: Improvement
>            Reporter: Paul Rogers
>            Assignee: Paul Rogers
>            Priority: Minor
>
> Drill provides a {{BatchValidator}} that checks vectors. It is disabled by 
> default. This enhancement adds more checks, including checks for row counts 
> (of which there are surprisingly many.)
> Since most operators will fail if the check is enabled, this enhancement also 
> adds a table to keep track of which operators pass the checks (and for which 
> checks should be enabled) and those that still need work. This allows the 
> checks to exist in the code, and to be enabled incrementally as we fix the 
> various problems.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to