[GitHub] drill pull request #906: DRILL-5546: Handle schema change exception failure ...

paul-rogers Mon, 21 Aug 2017 12:54:09 -0700

Github user paul-rogers commented on a diff in the pull request:

    https://github.com/apache/drill/pull/906#discussion_r134297885
  
    --- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java 
---
    @@ -152,97 +157,75 @@ public void kill(boolean sendUpstream) {
         }
       }
     
    -  private void releaseAssets() {
    -    container.zeroVectors();
    -  }
    -
    -  private void clearFieldVectorMap() {
    -    for (final ValueVector v : mutator.fieldVectorMap().values()) {
    -      v.clear();
    -    }
    -  }
    -
       @Override
       public IterOutcome next() {
         if (done) {
           return IterOutcome.NONE;
         }
         oContext.getStats().startProcessing();
         try {
    -      try {
    -        injector.injectChecked(context.getExecutionControls(), 
"next-allocate", OutOfMemoryException.class);
    -
    -        currentReader.allocate(mutator.fieldVectorMap());
    -      } catch (OutOfMemoryException e) {
    -        clearFieldVectorMap();
    -        throw UserException.memoryError(e).build(logger);
    -      }
    -      while ((recordCount = currentReader.next()) == 0) {
    +      while (true) {
             try {
    -          if (!readers.hasNext()) {
    -            // We're on the last reader, and it has no (more) rows.
    -            currentReader.close();
    -            releaseAssets();
    -            done = true;  // have any future call to next() return NONE
    -
    -            if (mutator.isNewSchema()) {
    -              // This last reader has a new schema (e.g., we have a 
zero-row
    -              // file or other source).  (Note that some sources have a 
non-
    -              // null/non-trivial schema even when there are no rows.)
    +          injector.injectChecked(context.getExecutionControls(), 
"next-allocate", OutOfMemoryException.class);
    +          currentReader.allocate(mutator.fieldVectorMap());
    +        } catch (OutOfMemoryException e) {
    +          clearFieldVectorMap();
    +          throw UserException.memoryError(e).build(logger);
    +        }
     
    -              container.buildSchema(SelectionVectorMode.NONE);
    -              schema = container.getSchema();
    +        recordCount = currentReader.next();
    +        Preconditions.checkArgument(recordCount >= 0,
    +            "recordCount from RecordReader.next() should not be negative");
     
    -              return IterOutcome.OK_NEW_SCHEMA;
    -            }
    -            return IterOutcome.NONE;
    -          }
    -          // At this point, the reader that hit its end is not the last 
reader.
    +        boolean isNewRegularSchema = mutator.isNewSchema();
    +        // We should skip the reader, when recordCount = 0 && ! 
isNewRegularSchema.
    +        // Add/set implicit column vectors, only when reader gets > 0 row, 
or
    +        // when reader gets 0 row but with a schema with new field added
    +        if (recordCount > 0 || isNewRegularSchema) {
    +          addImplicitVectors();
    +          populateImplicitVectors();
    +        }
     
    -          // If all the files we have read so far are just empty, the 
schema is not useful
    -          if (! hasReadNonEmptyFile) {
    -            container.clear();
    -            clearFieldVectorMap();
    -            mutator.clear();
    -          }
    +        boolean isNewImplicitSchema = mutator.isNewSchema();
    +        for (VectorWrapper<?> w : container) {
    +          w.getValueVector().getMutator().setValueCount(recordCount);
    +        }
    +        final boolean isNewSchema = isNewRegularSchema || 
isNewImplicitSchema;
    +        oContext.getStats().batchReceived(0, recordCount, isNewSchema);
     
    +        if (recordCount == 0) {
               currentReader.close();
    -          currentReader = readers.next();
    -          implicitValues = implicitColumns.hasNext() ? 
implicitColumns.next() : null;
    -          currentReader.setup(oContext, mutator);
    -          try {
    -            currentReader.allocate(mutator.fieldVectorMap());
    -          } catch (OutOfMemoryException e) {
    -            clearFieldVectorMap();
    -            throw UserException.memoryError(e).build(logger);
    +          if (isNewSchema) {
    +            // current reader presents a new schema in mutator even though 
it has 0 row.
    +            // This could happen when data sources have a non-trivial 
schema with 0 row.
    +            container.buildSchema(SelectionVectorMode.NONE);
    +            schema = container.getSchema();
    +            if (readers.hasNext()) {
    +              advanceNextReader();
    +            } else {
    +              done = true;  // indicates the follow-up next() call will 
return IterOutcome.NONE.
    +            }
    +            return IterOutcome.OK_NEW_SCHEMA;
    +          } else { // not a new schema
    +            if (readers.hasNext()) {
    +              advanceNextReader();
    +              continue; // skip reader returning 0 row and having same 
schema.
    --- End diff --
    
    As noted in the comment above: at this point we still hold a payload of
    allocated vectors. Which code releases that memory before the `continue`
    loops back and we allocate again?



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at infrastructure@apache.org or file a JIRA
ticket with INFRA.
---

[GitHub] drill pull request #906: DRILL-5546: Handle schema change exception failure ...

Reply via email to