[jira] [Commented] (DRILL-5323) Provide test tools to create, populate and compare row sets

ASF GitHub Bot (JIRA) Wed, 29 Mar 2017 18:33:07 -0700

    [ 
https://issues.apache.org/jira/browse/DRILL-5323?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15948234#comment-15948234
 ]


ASF GitHub Bot commented on DRILL-5323:
---------------------------------------

Github user sohami commented on a diff in the pull request:

    https://github.com/apache/drill/pull/785#discussion_r108808598
  
    --- Diff: 
exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java
 ---
    @@ -0,0 +1,158 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.drill.test.rowSet;
    +
    +import org.apache.drill.common.types.TypeProtos.MajorType;
    +import org.apache.drill.common.types.TypeProtos.MinorType;
    +import org.apache.drill.exec.expr.TypeHelper;
    +import org.apache.drill.exec.memory.BufferAllocator;
    +import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer;
    +import org.apache.drill.exec.record.BatchSchema;
    +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
    +import org.apache.drill.exec.record.VectorContainer;
    +import org.apache.drill.exec.record.VectorWrapper;
    +import org.apache.drill.exec.vector.ValueVector;
    +import org.apache.drill.exec.vector.complex.MapVector;
    +import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
    +import org.apache.drill.test.rowSet.RowSetSchema.LogicalColumn;
    +import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema;
    +
    +public abstract class AbstractSingleRowSet extends AbstractRowSet 
implements SingleRowSet {
    +
    +  public abstract static class StructureBuilder {
    +    protected final PhysicalSchema schema;
    +    protected final BufferAllocator allocator;
    +    protected final ValueVector[] valueVectors;
    +    protected final MapVector[] mapVectors;
    +    protected int vectorIndex;
    +    protected int mapIndex;
    +
    +    public StructureBuilder(BufferAllocator allocator, RowSetSchema 
schema) {
    +      this.allocator = allocator;
    +      this.schema = schema.physical();
    +      valueVectors = new ValueVector[schema.access().count()];
    +      if (schema.access().mapCount() == 0) {
    +        mapVectors = null;
    +      } else {
    +        mapVectors = new MapVector[schema.access().mapCount()];
    +      }
    +    }
    +  }
    +
    +  public static class VectorBuilder extends StructureBuilder {
    +
    +    public VectorBuilder(BufferAllocator allocator, RowSetSchema schema) {
    +      super(allocator, schema);
    +    }
    +
    +    public ValueVector[] buildContainer(VectorContainer container) {
    +      for (int i = 0; i < schema.count(); i++) {
    +        LogicalColumn colSchema = schema.column(i);
    +        @SuppressWarnings("resource")
    +        ValueVector v = TypeHelper.getNewVector(colSchema.field, 
allocator, null);
    +        container.add(v);
    +        if (colSchema.field.getType().getMinorType() == MinorType.MAP) {
    +          MapVector mv = (MapVector) v;
    +          mapVectors[mapIndex++] = mv;
    +          buildMap(mv, colSchema.mapSchema);
    +        } else {
    +          valueVectors[vectorIndex++] = v;
    +        }
    +      }
    +      container.buildSchema(SelectionVectorMode.NONE);
    +      return valueVectors;
    +    }
    +
    +    private void buildMap(MapVector mapVector, PhysicalSchema mapSchema) {
    +      for (int i = 0; i < mapSchema.count(); i++) {
    +        LogicalColumn colSchema = mapSchema.column(i);
    +        MajorType type = colSchema.field.getType();
    +        Class<? extends ValueVector> vectorClass = 
TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode());
    +        @SuppressWarnings("resource")
    +        ValueVector v = mapVector.addOrGet(colSchema.field.getName(), 
type, vectorClass);
    +        if (type.getMinorType() == MinorType.MAP) {
    +          MapVector mv = (MapVector) v;
    +          mapVectors[mapIndex++] = mv;
    +          buildMap(mv, colSchema.mapSchema);
    +        } else {
    +          valueVectors[vectorIndex++] = v;
    +        }
    +      }
    +    }
    +  }
    +
    +  public static class VectorMapper extends StructureBuilder {
    +
    +    public VectorMapper(BufferAllocator allocator, RowSetSchema schema) {
    +      super(allocator, schema);
    +    }
    +
    +    public ValueVector[] mapContainer(VectorContainer container) {
    +      for (VectorWrapper<?> w : container) {
    +        @SuppressWarnings("resource")
    +        ValueVector v = w.getValueVector();
    +        if (v.getField().getType().getMinorType() == MinorType.MAP) {
    +          MapVector mv = (MapVector) v;
    +          mapVectors[mapIndex++] = mv;
    +          buildMap(mv);
    +        } else {
    +          valueVectors[vectorIndex++] = v;
    +        }
    +      }
    +      return valueVectors;
    +    }
    +
    +    private void buildMap(MapVector mapVector) {
    +      for (ValueVector v : mapVector) {
    +        if (v.getField().getType().getMinorType() == MinorType.MAP) {
    +          MapVector mv = (MapVector) v;
    +          mapVectors[mapIndex++] = mv;
    +          buildMap(mv);
    +        } else {
    +          valueVectors[vectorIndex++] = v;
    +        }
    +      }
    +    }
    +  }
    +
    +  protected final ValueVector[] valueVectors;
    +
    +  public AbstractSingleRowSet(BufferAllocator allocator, BatchSchema 
schema) {
    +    super(allocator, schema, new VectorContainer());
    +    valueVectors = new VectorBuilder(allocator, 
super.schema).buildContainer(container);
    +  }
    +
    +  public AbstractSingleRowSet(BufferAllocator allocator, VectorContainer 
container) {
    +    super(allocator, container.getSchema(), container);
    +    valueVectors = new VectorMapper(allocator, 
super.schema).mapContainer(container);
    +  }
    +
    +  public AbstractSingleRowSet(AbstractSingleRowSet rowSet) {
    +    super(rowSet.allocator, rowSet.schema.batch(), rowSet.container);
    +    valueVectors = rowSet.valueVectors;
    +  }
    +
    +  @Override
    +  public ValueVector[] vectors() { return valueVectors; }
    +
    +  @Override
    +  public int getSize() {
    +    RecordBatchSizer sizer = new RecordBatchSizer(container);
    +    return sizer.actualSize();
    --- End diff --
    
    This method internally accesses the container's recordCount, but while 
building the container I don't see us setting the record count explicitly by 
calling setRecordCount on it. Nor does it look like that count is maintained 
when a value vector is added to the container. Shouldn't we set the 
recordCount?


> Provide test tools to create, populate and compare row sets
> -----------------------------------------------------------
>
>                 Key: DRILL-5323
>                 URL: https://issues.apache.org/jira/browse/DRILL-5323
>             Project: Apache Drill
>          Issue Type: Sub-task
>          Components: Tools, Build & Test
>    Affects Versions: 1.11.0
>            Reporter: Paul Rogers
>            Assignee: Paul Rogers
>             Fix For: 1.11.0
>
>
> Operators work with individual row sets. A row set is a collection of records 
> stored as column vectors. (Drill uses various terms for this concept. A 
> record batch is a row set with an operator implementation wrapped around it. 
> A vector container is a row set, but with much functionality left as an 
> exercise for the developer. And so on.)
> To simplify tests, we need a {{TestRowSet}} concept that wraps a 
> {{VectorContainer}} and provides easy ways to:
> * Define a schema for the row set.
> * Create a set of vectors that implement the schema.
> * Populate the row set with test data via code.
> * Add an SV2 to the row set.
> * Pass the row set to operator components (such as generated code blocks.)
> * Compare the results of the operation with an expected result set.
> * Dispose of the underlying direct memory when work is done.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

[jira] [Commented] (DRILL-5323) Provide test tools to create, populate and compare row sets

Reply via email to