DRILL-6162: Enhance record batch sizer to retain nesting information. Refactor record batch sizer and add unit tests for sizing and vector allocation.
Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/47c5d1fe Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/47c5d1fe Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/47c5d1fe Branch: refs/heads/master Commit: 47c5d1feaaaf9b6384ed8ef1011fa58b9272b362 Parents: 0a8a3f1 Author: Padma Penumarthy <[email protected]> Authored: Tue Mar 6 16:05:48 2018 -0800 Committer: Ben-Zvi <[email protected]> Committed: Wed Mar 7 15:41:11 2018 -0800 ---------------------------------------------------------------------- .../drill/exec/record/RecordBatchSizer.java | 479 ++++++++--- .../drill/exec/record/VectorInitializer.java | 23 +- .../impl/xsort/managed/TestShortArrays.java | 6 +- .../exec/physical/unit/TestOutputBatchSize.java | 118 ++- .../drill/exec/record/TestRecordBatchSizer.java | 806 +++++++++++++++++++ .../apache/drill/exec/vector/ValueVector.java | 1 + 6 files changed, 1296 insertions(+), 137 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/47c5d1fe/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java index 536c8bc..9525c91 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java @@ -27,7 +27,11 @@ import org.apache.drill.exec.expr.TypeHelper; import org.apache.drill.exec.memory.AllocationManager.BufferLedger; import org.apache.drill.exec.memory.BaseAllocator; import org.apache.drill.exec.record.selection.SelectionVector2; +import org.apache.drill.exec.vector.AllocationHelper; +import 
org.apache.drill.exec.vector.NullableVector; +import org.apache.drill.exec.vector.UInt1Vector; import org.apache.drill.exec.vector.UInt4Vector; +import org.apache.drill.exec.vector.UntypedNullVector; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.complex.AbstractMapVector; import org.apache.drill.exec.vector.complex.RepeatedListVector; @@ -36,6 +40,7 @@ import org.apache.drill.exec.vector.complex.RepeatedValueVector; import org.apache.drill.exec.vector.VariableWidthVector; import com.google.common.collect.Sets; +import org.apache.drill.exec.vector.complex.RepeatedVariableWidthVectorLike; /** * Given a record batch or vector container, determines the actual memory @@ -43,6 +48,9 @@ import com.google.common.collect.Sets; */ public class RecordBatchSizer { + private static final int OFFSET_VECTOR_WIDTH = UInt4Vector.VALUE_WIDTH; + private static final int BIT_VECTOR_WIDTH = UInt1Vector.VALUE_WIDTH; + private static final int STD_REPETITION_FACTOR = 10; /** * Column size information. @@ -52,20 +60,19 @@ public class RecordBatchSizer { public final MaterializedField metadata; /** - * Assumed size from Drill metadata. Note that this information is - * 100% bogus for variable-width columns. Do not use it for such - * columns. + * This is the total size of just pure data for the column + * for all entries. */ - public int stdSize; + private int totalDataSize; /** - * Actual average column width as determined from actual memory use. This - * size is larger than the actual data size since this size includes per- - * column overhead such as any unused vector space, etc. + * This is the total size of data for the column + additional + * metadata vector overhead we add for cardinality, variable length etc. + * for all entries. */ - public final int estSize; + private int totalNetSize; /** * Number of occurrences of the value in the batch. 
This is trivial @@ -76,104 +83,324 @@ public class RecordBatchSizer { * greater than (but unlikely) same as the row count. */ - public final int valueCount; + private final int valueCount; /** - * Total number of elements for a repeated type, or 1 if this is - * a non-repeated type. That is, a batch of 100 rows may have an - * array with 10 elements per row. In this case, the element count - * is 1000. + * Total number of elements for a repeated type, or same as + * valueCount if this is a non-repeated type. That is, a batch + * of 100 rows may have an array with 10 elements per row. + * In this case, the element count is 1000. */ - public final int elementCount; + private int elementCount; /** - * Size of the top level value vector. For map and repeated list, - * this is just size of offset vector. + * The estimated, average number of elements per parent value. + * Always 1 for a non-repeated type. For a repeated type, + * this is the average entries per array (per repeated element). */ - public int dataSize; + + private float cardinality; /** - * Total size of the column includes the sum total of memory for all - * value vectors representing the column. + * Indicates if it is variable width column. + * For map columns, this is true if any of the children is variable + * width column. */ - public int netSize; + + private boolean isVariableWidth; /** - * The estimated, average number of elements per parent value. - * Always 1 for a non-repeated type. For a repeated type, - * this is the average entries per array (per repeated element). + * Indicates if cardinality is repeated(top level only). + */ + + private boolean isRepeated; + + /** + * Indicates if cardinality is optional i.e. nullable(top level only). + */ + private boolean isOptional; + + /** + * Child columns if this is a map column. + */ + private Map<String, ColumnSize> children = CaseInsensitiveMap.newHashMap(); + + /** + * std pure data size per entry from Drill metadata, based on type. 
+ * Does not include metadata vector overhead we add for cardinality, + * variable length etc. + * For variable-width columns, we use 50 as std size for entry width. + * For repeated column, we assume repetition of 10. + */ + public int getStdDataSizePerEntry() { + int stdDataSize; + + try { + stdDataSize = TypeHelper.getSize(metadata.getType()); + + // For variable width, typeHelper includes offset vector width. Adjust for that. + if (isVariableWidth) { + stdDataSize -= OFFSET_VECTOR_WIDTH; + } + + if (isRepeated) { + stdDataSize = stdDataSize * STD_REPETITION_FACTOR; + } + } catch (Exception e) { + // For unsupported types, just set stdSize to 0. + // Map, Union, List etc. + stdDataSize = 0; + } + + // Add sizes of children. + for (ColumnSize columnSize : children.values()) { + stdDataSize += columnSize.getStdDataSizePerEntry(); + } + + if (isRepeatedList()) { + stdDataSize = stdDataSize * STD_REPETITION_FACTOR; + } + + return stdDataSize; + } + + /** + * std net size per entry taking into account additional metadata vectors + * we add on top for variable length, cardinality etc. + * For variable-width columns, we use 50 as std data size for entry width. + * For repeated column, we assume repetition of 10. */ + public int getStdNetSizePerEntry() { + int stdNetSize; + try { + stdNetSize = TypeHelper.getSize(metadata.getType()); + } catch (Exception e) { + stdNetSize = 0; + } - public final float estElementCountPerArray; - public final boolean isVariableWidth; + if (isOptional) { + stdNetSize += BIT_VECTOR_WIDTH; + } + + if (isRepeated) { + stdNetSize = (stdNetSize * STD_REPETITION_FACTOR) + OFFSET_VECTOR_WIDTH; + } + + for (ColumnSize columnSize : children.values()) { + stdNetSize += columnSize.getStdNetSizePerEntry(); + } + + if (isRepeatedList()) { + stdNetSize = (stdNetSize * STD_REPETITION_FACTOR) + OFFSET_VECTOR_WIDTH; + } + + return stdNetSize; + } + + /** + * This is the average actual per entry data size in bytes. 
Does not + * include any overhead of metadata vectors. + * For repeated columns, it is average for the repeated array, not + * individual entry in the array. + */ + public int getDataSizePerEntry() { + return safeDivide(getTotalDataSize(), getValueCount()); + } + + /** + * This is the average per entry size of just pure data plus + * overhead of additional vectors we add on top like bits vector, + * offset vector etc. This + * size is larger than the actual data size since this size includes per- + * column overhead for additional vectors we add for + * cardinality, variable length etc. + */ + public int getNetSizePerEntry() { + return safeDivide(getTotalNetSize(), getValueCount()); + } + + /** + * This is the total data size for the column, including children for map + * columns. Does not include any overhead of metadata vectors. + */ + public int getTotalDataSize() { + int dataSize = this.totalDataSize; + for (ColumnSize columnSize : children.values()) { + dataSize += columnSize.getTotalDataSize(); + } + return dataSize; + } + + /** + * This is the total net size for the column, including children for map + * columns. Includes overhead of metadata vectors. 
+ */ + public int getTotalNetSize() { + return this.totalNetSize; + } + + public int getValueCount() { + return valueCount; + } + + public int getElementCount() { + return elementCount; + } + + public float getCardinality() { + return cardinality; + } + + public boolean isVariableWidth() { + return isVariableWidth; + } + + public Map<String, ColumnSize> getChildren() { + return children; + } + + public boolean isComplex() { + return metadata.getType().getMinorType() == MinorType.MAP || + metadata.getType().getMinorType() == MinorType.UNION || + metadata.getType().getMinorType() == MinorType.LIST; + } + + public boolean isRepeatedList() { + if (metadata.getType().getMinorType() == MinorType.LIST && + metadata.getDataMode() == DataMode.REPEATED) { + return true; + } + return false; + } + + /** + * This is the average per entry width, used for vector allocation. + */ + public int getEntryWidth() { + int width = 0; + if (isVariableWidth) { + width = getNetSizePerEntry() - OFFSET_VECTOR_WIDTH; + + // Subtract out the bits (is-set) vector width + if (metadata.getDataMode() == DataMode.OPTIONAL) { + width -= BIT_VECTOR_WIDTH; + } + } + + return (safeDivide(width, cardinality)); + } public ColumnSize(ValueVector v, String prefix) { this.prefix = prefix; valueCount = v.getAccessor().getValueCount(); metadata = v.getField(); - isVariableWidth = v instanceof VariableWidthVector; - - // The amount of memory consumed by the payload: the actual - // data stored in the vectors. - - if (v.getField().getDataMode() == DataMode.REPEATED) { - elementCount = buildRepeated(v); - estElementCountPerArray = valueCount == 0 ? 0 : elementCount * 1.0f / valueCount; - } else { - elementCount = 1; - estElementCountPerArray = 1; + isVariableWidth = (v instanceof VariableWidthVector || v instanceof RepeatedVariableWidthVectorLike); + elementCount = valueCount; + cardinality = 1; + totalNetSize = v.getPayloadByteCount(valueCount); + + // Special case. 
For union and list vectors, it is very complex + // to figure out raw data size. Make it same as net size. + if (metadata.getType().getMinorType() == MinorType.UNION || + (metadata.getType().getMinorType() == MinorType.LIST && v.getField().getDataMode() != DataMode.REPEATED)) { + totalDataSize = totalNetSize; } - switch (metadata.getType().getMinorType()) { - case LIST: - buildList(v); - break; - case MAP: - case UNION: - // No standard size for Union type - dataSize = v.getPayloadByteCount(valueCount); - break; - default: - dataSize = v.getPayloadByteCount(valueCount); - try { - stdSize = TypeHelper.getSize(metadata.getType()) * elementCount; - } catch (Exception e) { - // For unsupported types, just set stdSize to 0. - stdSize = 0; - } + + switch(v.getField().getDataMode()) { + case REPEATED: + isRepeated = true; + elementCount = getElementCount(v); + cardinality = valueCount == 0 ? 0 : elementCount * 1.0f / valueCount; + + // For complex types, there is nothing more to do for top columns. + // Data size is calculated recursively for children later. + if (isComplex()) { + return; + } + + // Calculate pure data size. + if (isVariableWidth) { + UInt4Vector offsetVector = ((RepeatedValueVector) v).getOffsetVector(); + int innerValueCount = offsetVector.getAccessor().get(valueCount); + VariableWidthVector dataVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector()); + totalDataSize = dataVector.getOffsetVector().getAccessor().get(innerValueCount); + } else { + ValueVector dataVector = ((RepeatedValueVector) v).getDataVector(); + totalDataSize = dataVector.getPayloadByteCount(elementCount); + } + + break; + case OPTIONAL: + isOptional = true; + + // For complex types, there is nothing more to do for top columns. + // Data size is calculated recursively for children later. + if (isComplex()) { + return; + } + + // Calculate pure data size. 
+ if (isVariableWidth) { + VariableWidthVector variableWidthVector = ((VariableWidthVector) ((NullableVector) v).getValuesVector()); + totalDataSize = variableWidthVector.getOffsetVector().getAccessor().get(valueCount); + } else { + // Another special case. + if (v instanceof UntypedNullVector) { + return; + } + totalDataSize = ((NullableVector) v).getValuesVector().getPayloadByteCount(valueCount); + } + break; + + case REQUIRED: + // For complex types, there is nothing more to do for top columns. + // Data size is calculated recursively for children later. + if (isComplex()) { + return; + } + + // Calculate pure data size. + if (isVariableWidth) { + UInt4Vector offsetVector = ((VariableWidthVector)v).getOffsetVector(); + totalDataSize = offsetVector.getAccessor().get(valueCount); + } else { + totalDataSize = v.getPayloadByteCount(valueCount); + } + break; + + default: + break; } - estSize = safeDivide(dataSize, valueCount); - netSize = v.getPayloadByteCount(valueCount); } @SuppressWarnings("resource") - private int buildRepeated(ValueVector v) { - + private int getElementCount(ValueVector v) { // Repeated vectors are special: they have an associated offset vector // that changes the value count of the contained vectors. - UInt4Vector offsetVector = ((RepeatedValueVector) v).getOffsetVector(); int childCount = valueCount == 0 ? 0 : offsetVector.getAccessor().get(valueCount); - if (metadata.getType().getMinorType() == MinorType.MAP) { - // For map, the only data associated with the map vector - // itself is the offset vector, if any. 
+ return childCount; + } + + private void allocateMap(AbstractMapVector map, int recordCount) { + if (map instanceof RepeatedMapVector) { + ((RepeatedMapVector) map).allocateOffsetsNew(recordCount); + recordCount *= getCardinality(); + } - dataSize = offsetVector.getPayloadByteCount(valueCount); + for (ValueVector vector : map) { + children.get(vector.getField().getName()).allocateVector(vector, recordCount); } - return childCount; } - @SuppressWarnings("resource") - private void buildList(ValueVector v) { - // complex ListVector cannot be casted to RepeatedListVector. - // check the mode. - if (v.getField().getDataMode() != DataMode.REPEATED) { - dataSize = v.getPayloadByteCount(valueCount); + public void allocateVector(ValueVector vector, int recordCount) { + if (vector instanceof AbstractMapVector) { + allocateMap((AbstractMapVector) vector, recordCount); return; } - UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector(); - dataSize = offsetVector.getPayloadByteCount(valueCount); + AllocationHelper.allocate(vector, recordCount, getEntryWidth(), getCardinality()); } @Override @@ -191,15 +418,21 @@ public class RecordBatchSizer { buf.append(", elements: ") .append(elementCount) .append(", per-array: ") - .append(estElementCountPerArray); + .append(cardinality); } - buf .append(", std size: ") - .append(stdSize) - .append(", actual size: ") - .append(estSize) - .append(", data size: ") - .append(dataSize) - .append(")"); + buf.append("Per entry: std data size: ") + .append(getStdDataSizePerEntry()) + .append(", std net size: ") + .append(getStdNetSizePerEntry()) + .append(", actual data size: ") + .append(getDataSizePerEntry()) + .append(", actual net size: ") + .append(getNetSizePerEntry()) + .append(" Totals: data size: ") + .append(getTotalDataSize()) + .append(", net size: ") + .append(getTotalNetSize()) + .append(")"); return buf.toString(); } @@ -214,7 +447,7 @@ public class RecordBatchSizer { * for this column */ - private void 
buildVectorInitializer(VectorInitializer initializer) { + public void buildVectorInitializer(VectorInitializer initializer) { int width = 0; switch(metadata.getType().getMinorType()) { case VAR16CHAR: @@ -222,11 +455,11 @@ public class RecordBatchSizer { case VARCHAR: // Subtract out the offset vector width - width = estSize - 4; + width = getNetSizePerEntry() - OFFSET_VECTOR_WIDTH; // Subtract out the bits (is-set) vector width if (metadata.getDataMode() == DataMode.OPTIONAL) { - width -= 1; + width -= BIT_VECTOR_WIDTH; } break; default: @@ -237,23 +470,32 @@ public class RecordBatchSizer { if (width > 0) { // Estimated width is width of entire column. Divide // by element count to get per-element size. - initializer.variableWidthArray(name, width / estElementCountPerArray, estElementCountPerArray); + initializer.variableWidthArray(name, width / cardinality, cardinality); } else { - initializer.fixedWidthArray(name, estElementCountPerArray); + initializer.fixedWidthArray(name, cardinality); } } else if (width > 0) { initializer.variableWidth(name, width); } + + for (ColumnSize columnSize : children.values()) { + columnSize.buildVectorInitializer(initializer); + } } + } public static ColumnSize getColumn(ValueVector v, String prefix) { return new ColumnSize(v, prefix); } - public static final int MAX_VECTOR_SIZE = ValueVector.MAX_BUFFER_SIZE; // 16 MiB + public ColumnSize getColumn(String name) { + return columnSizes.get(name); + } + // This keeps information for only top level columns. Information for nested + // columns can be obtained from children of topColumns. 
private Map<String, ColumnSize> columnSizes = CaseInsensitiveMap.newHashMap(); /** @@ -334,7 +576,15 @@ public class RecordBatchSizer { public RecordBatchSizer(VectorAccessible va, SelectionVector2 sv2) { rowCount = va.getRecordCount(); for (VectorWrapper<?> vw : va) { - measureColumn(vw.getValueVector(), ""); + ColumnSize colSize = measureColumn(vw.getValueVector(), ""); + columnSizes.put(vw.getField().getName(), colSize); + stdRowWidth += colSize.getStdDataSizePerEntry(); + netBatchSize += colSize.getTotalNetSize(); + maxSize = Math.max(maxSize, colSize.getTotalDataSize()); + if (colSize.metadata.isNullable()) { + nullableCount++; + } + netRowWidth += colSize.getNetSizePerEntry(); } for (BufferLedger ledger : ledgers) { @@ -385,44 +635,35 @@ public class RecordBatchSizer { return 64; } - private void measureColumn(ValueVector v, String prefix) { - + private ColumnSize measureColumn(ValueVector v, String prefix) { ColumnSize colSize = new ColumnSize(v, prefix); - columnSizes.put(v.getField().getName(), colSize); - stdRowWidth += colSize.stdSize; - netBatchSize += colSize.dataSize; - maxSize = Math.max(maxSize, colSize.dataSize); - if (colSize.metadata.isNullable()) { - nullableCount++; - } - - // Maps consume no size themselves. However, their contained - // vectors do consume space, so visit columns recursively. - switch (v.getField().getType().getMinorType()) { case MAP: - expandMap((AbstractMapVector) v, prefix + v.getField().getName() + "."); + // Maps consume no size themselves. However, their contained + // vectors do consume space, so visit columns recursively. + expandMap(colSize, (AbstractMapVector) v, prefix + v.getField().getName() + "."); break; case LIST: // complex ListVector cannot be casted to RepeatedListVector. // do not expand the list if it is not repeated mode. 
if (v.getField().getDataMode() == DataMode.REPEATED) { - expandList((RepeatedListVector) v, prefix + v.getField().getName() + "."); + expandList(colSize, (RepeatedListVector) v, prefix + v.getField().getName() + "."); } break; default: v.collectLedgers(ledgers); } - netRowWidth += colSize.estSize; - netRowWidthCap50 += ! colSize.isVariableWidth ? colSize.estSize : - 8 /* offset vector */ + roundUpToPowerOf2(Math.min(colSize.estSize,50)); + netRowWidthCap50 += ! colSize.isVariableWidth ? colSize.getNetSizePerEntry() : + 8 /* offset vector */ + roundUpToPowerOf2(Math.min(colSize.getNetSizePerEntry(),50)); // above change 8 to 4 after DRILL-5446 is fixed + + return colSize; } - private void expandMap(AbstractMapVector mapVector, String prefix) { + private void expandMap(ColumnSize colSize, AbstractMapVector mapVector, String prefix) { for (ValueVector vector : mapVector) { - measureColumn(vector, prefix); + colSize.children.put(vector.getField().getName(), measureColumn(vector, prefix)); } // For a repeated map, we need the memory for the offset vector (only). @@ -433,11 +674,10 @@ public class RecordBatchSizer { } } - private void expandList(RepeatedListVector vector, String prefix) { - measureColumn(vector.getDataVector(), prefix); + private void expandList(ColumnSize colSize, RepeatedListVector vector, String prefix) { + colSize.children.put(vector.getField().getName(), measureColumn(vector.getDataVector(), prefix)); // Determine memory for the offset vector (only). 
- vector.collectLedgers(ledgers); } @@ -448,6 +688,20 @@ public class RecordBatchSizer { return (int) Math.ceil((double) num / denom); } + public static int safeDivide(int num, int denom) { + if (denom == 0) { + return 0; + } + return (int) Math.ceil((double) num / denom); + } + + public static int safeDivide(int num, float denom) { + if (denom == 0) { + return 0; + } + return (int) Math.ceil((double) num / denom); + } + public int rowCount() { return rowCount; } public int stdRowWidth() { return stdRowWidth; } public int grossRowWidth() { return grossRowWidth; } @@ -507,4 +761,11 @@ public class RecordBatchSizer { } return initializer; } + + public void allocateVectors(VectorContainer container, int recordCount) { + for (VectorWrapper w : container) { + ColumnSize colSize = columnSizes.get(w.getField().getName()); + colSize.allocateVector(w.getValueVector(), recordCount); + } + } } http://git-wip-us.apache.org/repos/asf/drill/blob/47c5d1fe/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorInitializer.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorInitializer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorInitializer.java index 30c2a7a..90f507d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorInitializer.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorInitializer.java @@ -17,7 +17,7 @@ */ package org.apache.drill.exec.record; -import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -25,6 +25,7 @@ import org.apache.drill.exec.vector.AllocationHelper; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.complex.AbstractMapVector; import org.apache.drill.exec.vector.complex.RepeatedMapVector; +import org.apache.drill.common.map.CaseInsensitiveMap; import 
com.google.common.annotations.VisibleForTesting; @@ -56,24 +57,24 @@ public class VectorInitializer { @Override public String toString() { StringBuilder buf = new StringBuilder() - .append("{"); + .append("{"); String sep = ""; if (entryWidth > 0) { buf.append("width=") - .append(entryWidth); + .append(entryWidth); sep = ", "; } if (elementCount > 0) { buf.append(sep) - .append("elements=") - .append(elementCount); + .append("elements=") + .append(elementCount); } buf.append("}"); return buf.toString(); } } - private Map<String, AllocationHint> hints = new HashMap<>(); + private Map<String, AllocationHint> hints = CaseInsensitiveMap.newHashMap(); public void variableWidth(String name, int width) { hints.put(name, new AllocationHint(width, 1)); @@ -97,7 +98,13 @@ public class VectorInitializer { } } - private void allocateVector(ValueVector vector, String prefix, int recordCount) { + public void allocateVectors(List<ValueVector> valueVectors, int recordCount) { + for (ValueVector v : valueVectors) { + allocateVector(v, v.getField().getName(), recordCount); + } + } + + public void allocateVector(ValueVector vector, String prefix, int recordCount) { String key = prefix + vector.getField().getName(); AllocationHint hint = hints.get(key); if (vector instanceof AbstractMapVector) { @@ -117,7 +124,7 @@ public class VectorInitializer { // ", " + size); } - private void allocateVector(ValueVector vector, int recordCount, AllocationHint hint) { + public void allocateVector(ValueVector vector, int recordCount, AllocationHint hint) { if (hint == null) { // Use hard-coded values. 
Same as ScanBatch http://git-wip-us.apache.org/repos/asf/drill/blob/47c5d1fe/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java index 3c210f7..735c5ce 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java @@ -72,15 +72,15 @@ public class TestShortArrays extends SubOperatorTest { RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); assertEquals(2, sizer.columns().size()); ColumnSize bCol = sizer.columns().get("b"); - assertEquals(0.1, bCol.estElementCountPerArray, 0.01); - assertEquals(1, bCol.elementCount); + assertEquals(0.1, bCol.getCardinality(), 0.01); + assertEquals(1, bCol.getElementCount()); // Create a vector initializer using the sizer info. VectorInitializer vi = sizer.buildVectorInitializer(); AllocationHint bHint = vi.hint("b"); assertNotNull(bHint); - assertEquals(bHint.elementCount, bCol.estElementCountPerArray, 0.001); + assertEquals(bHint.elementCount, bCol.getCardinality(), 0.001); // Create a new batch, and new vector, using the sizer and // initializer inferred from the previous batch. 
http://git-wip-us.apache.org/repos/asf/drill/blob/47c5d1fe/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java index 741164f..edd4cbf 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java @@ -31,15 +31,23 @@ import org.apache.drill.exec.physical.impl.ScanBatch; import org.apache.drill.exec.record.RecordBatchSizer; import org.apache.drill.exec.record.RecordBatch; import org.apache.drill.exec.record.VectorAccessible; +import org.apache.drill.exec.record.VectorWrapper; import org.apache.drill.exec.util.JsonStringArrayList; import org.apache.drill.exec.util.JsonStringHashMap; import org.apache.drill.exec.util.Text; +import org.apache.drill.exec.vector.UInt4Vector; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.complex.RepeatedListVector; +import org.apache.drill.exec.vector.complex.RepeatedValueVector; import org.junit.Test; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + public class TestOutputBatchSize extends PhysicalOpUnitTestBase { private static final long initReservation = AbstractBase.INIT_ALLOCATION; private static final long maxAllocation = AbstractBase.MAX_ALLOCATION; @@ -111,14 +119,14 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { long totalSize = getExpectedSize(expectedJsonBatches); // set the output batch size to 1/2 of total size expected. 
- // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in flatten. + // We will get approximately get 2 batches and max of 4. fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2); OperatorTestBuilder opTestBuilder = opTestBuilder() .physicalOperator(flatten) .inputDataStreamJson(inputJsonBatches) .baselineColumns("a", "b", "c") - .expectedNumBatches(4) // verify number of batches + .expectedNumBatches(2) // verify number of batches .expectedBatchSize(totalSize / 2); // verify batch size. for (int i = 0; i < numRows + 1; i++) { @@ -179,14 +187,14 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { long totalSize = getExpectedSize(expectedJsonBatches); // set the output batch size to 1/2 of total size expected. - // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in flatten. + // We will get approximately get 2 batches and max of 4. fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2); OperatorTestBuilder opTestBuilder = opTestBuilder() .physicalOperator(flatten) .inputDataStreamJson(inputJsonBatches) .baselineColumns("a", "b", "c") - .expectedNumBatches(4) // verify number of batches + .expectedNumBatches(2) // verify number of batches .expectedBatchSize(totalSize / 2); // verify batch size. for (int i = 0; i < numRows + 1; i++) { @@ -241,14 +249,14 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { long totalSize = getExpectedSize(expectedJsonBatches); // set the output batch size to 1/2 of total size expected. - // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in flatten. + // We will get approximately get 2 batches and max of 4. 
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2); OperatorTestBuilder opTestBuilder = opTestBuilder() .physicalOperator(flatten) .inputDataStreamJson(inputJsonBatches) .baselineColumns("a", "b", "c") - .expectedNumBatches(4) // verify number of batches + .expectedNumBatches(2) // verify number of batches .expectedBatchSize(totalSize); // verify batch size. for (int i = 0; i < numRows + 1; i++) { @@ -302,14 +310,14 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { long totalSize = getExpectedSize(expectedJsonBatches); // set the output batch size to 1/2 of total size expected. - // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in flatten. + // We will get approximately get 2 batches and max of 4. fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2); OperatorTestBuilder opTestBuilder = opTestBuilder() .physicalOperator(flatten) .inputDataStreamJson(inputJsonBatches) .baselineColumns("a", "b", "c") - .expectedNumBatches(4) // verify number of batches + .expectedNumBatches(2) // verify number of batches .expectedBatchSize(totalSize); // verify batch size. final JsonStringArrayList<Text> birds1 = new JsonStringArrayList<Text>() {{ @@ -390,14 +398,14 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { long totalSize = getExpectedSize(expectedJsonBatches); // set the output batch size to 1/2 of total size expected. - // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in flatten. + // We will get approximately get 2 batches and max of 4. 
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2); OperatorTestBuilder opTestBuilder = opTestBuilder() .physicalOperator(flatten) .inputDataStreamJson(inputJsonBatches) .baselineColumns("a", "b", "c") - .expectedNumBatches(4) // verify number of batches + .expectedNumBatches(2) // verify number of batches .expectedBatchSize(totalSize / 2); // verify batch size. JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>(); @@ -499,14 +507,14 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { long totalSize = getExpectedSize(expectedJsonBatches); // set the output batch size to 1/2 of total size expected. - // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in flatten. + // We will get approximately get 2 batches and max of 4. fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2); OperatorTestBuilder opTestBuilder = opTestBuilder() .physicalOperator(flatten) .inputDataStreamJson(inputJsonBatches) .baselineColumns("a", "b", "c") - .expectedNumBatches(4) // verify number of batches + .expectedNumBatches(2) // verify number of batches .expectedBatchSize(totalSize / 2); // verify batch size. final JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>(); @@ -597,14 +605,14 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { long totalSize = getExpectedSize(expectedJsonBatches); // set the output batch size to 1/2 of total size expected. - // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in flatten. + // We will get approximately get 2 batches and max of 4. 
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2); OperatorTestBuilder opTestBuilder = opTestBuilder() .physicalOperator(flatten) .inputDataStreamJson(inputJsonBatches) .baselineColumns("a", "b", "c") - .expectedNumBatches(4) // verify number of batches + .expectedNumBatches(2) // verify number of batches .expectedBatchSize(totalSize / 2); // verify batch size. JsonStringHashMap<String, Object> innerMapResult = new JsonStringHashMap<>(); @@ -694,7 +702,7 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { long totalSize = getExpectedSize(expectedJsonBatches); // set the output batch size to 1/2 of total size expected. - // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in flatten. + // We will get 16 batches because of upper bound of 65535 rows. fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2); // Here we expect 16 batches because each batch will be limited by upper limit of 65535 records. @@ -863,14 +871,14 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { long totalSize = getExpectedSize(expectedJsonBatches); // set the output batch size to 1/2 of total size expected. - // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in flatten. + // We will get approximately 2 batches, and at most 4. fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2); OperatorTestBuilder opTestBuilder = opTestBuilder() .physicalOperator(flatten) .inputDataStreamJson(inputJsonBatches) .baselineColumns("a", "b", "c") - .expectedNumBatches(4) // verify number of batches + .expectedNumBatches(2) // verify number of batches .expectedBatchSize(totalSize / 2); // verify batch size.
for (long k = 0; k < ((numRows + 1)); k++) { @@ -1126,4 +1134,80 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { opTestBuilder.go(); } + + @Test + public void testSizerRepeatedList() throws Exception { + List<String> inputJsonBatches = Lists.newArrayList(); + StringBuilder batchString = new StringBuilder(); + + StringBuilder newString = new StringBuilder(); + newString.append("[ [1,2,3,4], [5,6,7,8] ]"); + + numRows = 9; + batchString.append("["); + for (int i = 0; i < numRows; i++) { + batchString.append("{\"c\" : " + newString); + batchString.append("},"); + } + batchString.append("{\"c\" : " + newString); + batchString.append("}"); + + batchString.append("]"); + inputJsonBatches.add(batchString.toString()); + + // Create a dummy scanBatch to figure out the size. + RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), + fragContext, getReaderListForJsonBatches(inputJsonBatches, fragContext)); + + VectorAccessible va = new BatchIterator(scanBatch).iterator().next(); + RecordBatchSizer sizer = new RecordBatchSizer(va); + + assertEquals(1, sizer.columns().size()); + RecordBatchSizer.ColumnSize column = sizer.columns().get("c"); + assertNotNull(column); + + /** + * stdDataSize:8*10*10, stdNetSize:8*10*10 + 4*10 + 4*10 + 4, + * dataSizePerEntry:8*8, netSizePerEntry:8*8 + 4*2 + 4, + * totalDataSize:8*8*10, totalNetSize:netSizePerEntry*10, valueCount:10, + * elementCount:20, estElementCountPerArray:2, isVariableWidth:false + */ + assertEquals(800, column.getStdDataSizePerEntry()); + assertEquals(884, column.getStdNetSizePerEntry()); + assertEquals(64, column.getDataSizePerEntry()); + assertEquals(76, column.getNetSizePerEntry()); + assertEquals(640, column.getTotalDataSize()); + assertEquals(760, column.getTotalNetSize()); + assertEquals(10, column.getValueCount()); + assertEquals(20, column.getElementCount()); + assertEquals(2, column.getCardinality(), 0.01); + assertEquals(false, column.isVariableWidth()); + + final int testRowCount = 
1000; + final int testRowCountPowerTwo = 2048; + + for (VectorWrapper<?> vw : va) { + ValueVector v = vw.getValueVector(); + v.clear(); + + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount); + UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount * 2) << 1), offsetVector.getValueCapacity()); + ValueVector dataVector = ((RepeatedValueVector) v).getDataVector(); + assertEquals(Integer.highestOneBit((testRowCount * 2) << 1) - 1, dataVector.getValueCapacity()); + v.clear(); + + // Allocates the same as value passed since it is already power of two. + // -1 is done for adjustment needed for offset vector. + colSize.allocateVector(v, testRowCountPowerTwo - 1); + offsetVector = ((RepeatedListVector) v).getOffsetVector(); + assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity()); + dataVector = ((RepeatedValueVector) v).getDataVector(); + assertEquals(Integer.highestOneBit(testRowCountPowerTwo)-1, dataVector.getValueCapacity()); + v.clear(); + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/drill/blob/47c5d1fe/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestRecordBatchSizer.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestRecordBatchSizer.java b/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestRecordBatchSizer.java new file mode 100644 index 0000000..0490406 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestRecordBatchSizer.java @@ -0,0 +1,806 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.record; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.RecordBatchSizer.ColumnSize; +import org.apache.drill.exec.vector.NullableVector; +import org.apache.drill.exec.vector.UInt4Vector; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.VariableWidthVector; +import org.apache.drill.exec.vector.complex.MapVector; +import org.apache.drill.exec.vector.complex.RepeatedMapVector; +import org.apache.drill.exec.vector.complex.RepeatedValueVector; +import org.apache.drill.test.SubOperatorTest; +import org.apache.drill.test.rowSet.RowSet; +import org.apache.drill.test.rowSet.RowSet.SingleRowSet; +import org.apache.drill.test.rowSet.RowSetBuilder; +import org.apache.drill.test.rowSet.SchemaBuilder; +import org.junit.Test; + +public class TestRecordBatchSizer extends SubOperatorTest { + private final int testRowCount = 1000; + private final int testRowCountPowerTwo = 2048; + + + private void verifyColumnValues(ColumnSize column, int stdDataSizePerEntry, int stdNetSizePerEntry, + int dataSizePerEntry, int netSizePerEntry, int totalDataSize, + int totalNetSize, int valueCount, int elementCount, + int estElementCountPerArray, boolean isVariableWidth) { + assertNotNull(column); + + 
assertEquals(stdDataSizePerEntry, column.getStdDataSizePerEntry()); + assertEquals(stdNetSizePerEntry, column.getStdNetSizePerEntry()); + + assertEquals(dataSizePerEntry, column.getDataSizePerEntry()); + assertEquals(netSizePerEntry, column.getNetSizePerEntry()); + + assertEquals(totalDataSize, column.getTotalDataSize()); + assertEquals(totalNetSize, column.getTotalNetSize()); + + assertEquals(valueCount, column.getValueCount()); + assertEquals(elementCount, column.getElementCount()); + + assertEquals(estElementCountPerArray, column.getCardinality(), 0.01); + assertEquals(isVariableWidth, column.isVariableWidth()); + } + + @Test + public void testSizerFixedWidth() { + BatchSchema schema = new SchemaBuilder().add("a", MinorType.BIGINT).add("b", MinorType.FLOAT8).build(); + RowSetBuilder builder = fixture.rowSetBuilder(schema); + + for (long i = 0; i < 10; i++) { + builder.addRow(i, (float) i * 0.1); + } + RowSet rows = builder.build(); + + // Run the record batch sizer on the resulting batch. 
+ RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); + assertEquals(2, sizer.columns().size()); + + ColumnSize aColumn = sizer.columns().get("a"); + + /** + * stdDataSize:8, stdNetSize:8, dataSizePerEntry:8, netSizePerEntry:8, + * totalDataSize:8*10, totalNetSize:8*10, valueCount:10, + * elementCount:10, estElementCountPerArray:1, isVariableWidth:false + */ + verifyColumnValues(aColumn, 8, 8, 8, 8, 80, 80, 10, 10, 1, false); + + ColumnSize bColumn = sizer.columns().get("b"); + verifyColumnValues(bColumn, 8, 8, 8, 8, 80, 80, 10, 10, 1,false); + + SingleRowSet empty = fixture.rowSet(schema); + VectorAccessible accessible = empty.vectorAccessible(); + + for (VectorWrapper<?> vw : accessible) { + ValueVector v = vw.getValueVector(); + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount); + assertEquals((Integer.highestOneBit(testRowCount) << 1), v.getValueCapacity()); + v.clear(); + + // Allocates the same as value passed since it is already power of two. + colSize.allocateVector(v, testRowCountPowerTwo); + assertEquals(testRowCountPowerTwo, v.getValueCapacity()); + v.clear(); + + // Allocate for max rows. + colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT); + assertEquals(ValueVector.MAX_ROW_COUNT, v.getValueCapacity()); + v.clear(); + + // Allocate for 0 rows. should atleast do allocation for 1 row. 
+ colSize.allocateVector(v, 0); + assertEquals(ValueVector.MIN_ROW_COUNT, v.getValueCapacity()); + v.clear(); + } + + rows.clear(); + empty.clear(); + } + + + @Test + public void testSizerRepeatedFixedWidth() { + BatchSchema schema = new SchemaBuilder().addArray("a", MinorType.BIGINT).addArray("b", MinorType.FLOAT8).build(); + RowSetBuilder builder = fixture.rowSetBuilder(schema); + + for (long i = 0; i < 10; i++) { + builder.addRow(new long[] {1, 2, 3, 4, 5}, new double[] {(double)i*0.1, (double)i*0.1, (double)i*0.1, (double)i*0.2, (double)i*0.3}); + } + RowSet rows = builder.build(); + + // Run the record batch sizer on the resulting batch. + RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); + assertEquals(2, sizer.columns().size()); + + /** + * stdDataSize:8*10, stdNetSize:8*10+4, dataSizePerEntry:5*8, netSizePerEntry:5*8+4, + * totalDataSize:5*8*10, totalNetSize:5*8*10+5*8, valueCount:10, + * elementCount:50, estElementCountPerArray:5, isVariableWidth:false + */ + verifyColumnValues(sizer.columns().get("a"), + 80, 84, 40, 44, 400, 440, 10, 50, 5, false); + + verifyColumnValues(sizer.columns().get("b"), + 80, 84, 40, 44, 400, 440, 10, 50, 5, false); + + SingleRowSet empty = fixture.rowSet(schema); + VectorAccessible accessible = empty.vectorAccessible(); + + UInt4Vector offsetVector; + ValueVector dataVector; + + for (VectorWrapper<?> vw : accessible) { + ValueVector v = vw.getValueVector(); + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount); + offsetVector = ((RepeatedValueVector) v).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity()); + dataVector = ((RepeatedValueVector) v).getDataVector(); + assertEquals(Integer.highestOneBit((testRowCount * 5) << 1), dataVector.getValueCapacity()); + v.clear(); + + // Allocates the same as value passed since it is already power of two. 
+ // -1 is done for adjustment needed for offset vector. + colSize.allocateVector(v, testRowCountPowerTwo - 1); + offsetVector = ((RepeatedValueVector) v).getOffsetVector(); + assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity()); + dataVector = ((RepeatedValueVector) v).getDataVector(); + assertEquals(Integer.highestOneBit((testRowCountPowerTwo -1) * 5) << 1, dataVector.getValueCapacity()); + v.clear(); + + // Allocate for max rows. + colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1); + offsetVector = ((RepeatedValueVector) v).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity()); + dataVector = ((RepeatedValueVector) v).getDataVector(); + assertEquals(Integer.highestOneBit(((ValueVector.MAX_ROW_COUNT - 1)* 5) << 1), dataVector.getValueCapacity()); + v.clear(); + + // Allocate for 0 rows. should atleast do allocation for 1 row. + colSize.allocateVector(v, 0); + offsetVector = ((RepeatedValueVector) v).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity()); + dataVector = ((RepeatedValueVector) v).getDataVector(); + assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity()); + v.clear(); + } + + empty.clear(); + rows.clear(); + } + + @Test + public void testSizerNullableFixedWidth() { + BatchSchema schema = new SchemaBuilder().addNullable("a", MinorType.BIGINT).addNullable("b", MinorType.FLOAT8).build(); + RowSetBuilder builder = fixture.rowSetBuilder(schema); + + for (long i = 0; i < 10; i++) { + builder.addRow(i, (float)i*0.1); + } + + RowSet rows = builder.build(); + + // Run the record batch sizer on the resulting batch. 
+ RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); + assertEquals(2, sizer.columns().size()); + + ColumnSize aColumn = sizer.columns().get("a"); + ColumnSize bColumn = sizer.columns().get("b"); + + /** + * stdDataSize:8, stdNetSize:8+1, dataSizePerEntry:8, netSizePerEntry:8+1, + * totalDataSize:8*10, totalNetSize:(8+1)*10, valueCount:10, + * elementCount:10, estElementCountPerArray:1, isVariableWidth:false + */ + verifyColumnValues(aColumn, + 8, 9, 8, 9, 80, 90, 10, 10, 1, false); + + verifyColumnValues(bColumn, + 8, 9, 8, 9, 80, 90, 10, 10, 1, false); + + SingleRowSet empty = fixture.rowSet(schema); + VectorAccessible accessible = empty.vectorAccessible(); + + ValueVector bitVector, valueVector; + + for (VectorWrapper<?> vw : accessible) { + ValueVector v = vw.getValueVector(); + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount); + bitVector = ((NullableVector) v).getBitsVector(); + assertEquals((Integer.highestOneBit(testRowCount) << 1), bitVector.getValueCapacity()); + valueVector = ((NullableVector) v).getValuesVector(); + assertEquals(Integer.highestOneBit(testRowCount << 1), valueVector.getValueCapacity()); + v.clear(); + + // Allocates the same as value passed since it is already power of two. + colSize.allocateVector(v, testRowCountPowerTwo); + bitVector = ((NullableVector) v).getBitsVector(); + assertEquals(testRowCountPowerTwo, bitVector.getValueCapacity()); + valueVector = ((NullableVector) v).getValuesVector(); + assertEquals(testRowCountPowerTwo, valueVector.getValueCapacity()); + v.clear(); + + // Allocate for max rows. 
+ colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT); + bitVector = ((NullableVector) v).getBitsVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, bitVector.getValueCapacity()); + valueVector = ((NullableVector) v).getValuesVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, valueVector.getValueCapacity()); + v.clear(); + + // Allocate for 0 rows. should atleast do allocation for 1 row. + colSize.allocateVector(v, 0); + bitVector = ((NullableVector) v).getBitsVector(); + assertEquals(ValueVector.MIN_ROW_COUNT, bitVector.getValueCapacity()); + valueVector = ((NullableVector) v).getValuesVector(); + assertEquals(ValueVector.MIN_ROW_COUNT, valueVector.getValueCapacity()); + v.clear(); + } + + empty.clear(); + rows.clear(); + } + + @Test + public void testSizerVariableWidth() { + BatchSchema schema = new SchemaBuilder().add("a", MinorType.VARCHAR).build(); + RowSetBuilder builder = fixture.rowSetBuilder(schema); + + StringBuilder stringBuilder = new StringBuilder(); + + // a, aa, aaa, ... aaaaaaaaaa. totalSize = (10*11)/2 = 55 + for (long i = 0; i < 10; i++) { + stringBuilder.append("a"); + builder.addRow(stringBuilder.toString()); + } + RowSet rows = builder.build(); + + // Run the record batch sizer on the resulting batch. 
+ RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); + assertEquals(1, sizer.columns().size()); + + ColumnSize aColumn = sizer.columns().get("a"); + + /** + * stdDataSize:50, stdNetSize:50+4, dataSizePerEntry:ceil(((10*11)/2)/10), netSizePerEntry:dataSizePerEntry+4, + * totalDataSize:(10*11)/2, totalNetSize:(10*11)/2 + 4*10, valueCount:10, + * elementCount:10, estElementCountPerArray:1, isVariableWidth:true + */ + verifyColumnValues(aColumn, + 50, 54, 6, 10, 55, 95, 10, 10, 1, true); + + SingleRowSet empty = fixture.rowSet(schema); + VectorAccessible accessible = empty.vectorAccessible(); + + UInt4Vector offsetVector; + + for (VectorWrapper<?> vw : accessible) { + ValueVector v = vw.getValueVector(); + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount); + offsetVector = ((VariableWidthVector)v).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit(testRowCount << 1)-1, v.getValueCapacity()); + v.clear(); + + // Allocates the same as value passed since it is already power of two. + // -1 is done for adjustment needed for offset vector. + colSize.allocateVector(v, testRowCountPowerTwo - 1); + offsetVector = ((VariableWidthVector)v).getOffsetVector(); + assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity()); + assertEquals(testRowCountPowerTwo - 1, v.getValueCapacity()); + v.clear(); + + // Allocate for max rows. + colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1); + offsetVector = ((VariableWidthVector)v).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MAX_ROW_COUNT - 1, v.getValueCapacity()); + v.clear(); + + // Allocate for 0 rows. should at least do allocation for 1 row. 
+ colSize.allocateVector(v, 0); + offsetVector = ((VariableWidthVector)v).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MIN_ROW_COUNT, v.getValueCapacity()); + v.clear(); + } + + empty.clear(); + rows.clear(); + } + + + @Test + public void testSizerRepeatedVariableWidth() { + BatchSchema schema = new SchemaBuilder().addArray("b", MinorType.VARCHAR).build(); + RowSetBuilder builder = fixture.rowSetBuilder(schema); + + // size = (5*6)/2 = 15 + String[] newString = new String [] {"a", "aa", "aaa", "aaaa", "aaaaa"}; + + for (long i = 0; i < 10; i++) { + builder.addRow((Object) (newString)); + } + RowSet rows = builder.build(); + + // Run the record batch sizer on the resulting batch. + RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); + assertEquals(1, sizer.columns().size()); + + ColumnSize bColumn = sizer.columns().get("b"); + + /** + * stdDataSize:50*10, stdNetSize:50*10+4*10+4, dataSizePerEntry:(5*6)/2, netSizePerEntry:(5*6)/2+5*4+4, + * totalDataSize:(5*6)/2 * 10, totalNetSize: ((5*6)/2+5*4+4)*10, valueCount:10, + * elementCount:50, estElementCountPerArray:5, isVariableWidth:true + */ + verifyColumnValues(bColumn, 500, 544, 15, 39, 150, 390, 10, 50, 5,true); + + SingleRowSet empty = fixture.rowSet(schema); + VectorAccessible accessible = empty.vectorAccessible(); + + for (VectorWrapper<?> vw : accessible) { + ValueVector v = vw.getValueVector(); + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount - 1); + UInt4Vector offsetVector = ((RepeatedValueVector)v).getOffsetVector(); + assertEquals(Integer.highestOneBit(testRowCount) << 1, offsetVector.getValueCapacity()); + VariableWidthVector vwVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector()); + offsetVector = vwVector.getOffsetVector(); + assertEquals((Integer.highestOneBit((testRowCount-1) * 
5) << 1), offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit((testRowCount-1) * 5 << 1)-1, vwVector.getValueCapacity()); + v.clear(); + + // Allocates the same as value passed since it is already power of two. + colSize.allocateVector(v, testRowCountPowerTwo); + offsetVector = ((RepeatedValueVector)v).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCountPowerTwo) << 1), offsetVector.getValueCapacity()); + vwVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector()); + offsetVector = vwVector.getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCountPowerTwo * 5) << 1), offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit(testRowCountPowerTwo * 5 << 1)-1, vwVector.getValueCapacity()); + v.clear(); + + // Allocate for max rows. + colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT); + offsetVector = ((RepeatedValueVector)v).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT << 1, offsetVector.getValueCapacity()); + vwVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector()); + offsetVector = vwVector.getOffsetVector(); + assertEquals((Integer.highestOneBit(ValueVector.MAX_ROW_COUNT * 5) << 1), offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit(ValueVector.MAX_ROW_COUNT * 5 << 1)-1, vwVector.getValueCapacity()); + v.clear(); + + // Allocate for 0 rows. should atleast do allocation for 1 row. 
+ colSize.allocateVector(v, 0); + offsetVector = ((RepeatedValueVector)v).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity()); + vwVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector()); + offsetVector = vwVector.getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MIN_ROW_COUNT, vwVector.getValueCapacity()); + v.clear(); + } + + empty.clear(); + rows.clear(); + } + + + @Test + public void testSizerNullableVariableWidth() { + BatchSchema schema = new SchemaBuilder().addNullable("b", MinorType.VARCHAR).build(); + RowSetBuilder builder = fixture.rowSetBuilder(schema); + + StringBuilder stringBuilder = new StringBuilder(); + + for (long i = 0; i < 10; i++) { + stringBuilder.append("a"); + builder.addRow( (Object) stringBuilder.toString()); + } + RowSet rows = builder.build(); + + // Run the record batch sizer on the resulting batch. + RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); + assertEquals(1, sizer.columns().size()); + + /** + * stdDataSize:50, stdNetSize:50+4+1, dataSizePerEntry:ceil(((10*11)/2)/10), + * netSizePerEntry: dataSizePerEntry+4+1, + * totalDataSize:(10*11)/2, totalNetSize: (10*11)/2 + (4*10) + (1*10), + * valueCount:10, + * elementCount:10, estElementCountPerArray:1, isVariableWidth:true + */ + verifyColumnValues(sizer.columns().get("b"), + 50, 55, 6, 11, 55, 105, 10, 10, 1,true); + + SingleRowSet empty = fixture.rowSet(schema); + VectorAccessible accessible = empty.vectorAccessible(); + + ValueVector bitVector, valueVector; + VariableWidthVector vwVector; + UInt4Vector offsetVector; + + for (VectorWrapper<?> vw : accessible) { + ValueVector v = vw.getValueVector(); + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount); + bitVector = ((NullableVector) v).getBitsVector(); + 
assertEquals((Integer.highestOneBit(testRowCount) << 1), bitVector.getValueCapacity()); + vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector(); + offsetVector = vwVector.getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit(testRowCount << 1)-1, vwVector.getValueCapacity()); + + // Allocates the same as value passed since it is already power of two. + colSize.allocateVector(v, testRowCountPowerTwo-1); + bitVector = ((NullableVector) v).getBitsVector(); + assertEquals(Integer.highestOneBit(testRowCountPowerTwo), bitVector.getValueCapacity()); + vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector(); + offsetVector = vwVector.getOffsetVector(); + assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit(testRowCountPowerTwo)-1, vwVector.getValueCapacity()); + + // Allocate for max rows. + colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT-1); + bitVector = ((NullableVector) v).getBitsVector(); + assertEquals((Integer.highestOneBit(ValueVector.MAX_ROW_COUNT)), bitVector.getValueCapacity()); + vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector(); + offsetVector = vwVector.getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MAX_ROW_COUNT-1, vwVector.getValueCapacity()); + + // Allocate for 0 rows. should atleast do allocation for 1 row. 
+ colSize.allocateVector(v, 0); + bitVector = ((NullableVector) v).getBitsVector(); + assertEquals((Integer.highestOneBit(ValueVector.MIN_ROW_COUNT)), bitVector.getValueCapacity()); + vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector(); + offsetVector = vwVector.getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT+1, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MIN_ROW_COUNT, vwVector.getValueCapacity()); + } + + empty.clear(); + rows.clear(); + } + + + @Test + public void testSizerMap() { + BatchSchema schema = new SchemaBuilder() + .addMap("map") + .add("key", MinorType.INT) + .add("value", MinorType.VARCHAR) + .buildMap() + .build(); + + RowSetBuilder builder = fixture.rowSetBuilder(schema); + + for (int i = 0; i < 10; i++) { + builder.addRow((Object) (new Object[] {10, "a"})); + } + RowSet rows = builder.build(); + + // Run the record batch sizer on the resulting batch. + RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); + assertEquals(1, sizer.columns().size()); + + /** + * stdDataSize:50+4, stdNetSize:50+4+4, dataSizePerEntry:4+1, + * netSizePerEntry: 4+1+4, + * totalDataSize:5*10, totalNetSize:4*10+4*10+1*10, + * valueCount:10, + * elementCount:10, estElementCountPerArray:1, isVariableWidth:false + */ + verifyColumnValues(sizer.columns().get("map"), 54, 58, 5, 9, 50, 90, 10, 10, 1, false); + + SingleRowSet empty = fixture.rowSet(schema); + VectorAccessible accessible = empty.vectorAccessible(); + + for (VectorWrapper<?> vw : accessible) { + ValueVector v = vw.getValueVector(); + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount); + MapVector mapVector = (MapVector)v; + ValueVector keyVector = mapVector.getChild("key"); + ValueVector valueVector1 = mapVector.getChild("value"); + assertEquals((Integer.highestOneBit(testRowCount) << 1), keyVector.getValueCapacity()); + UInt4Vector offsetVector = 
((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit(testRowCount << 1)-1, valueVector1.getValueCapacity()); + + // Allocates the same as value passed since it is already power of two. + colSize.allocateVector(v, testRowCountPowerTwo-1); + mapVector = (MapVector)v; + keyVector = mapVector.getChild("key"); + valueVector1 = mapVector.getChild("value"); + assertEquals((Integer.highestOneBit(testRowCountPowerTwo -1) << 1), keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit(testRowCountPowerTwo)-1, valueVector1.getValueCapacity()); + + // Allocate for max rows. + colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT -1); + mapVector = (MapVector)v; + keyVector = mapVector.getChild("key"); + valueVector1 = mapVector.getChild("value"); + assertEquals(ValueVector.MAX_ROW_COUNT, keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MAX_ROW_COUNT-1, valueVector1.getValueCapacity()); + + // Allocate for 0 rows. should atleast do allocation for 1 row. 
+ colSize.allocateVector(v, 0); + mapVector = (MapVector)v; + keyVector = mapVector.getChild("key"); + valueVector1 = mapVector.getChild("value"); + assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT+1, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity()); + + v.clear(); + } + + empty.clear(); + rows.clear(); + + } + + @Test + public void testSizerRepeatedMap() { + BatchSchema schema = new SchemaBuilder().addMapArray("map"). + add("key", MinorType.INT). + add("value", MinorType.VARCHAR). + buildMap().build(); + + RowSetBuilder builder = fixture.rowSetBuilder(schema); + + for (int i = 0; i < 10; i++) { + builder.addRow((Object) new Object[] { + new Object[] {110, "a"}, + new Object[] {120, "b"}}); + } + RowSet rows = builder.build(); + + // Run the record batch sizer on the resulting batch. + RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); + assertEquals(1, sizer.columns().size()); + + /** + * stdDataSize:50+4, stdNetSize:50+4+4+4, dataSizePerEntry:(4+1)*2, + * netSizePerEntry: 4*2+1*2+4*2+4, + * totalDataSize:5*2*10, totalNetSize:netSizePerEntry*10, + * valueCount:10, + * elementCount:20, estElementCountPerArray:2, isVariableWidth:false + */ + verifyColumnValues(sizer.columns().get("map"), 54,62, 10, 22, 100, 220, 10, 20, 2, false); + + SingleRowSet empty = fixture.rowSet(schema); + VectorAccessible accessible = empty.vectorAccessible(); + + UInt4Vector offsetVector; + + for (VectorWrapper<?> vw : accessible) { + ValueVector v = vw.getValueVector(); + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount); + RepeatedMapVector mapVector = (RepeatedMapVector)v; + + offsetVector = ((RepeatedValueVector)mapVector).getOffsetVector(); + 
assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity()); + + ValueVector keyVector = mapVector.getChild("key"); + ValueVector valueVector1 = mapVector.getChild("value"); + assertEquals(((Integer.highestOneBit(testRowCount) << 1) * 2), keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount) << 1)*2, offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit(testRowCount << 1)*2 - 1, valueVector1.getValueCapacity()); + + // Allocates the same as value passed since it is already power of two. + colSize.allocateVector(v, testRowCountPowerTwo-1); + mapVector = (RepeatedMapVector)v; + + offsetVector = ((RepeatedValueVector)mapVector).getOffsetVector(); + assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity()); + + keyVector = mapVector.getChild("key"); + valueVector1 = mapVector.getChild("value"); + assertEquals(testRowCountPowerTwo*2, keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(testRowCountPowerTwo*2, offsetVector.getValueCapacity()); + assertEquals(testRowCountPowerTwo*2 - 1, valueVector1.getValueCapacity()); + + // Allocate for max rows. + colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT -1); + mapVector = (RepeatedMapVector)v; + + offsetVector = ((RepeatedValueVector)mapVector).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity()); + + keyVector = mapVector.getChild("key"); + valueVector1 = mapVector.getChild("value"); + assertEquals(ValueVector.MAX_ROW_COUNT * 2, keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, valueVector1.getValueCapacity()); + + // Allocate for 0 rows. should atleast do allocation for 1 row. 
+ colSize.allocateVector(v, 0); + mapVector = (RepeatedMapVector)v; + + offsetVector = ((RepeatedValueVector)mapVector).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity()); + + keyVector = mapVector.getChild("key"); + valueVector1 = mapVector.getChild("value"); + assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT+1, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity()); + + v.clear(); + } + + empty.clear(); + rows.clear(); + } + + @Test + public void testSizerNestedMap() { + BatchSchema schema = new SchemaBuilder() + .addMap("map") + .add("key", MinorType.INT) + .add("value", MinorType.VARCHAR) + .addMap("childMap") + .add("childKey", MinorType.INT) + .add("childValue", MinorType.VARCHAR) + .buildMap() + .buildMap() + .build(); + + RowSetBuilder builder = fixture.rowSetBuilder(schema); + + for (int i = 0; i < 10; i++) { + builder.addRow((Object) (new Object[] {10, "a", new Object[] {5, "b"}})); + } + RowSet rows = builder.build(); + + // Run the record batch sizer on the resulting batch. 
+ RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); + assertEquals(1, sizer.columns().size()); + + /** + * stdDataSize:(50+4)*2, stdNetSize:(50+4)*2+4+4, dataSizePerEntry:(4+1)*2, + * netSizePerEntry: 4*2+1*2+4*2, + * totalDataSize:5*2*10, totalNetSize:netSizePerEntry*10, + * valueCount:10, + * elementCount:10, estElementCountPerArray:1, isVariableWidth:false + */ + verifyColumnValues(sizer.columns().get("map"), 108, 116, 10, 18, 100, 180, 10, 10, 1, false); + + SingleRowSet empty = fixture.rowSet(schema); + VectorAccessible accessible = empty.vectorAccessible(); + + UInt4Vector offsetVector; + + for (VectorWrapper<?> vw : accessible) { + ValueVector v = vw.getValueVector(); + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount); + MapVector mapVector = (MapVector)v; + ValueVector keyVector = mapVector.getChild("key"); + ValueVector valueVector1 = mapVector.getChild("value"); + assertEquals((Integer.highestOneBit(testRowCount) << 1), keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit(testRowCount << 1)-1, valueVector1.getValueCapacity()); + MapVector childMapVector = (MapVector) mapVector.getChild("childMap"); + ValueVector childKeyVector = childMapVector.getChild("childKey"); + ValueVector childValueVector1 = childMapVector.getChild("childValue"); + assertEquals((Integer.highestOneBit(testRowCount) << 1), childKeyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity()); + assertEquals(Integer.highestOneBit(testRowCount << 1)-1, childValueVector1.getValueCapacity()); + + // Allocates the same as value passed since it is already power
of two. + colSize.allocateVector(v, testRowCountPowerTwo-1); + mapVector = (MapVector)v; + keyVector = mapVector.getChild("key"); + valueVector1 = mapVector.getChild("value"); + assertEquals(testRowCountPowerTwo, keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity()); + assertEquals(testRowCountPowerTwo-1, valueVector1.getValueCapacity()); + childMapVector = (MapVector) mapVector.getChild("childMap"); + childKeyVector = childMapVector.getChild("childKey"); + childValueVector1 = childMapVector.getChild("childValue"); + assertEquals(testRowCountPowerTwo, childKeyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity()); + assertEquals(testRowCountPowerTwo-1, childValueVector1.getValueCapacity()); + + // Allocate for max rows. + colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT-1); + mapVector = (MapVector)v; + keyVector = mapVector.getChild("key"); + valueVector1 = mapVector.getChild("value"); + assertEquals(ValueVector.MAX_ROW_COUNT, keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MAX_ROW_COUNT-1, valueVector1.getValueCapacity()); + childMapVector = (MapVector) mapVector.getChild("childMap"); + childKeyVector = childMapVector.getChild("childKey"); + childValueVector1 = childMapVector.getChild("childValue"); + assertEquals(ValueVector.MAX_ROW_COUNT, childKeyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MAX_ROW_COUNT-1, childValueVector1.getValueCapacity()); + + // Allocate for 0 rows. Should at least do allocation for 1 row.
+ colSize.allocateVector(v, 0); + mapVector = (MapVector)v; + keyVector = mapVector.getChild("key"); + valueVector1 = mapVector.getChild("value"); + assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT+1, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity()); + childMapVector = (MapVector) mapVector.getChild("childMap"); + childKeyVector = childMapVector.getChild("childKey"); + childValueVector1 = childMapVector.getChild("childValue"); + assertEquals(ValueVector.MIN_ROW_COUNT, childKeyVector.getValueCapacity()); + offsetVector = ((VariableWidthVector)valueVector1).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT+1, offsetVector.getValueCapacity()); + assertEquals(ValueVector.MIN_ROW_COUNT, childValueVector1.getValueCapacity()); + + v.clear(); + } + + empty.clear(); + rows.clear(); + + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/drill/blob/47c5d1fe/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java ---------------------------------------------------------------------- diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java index 44a467e..58ed57b 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java @@ -84,6 +84,7 @@ public interface ValueVector extends Closeable, Iterable<ValueVector> { */ int MAX_ROW_COUNT = Character.MAX_VALUE + 1; + int MIN_ROW_COUNT = 1; // Commonly-used internal vector names
