[ https://issues.apache.org/jira/browse/DRILL-5657?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16247831#comment-16247831 ]
ASF GitHub Bot commented on DRILL-5657: --------------------------------------- Github user parthchandra commented on a diff in the pull request: https://github.com/apache/drill/pull/914#discussion_r149758695 --- Diff: exec/vector/src/main/codegen/templates/ColumnAccessors.java --- @@ -191,141 +180,268 @@ public void bind(RowIndex vectorIndex, ValueVector vector) { <#if accessorType=="BigDecimal"> <#assign label="Decimal"> </#if> + <#if drillType == "VarChar" || drillType == "Var16Char"> + <#assign accessorType = "byte[]"> + <#assign label = "Bytes"> + </#if> <#if ! notyet> //------------------------------------------------------------------------ // ${drillType} readers and writers - public static class ${drillType}ColumnReader extends AbstractColumnReader { + public static class ${drillType}ColumnReader extends BaseScalarReader { - <@bindReader "" drillType /> + <@bindReader "" drillType false /> - <@getType label /> + <@getType drillType label /> <@get drillType accessorType label false/> } - public static class Nullable${drillType}ColumnReader extends AbstractColumnReader { + public static class Nullable${drillType}ColumnReader extends BaseScalarReader { - <@bindReader "Nullable" drillType /> + <@bindReader "Nullable" drillType false /> - <@getType label /> + <@getType drillType label /> @Override public boolean isNull() { - return accessor().isNull(vectorIndex.index()); - } - - <@get drillType accessorType label false/> - } - - public static class Repeated${drillType}ColumnReader extends AbstractArrayReader { - - <@bindReader "Repeated" drillType /> - - <@getType label /> - - @Override - public int size() { - return accessor().getInnerValueCountAt(vectorIndex.index()); + return accessor().isNull(vectorIndex.vectorIndex()); } - <@get drillType accessorType label true/> + <@get drillType accessorType label false /> } - public static class ${drillType}ColumnWriter extends AbstractColumnWriter { + public static class Repeated${drillType}ColumnReader extends 
BaseElementReader { - <@bindWriter "" drillType /> + <@bindReader "" drillType true /> - <@getType label /> + <@getType drillType label /> - <@set drillType accessorType label false "set" /> + <@get drillType accessorType label true /> } - public static class Nullable${drillType}ColumnWriter extends AbstractColumnWriter { - - <@bindWriter "Nullable" drillType /> + <#assign varWidth = drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary" /> + <#if varWidth> + public static class ${drillType}ColumnWriter extends BaseVarWidthWriter { + <#else> + public static class ${drillType}ColumnWriter extends BaseFixedWidthWriter { + <#if drillType = "Decimal9" || drillType == "Decimal18" || + drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse"> + private MajorType type; + </#if> + private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH; + </#if> + private final ${drillType}Vector vector; + + public ${drillType}ColumnWriter(final ValueVector vector) { + <#if varWidth> + super(((${drillType}Vector) vector).getOffsetVector()); + <#else> + <#if drillType = "Decimal9" || drillType == "Decimal18" || + drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse"> + type = vector.getField().getType(); + </#if> + </#if> + this.vector = (${drillType}Vector) vector; + } - <@getType label /> + @Override public ValueVector vector() { return vector; } + <#-- All change of buffer comes through this function to allow capturing + the buffer address and capacity. Only two ways to set the buffer: + by binding to a vector in bindVector(), or by resizing the vector + in writeIndex(). --> @Override - public void setNull() { - mutator.setNull(vectorIndex.index()); + protected final void setAddr() { + final DrillBuf buf = vector.getBuffer(); + bufAddr = buf.addr(); + <#if varWidth> + capacity = buf.capacity(); + <#else> + <#-- Turns out that keeping track of capacity as the count of + values simplifies the per-value code path. 
--> + capacity = buf.capacity() / VALUE_WIDTH; + </#if> } - <@set drillType accessorType label true "set" /> - } - - public static class Repeated${drillType}ColumnWriter extends AbstractArrayWriter { - - <@bindWriter "Repeated" drillType /> + <#-- reallocRaw() is type specific. --> + @Override + protected void realloc(int size) { + vector.reallocRaw(size); + setAddr(); + } - <@getType label /> + <#if ! varWidth> + @Override public int width() { return VALUE_WIDTH; } + + </#if> + <@getType drillType label /> + + <#if accessorType == "byte[]"> + <#assign args = ", int len"> + <#else> + <#assign args = ""> + </#if> + <#if javaType == "char"> + <#assign putType = "short" /> + <#assign doCast = true /> + <#else> + <#assign putType = javaType /> + <#assign doCast = (cast == "set") /> + </#if> + <#if ! varWidth> + @Override + protected final void fillEmpties(final int writeIndex) { + <#-- Fill empties. This is required because the allocated memory is not + zero-filled. --> + while (lastWriteIndex < writeIndex - 1) { + <#-- Implemented in a type-specific way because PlatformDependent does not + provide a general-purpose "fillBytes" function that we could use instead. + If that was provided, we'd just fill the entire missing span of data + with zeros. --> + <#assign putAddr = "bufAddr + ++lastWriteIndex * VALUE_WIDTH" /> --- End diff -- Three +'s in a row are a trifle tricky to read. > Implement size-aware result set loader > -------------------------------------- > > Key: DRILL-5657 > URL: https://issues.apache.org/jira/browse/DRILL-5657 > Project: Apache Drill > Issue Type: Improvement > Affects Versions: Future > Reporter: Paul Rogers > Assignee: Paul Rogers > Fix For: Future > > > A recent extension to Drill's set of test tools created a "row set" > abstraction to allow us to create, and verify, record batches with very few > lines of code. Part of this work involved creating a set of "column > accessors" in the vector subsystem. 
Column readers provide a uniform API to > obtain data from columns (vectors), while column writers provide a uniform > writing interface. > DRILL-5211 discusses a set of changes to limit value vectors to 16 MB in size > (to avoid memory fragmentation due to Drill's two memory allocators). The > column accessors have proven to be so useful that they will be the basis for > the new, size-aware writers used by Drill's record readers. > A step in that direction is to retrofit the column writers to use the > size-aware {{setScalar()}} and {{setArray()}} methods introduced in > DRILL-5517. > Since the test framework row set classes are (at present) the only consumer > of the accessors, those classes must also be updated with the changes. > This then allows us to add a new "row mutator" class that handles size-aware > vector writing, including the case in which a vector fills in the middle of a > row. -- This message was sent by Atlassian JIRA (v6.4.14#64029)