http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java
index 39b0128..f0b3321 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java
@@ -17,15 +17,16 @@
  */
 package org.apache.drill.test.rowSet;
 
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.drill.common.types.TypeProtos.DataMode;
 import org.apache.drill.common.types.TypeProtos.MajorType;
 import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.exec.record.BatchSchema;
 import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.record.ColumnMetadata;
 import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
+import org.apache.drill.exec.record.TupleSchema.MapColumnMetadata;
 
 /**
  * Builder of a row set schema expressed as a list of materialized
@@ -59,8 +60,6 @@ public class SchemaBuilder {
    * need scale and precision, and so on.
    */
 
-  // TODO: Add map methods
-
   public static class ColumnBuilder {
     private final String name;
     private final MajorType.Builder typeBuilder;
@@ -78,7 +77,11 @@ public class SchemaBuilder {
     }
 
     public ColumnBuilder setWidth(int width) {
-      typeBuilder.setPrecision(width);
+      return setPrecision(width);
+    }
+
+    public ColumnBuilder setPrecision(int precision) {
+      typeBuilder.setPrecision(precision);
       return this;
     }
 
@@ -101,10 +104,14 @@ public class SchemaBuilder {
   public static class MapBuilder extends SchemaBuilder {
     private final SchemaBuilder parent;
     private final String memberName;
+    private final DataMode mode;
 
-    public MapBuilder(SchemaBuilder parent, String memberName) {
+    public MapBuilder(SchemaBuilder parent, String memberName, DataMode mode) {
       this.parent = parent;
       this.memberName = memberName;
+      // Optional maps not supported in Drill
+      assert mode != DataMode.OPTIONAL;
+      this.mode = mode;
     }
 
     @Override
@@ -114,11 +121,14 @@ public class SchemaBuilder {
 
     @Override
     public SchemaBuilder buildMap() {
-      MaterializedField col = columnSchema(memberName, MinorType.MAP, DataMode.REQUIRED);
-      for (MaterializedField childCol : columns) {
-        col.addChild(childCol);
+      // TODO: Use the map schema directly rather than
+      // rebuilding it as is done here.
+
+      MaterializedField col = columnSchema(memberName, MinorType.MAP, mode);
+      for (ColumnMetadata md : schema) {
+        col.addChild(md.schema());
       }
-      parent.finishMap(col);
+      parent.finishMap(TupleSchema.newMap(col, schema));
       return parent;
     }
 
@@ -128,7 +138,7 @@ public class SchemaBuilder {
     }
   }
 
-  protected List<MaterializedField> columns = new ArrayList<>( );
+  protected TupleSchema schema = new TupleSchema();
   private SelectionVectorMode svMode = SelectionVectorMode.NONE;
 
   public SchemaBuilder() { }
@@ -144,59 +154,60 @@ public class SchemaBuilder {
     }
   }
 
-  public SchemaBuilder add(String pathName, MajorType type) {
-    return add(MaterializedField.create(pathName, type));
+  public SchemaBuilder add(String name, MajorType type) {
+    return add(MaterializedField.create(name, type));
   }
 
   public SchemaBuilder add(MaterializedField col) {
-    columns.add(col);
+    schema.add(col);
     return this;
   }
 
   /**
    * Create a column schema using the "basic three" properties of name, type and
    * cardinality (AKA "data mode.") Use the {@link ColumnBuilder} to set
-   * other schema attributes.
+   * other schema attributes. Name is relative to the enclosing map or tuple;
+   * it is not the fully qualified path name.
    */
 
-  public static MaterializedField columnSchema(String pathName, MinorType type, DataMode mode) {
-    return MaterializedField.create(pathName,
+  public static MaterializedField columnSchema(String name, MinorType type, DataMode mode) {
+    return MaterializedField.create(name,
         MajorType.newBuilder()
           .setMinorType(type)
          .setMode(mode)
          .build());
   }
 
-  public SchemaBuilder add(String pathName, MinorType type, DataMode mode) {
-    return add(columnSchema(pathName, type, mode));
+  public SchemaBuilder add(String name, MinorType type, DataMode mode) {
+    return add(columnSchema(name, type, mode));
   }
 
-  public SchemaBuilder add(String pathName, MinorType type) {
-    return add(pathName, type, DataMode.REQUIRED);
+  public SchemaBuilder add(String name, MinorType type) {
+    return add(name, type, DataMode.REQUIRED);
   }
 
-  public SchemaBuilder add(String pathName, MinorType type, int width) {
-    MaterializedField field = new SchemaBuilder.ColumnBuilder(pathName, type)
+  public SchemaBuilder add(String name, MinorType type, int width) {
+    MaterializedField field = new SchemaBuilder.ColumnBuilder(name, type)
       .setMode(DataMode.REQUIRED)
       .setWidth(width)
       .build();
     return add(field);
   }
 
-  public SchemaBuilder addNullable(String pathName, MinorType type) {
-    return add(pathName, type, DataMode.OPTIONAL);
+  public SchemaBuilder addNullable(String name, MinorType type) {
+    return add(name, type, DataMode.OPTIONAL);
   }
 
-  public SchemaBuilder addNullable(String pathName, MinorType type, int width) {
-    MaterializedField field = new SchemaBuilder.ColumnBuilder(pathName, type)
+  public SchemaBuilder addNullable(String name, MinorType type, int width) {
+    MaterializedField field = new SchemaBuilder.ColumnBuilder(name, type)
       .setMode(DataMode.OPTIONAL)
       .setWidth(width)
       .build();
    return add(field);
  }
 
-  public SchemaBuilder addArray(String pathName, MinorType type) {
-    return add(pathName, type, DataMode.REPEATED);
+  public SchemaBuilder addArray(String name, MinorType type) {
+    return add(name, type, DataMode.REPEATED);
   }
 
   /**
@@ -209,7 +220,11 @@ public class SchemaBuilder {
    */
 
   public MapBuilder addMap(String pathName) {
-    return new MapBuilder(this, pathName);
+    return new MapBuilder(this, pathName, DataMode.REQUIRED);
+  }
+
+  public MapBuilder addMapArray(String pathName) {
+    return new MapBuilder(this, pathName, DataMode.REPEATED);
   }
 
   public SchemaBuilder withSVMode(SelectionVectorMode svMode) {
@@ -218,14 +233,18 @@ public class SchemaBuilder {
   }
 
   public BatchSchema build() {
-    return new BatchSchema(svMode, columns);
+    return schema.toBatchSchema(svMode);
   }
 
-  void finishMap(MaterializedField map) {
-    columns.add(map);
+  void finishMap(MapColumnMetadata map) {
+    schema.add(map);
   }
 
   public SchemaBuilder buildMap() {
     throw new IllegalStateException("Cannot build map for a top-level schema");
   }
+
+  public TupleMetadata buildSchema() {
+    return schema;
+  }
 }
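For orientation (this sketch is not part of the patch): the revised builder distinguishes required maps (addMap) from repeated map arrays (addMapArray), and the finished schema can be materialized either as the legacy BatchSchema via build() or in the new TupleMetadata form via buildSchema(). A minimal usage sketch that relies only on methods visible in the diff above; the column names are illustrative:

    // Build a schema with a scalar column, a required map, and a repeated map.
    TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addMap("m")                    // MapBuilder with DataMode.REQUIRED
          .addNullable("b", MinorType.VARCHAR)
          .buildMap()                   // returns the parent SchemaBuilder
        .addMapArray("ma")              // MapBuilder with DataMode.REPEATED
          .add("c", MinorType.INT)
          .buildMap()
        .buildSchema();                 // new TupleMetadata view of the schema

    // The legacy form remains available:
    BatchSchema batchSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .withSVMode(SelectionVectorMode.NONE)
        .build();

Note the design choice visible in MapBuilder's constructor: DataMode.OPTIONAL is rejected by assertion because Drill does not support optional maps.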
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/file/JsonFileBuilder.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/file/JsonFileBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/file/JsonFileBuilder.java index ff93bf0..2076b16 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/file/JsonFileBuilder.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/file/JsonFileBuilder.java @@ -23,9 +23,10 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.vector.accessor.ColumnAccessor; -import org.apache.drill.exec.vector.accessor.ColumnReader; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.ValueType; import org.apache.drill.test.rowSet.RowSet; +import org.apache.drill.test.rowSet.RowSetReader; import java.io.BufferedOutputStream; import java.io.File; @@ -44,13 +45,14 @@ public class JsonFileBuilder public static final String DEFAULT_DECIMAL_FORMATTER = "%s"; public static final String DEFAULT_PERIOD_FORMATTER = "%s"; + @SuppressWarnings("unchecked") public static final Map<String, String> DEFAULT_FORMATTERS = new ImmutableMap.Builder() - .put(ColumnAccessor.ValueType.DOUBLE, DEFAULT_DOUBLE_FORMATTER) - .put(ColumnAccessor.ValueType.INTEGER, DEFAULT_INTEGER_FORMATTER) - .put(ColumnAccessor.ValueType.LONG, DEFAULT_LONG_FORMATTER) - .put(ColumnAccessor.ValueType.STRING, DEFAULT_STRING_FORMATTER) - .put(ColumnAccessor.ValueType.DECIMAL, DEFAULT_DECIMAL_FORMATTER) - .put(ColumnAccessor.ValueType.PERIOD, DEFAULT_PERIOD_FORMATTER) + .put(ValueType.DOUBLE, DEFAULT_DOUBLE_FORMATTER) + .put(ValueType.INTEGER, DEFAULT_INTEGER_FORMATTER) + .put(ValueType.LONG, DEFAULT_LONG_FORMATTER) + .put(ValueType.STRING, DEFAULT_STRING_FORMATTER) + .put(ValueType.DECIMAL, DEFAULT_DECIMAL_FORMATTER) + .put(ValueType.PERIOD, DEFAULT_PERIOD_FORMATTER) .build(); private final RowSet rowSet; @@ -66,8 +68,7 @@ public class JsonFileBuilder Preconditions.checkNotNull(columnFormatter); Iterator<MaterializedField> fields = rowSet - .schema() - .batch() + .batchSchema() .iterator(); boolean hasColumn = false; @@ -90,14 +91,12 @@ public class JsonFileBuilder tableFile.getParentFile().mkdirs(); try (BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(tableFile))) { - final RowSet.RowSetReader reader = rowSet.reader(); + final RowSetReader reader = rowSet.reader(); final int numCols = rowSet - .schema() - .batch() + .batchSchema() .getFieldCount(); final Iterator<MaterializedField> fieldIterator = rowSet - .schema() - .batch() + .batchSchema() .iterator(); final List<String> columnNames = Lists.newArrayList(); final List<String> columnFormatters = Lists.newArrayList(); @@ -105,8 +104,8 @@ public class JsonFileBuilder // Build formatters from first row. 
while (fieldIterator.hasNext()) { final String columnName = fieldIterator.next().getName(); - final ColumnReader columnReader = reader.column(columnName); - final ColumnAccessor.ValueType valueType = columnReader.valueType(); + final ScalarReader columnReader = reader.scalar(columnName); + final ValueType valueType = columnReader.valueType(); final String columnFormatter; if (customFormatters.containsKey(columnName)) { @@ -135,7 +134,7 @@ public class JsonFileBuilder sb.append(separator); final String columnName = columnNames.get(columnIndex); - final ColumnReader columnReader = reader.column(columnIndex); + final ScalarReader columnReader = reader.scalar(columnIndex); final String columnFormatter = columnFormatters.get(columnIndex); final Object columnObject = columnReader.getObject(); final String columnString = String.format(columnFormatter, columnObject); http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/DummyWriterTest.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/DummyWriterTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/DummyWriterTest.java new file mode 100644 index 0000000..db33b30 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/DummyWriterTest.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet.test; + +import static org.junit.Assert.assertNull; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter; +import org.apache.drill.exec.vector.accessor.writer.AbstractTupleWriter; +import org.apache.drill.exec.vector.accessor.writer.ColumnWriterFactory; +import org.apache.drill.test.SubOperatorTest; +import org.apache.drill.test.rowSet.SchemaBuilder; +import org.junit.Test; + +public class DummyWriterTest extends SubOperatorTest { + + /** + * Test only, bare-bones tuple writer used to gather the dummy + * column writers. + */ + + public class RootWriterFixture extends AbstractTupleWriter { + + protected RootWriterFixture(TupleMetadata schema, + List<AbstractObjectWriter> writers) { + super(schema, writers); + } + } + + /** + * Test dummy column writers for scalars and arrays of + * scalars. + */ + + @Test + public void testDummyScalar() { + + TupleMetadata schema = new SchemaBuilder() + .add("a", MinorType.INT) + .addArray("b", MinorType.VARCHAR) + .buildSchema(); + List<AbstractObjectWriter> writers = new ArrayList<>(); + + // We provide no vector. 
Factory should build us "dummy" writers. + + writers.add(ColumnWriterFactory.buildColumnWriter(schema.metadata("a"), null)); + writers.add(ColumnWriterFactory.buildColumnWriter(schema.metadata("b"), null)); + AbstractTupleWriter rootWriter = new RootWriterFixture(schema, writers); + + // Events are ignored. + + rootWriter.startWrite(); + rootWriter.startRow(); + + // At present, dummy writers report no type (because they don't have one.) + + assertNull(rootWriter.scalar(0).valueType()); + + // First column. Set int value. + + rootWriter.scalar(0).setInt(10); + + // Dummy writer does not do type checking. Write "wrong" type. + // Should be allowed. + + rootWriter.scalar("a").setString("foo"); + + // Column is required, but writer does no checking. Can set + // a null value. + + rootWriter.column(0).scalar().setNull(); + + // Second column: is an array. + + rootWriter.array(1).scalar().setString("bar"); + rootWriter.array(1).scalar().setString("mumble"); + + // Again, type is not checked. + + rootWriter.array("b").scalar().setInt(200); + + // More ignored events. + + rootWriter.restartRow(); + rootWriter.saveRow(); + rootWriter.endWrite(); + } + + /** + * Test a dummy map or map array. A (non-enforced) rule is that such maps + * contain only dummy writers. The writers act like "real" writers. + */ + + @Test + public void testDummyMap() { + + TupleMetadata schema = new SchemaBuilder() + .addMap("m1") + .add("a", MinorType.INT) + .addArray("b", MinorType.VARCHAR) + .buildMap() + .addMapArray("m2") + .add("c", MinorType.INT) + .buildMap() + .buildSchema(); + List<AbstractObjectWriter> writers = new ArrayList<>(); + + { + schema.metadata("m1").setProjected(false); + TupleMetadata mapSchema = schema.metadata("m1").mapSchema(); + List<AbstractObjectWriter> members = new ArrayList<>(); + members.add(ColumnWriterFactory.buildColumnWriter(mapSchema.metadata("a"), null)); + members.add(ColumnWriterFactory.buildColumnWriter(mapSchema.metadata("b"), null)); + writers.add(ColumnWriterFactory.buildMapWriter(schema.metadata("m1"), null, members)); + } + + { + schema.metadata("m2").setProjected(false); + TupleMetadata mapSchema = schema.metadata("m2").mapSchema(); + List<AbstractObjectWriter> members = new ArrayList<>(); + members.add(ColumnWriterFactory.buildColumnWriter(mapSchema.metadata("c"), null)); + writers.add(ColumnWriterFactory.buildMapWriter(schema.metadata("m2"), null, members)); + } + + AbstractTupleWriter rootWriter = new RootWriterFixture(schema, writers); + + // Events are ignored. + + rootWriter.startWrite(); + rootWriter.startRow(); + + // Dummy columns seem real. + + rootWriter.tuple("m1").scalar("a").setInt(20); + rootWriter.tuple(0).array("b").scalar().setString("foo"); + + // Dummy array map seems real. + + rootWriter.array("m2").tuple().scalar("c").setInt(30); + rootWriter.array("m2").save(); + rootWriter.array(1).tuple().scalar(0).setInt(40); + rootWriter.array(1).save(); + + // More ignored events. 
+ + rootWriter.restartRow(); + rootWriter.saveRow(); + rootWriter.endWrite(); + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java new file mode 100644 index 0000000..10e9032 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet.test; + +import java.util.concurrent.TimeUnit; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.ColumnMetadata; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.TupleSchema; +import org.apache.drill.exec.vector.IntVector; +import org.apache.drill.exec.vector.NullableIntVector; +import org.apache.drill.exec.vector.RepeatedIntVector; +import org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter.ArrayObjectWriter; +import org.apache.drill.exec.vector.accessor.writer.NullableScalarWriter; +import org.apache.drill.exec.vector.accessor.writer.ScalarArrayWriter; +import org.apache.drill.test.OperatorFixture; +import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; +import org.apache.drill.test.rowSet.RowSetWriter; +import org.apache.drill.test.rowSet.SchemaBuilder; + +import com.google.common.base.Stopwatch; + +/** + * Tests the performance of the writers compared to using the value + * vector mutators directly. In order to achieve apples-to-apples + * comparison, the tests work directly with individual columns in + * the writer case; the row writer level is omitted as the row writer + * simulates the reader logic previously used to write to vectors. 
+ * <p> + * Current results: + * <ul> + * <li>Writer is 42% faster than a required mutator.</li> + * <li>Writer is 73% faster than a nullable mutator.</li> + * <li>Writer is 407% faster than a repeated mutator.</li> + * </ul> + * Since performance is critical for this component (this is the + * ultimate "inner loop", please run these tests periodically to + * ensure that performance does not drop; it is very easy to add + * a bit of code here or there that greatly impacts performance. + * <p> + * This is not a JUnit test. Rather, it is a stand-alone program + * which must be run explicitly. One handy way is to run it from + * your IDE. If using Eclipse, monitor the system to wait for Eclipse + * to finish its background processing before launching. + */ + +public class PerformanceTool { + + public static final int ROW_COUNT = 16 * 1024 * 1024 / 4; + public static final int ITERATIONS = 300; + + public static abstract class PerfTester { + final TupleMetadata rowSchema; + final MaterializedField field; + final OperatorFixture fixture; + final String label; + final Stopwatch timer = Stopwatch.createUnstarted(); + + public PerfTester(OperatorFixture fixture, DataMode mode, String label) { + this.fixture = fixture; + this.label = label; + field = SchemaBuilder.columnSchema("a", MinorType.INT, mode); + rowSchema = new SchemaBuilder() + .add(field) + .buildSchema(); + } + + public void runTest() { + for (int i = 0; i < ITERATIONS; i++) { + doTest(); + } + System.out.println(label + ": " + timer.elapsed(TimeUnit.MILLISECONDS)); + } + + public abstract void doTest(); + } + + public static class RequiredVectorTester extends PerfTester { + + public RequiredVectorTester(OperatorFixture fixture) { + super(fixture, DataMode.REQUIRED, "Required vector"); + } + + @Override + public void doTest() { + try (IntVector vector = new IntVector(field, fixture.allocator());) { + vector.allocateNew(4096); + IntVector.Mutator mutator = vector.getMutator(); + timer.start(); + for (int i = 0; i < ROW_COUNT; i++) { + mutator.setSafe(i, 1234); + } + timer.stop(); + } + } + } + + public static class NullableVectorTester extends PerfTester { + + public NullableVectorTester(OperatorFixture fixture) { + super(fixture, DataMode.OPTIONAL, "Nullable vector"); + } + + @Override + public void doTest() { + try (NullableIntVector vector = new NullableIntVector(field, fixture.allocator());) { + vector.allocateNew(4096); + NullableIntVector.Mutator mutator = vector.getMutator(); + timer.start(); + for (int i = 0; i < ROW_COUNT; i++) { + mutator.setSafe(i, 1234); + } + timer.stop(); + } + } + } + + public static class RepeatedVectorTester extends PerfTester { + + public RepeatedVectorTester(OperatorFixture fixture) { + super(fixture, DataMode.REQUIRED, "Repeated vector"); + } + + @Override + public void doTest() { + try (RepeatedIntVector vector = new RepeatedIntVector(field, fixture.allocator());) { + vector.allocateNew(ROW_COUNT, 5 * ROW_COUNT); + RepeatedIntVector.Mutator mutator = vector.getMutator(); + timer.start(); + for (int i = 0; i < ROW_COUNT / 5; i++) { + mutator.startNewValue(i); + mutator.addSafe(i, 12341); + mutator.addSafe(i, 12342); + mutator.addSafe(i, 12343); + mutator.addSafe(i, 12344); + mutator.addSafe(i, 12345); + } + timer.stop(); + } + } + } + + private static class TestWriterIndex implements ColumnWriterIndex { + + public int index; + + @Override + public int vectorIndex() { return index; } + + @Override + public final void nextElement() { index++; } + + @Override + public void rollover() { } + + @Override + 
public int rowStartIndex() { return index; } + + @Override + public ColumnWriterIndex outerIndex() { return null; } + } + + public static class RequiredWriterTester extends PerfTester { + + public RequiredWriterTester(OperatorFixture fixture) { + super(fixture, DataMode.REQUIRED, "Required writer"); + } + + @Override + public void doTest() { + try (IntVector vector = new IntVector(rowSchema.column(0), fixture.allocator());) { + vector.allocateNew(ROW_COUNT); + IntColumnWriter colWriter = new IntColumnWriter(vector); + TestWriterIndex index = new TestWriterIndex(); + colWriter.bindIndex(index); + colWriter.startWrite(); + timer.start(); + while (index.index < ROW_COUNT) { + colWriter.setInt(1234); + } + timer.stop(); + colWriter.endWrite(); + } + } + } + + public static class NullableWriterTester extends PerfTester { + + public NullableWriterTester(OperatorFixture fixture) { + super(fixture, DataMode.OPTIONAL, "Nullable writer"); + } + + @Override + public void doTest() { + try (NullableIntVector vector = new NullableIntVector(rowSchema.column(0), fixture.allocator());) { + vector.allocateNew(ROW_COUNT); + NullableScalarWriter colWriter = new NullableScalarWriter( + vector, new IntColumnWriter(vector.getValuesVector())); + TestWriterIndex index = new TestWriterIndex(); + colWriter.bindIndex(index); + colWriter.startWrite(); + timer.start(); + while (index.index < ROW_COUNT) { + colWriter.setInt(1234); + } + timer.stop(); + colWriter.endWrite(); + } + } + } + + public static class ArrayWriterTester extends PerfTester { + + public ArrayWriterTester(OperatorFixture fixture) { + super(fixture, DataMode.REQUIRED, "Array writer"); + } + + @Override + public void doTest() { + try (RepeatedIntVector vector = new RepeatedIntVector(rowSchema.column(0), fixture.allocator());) { + vector.allocateNew(ROW_COUNT, 5 * ROW_COUNT); + IntColumnWriter colWriter = new IntColumnWriter(vector.getDataVector()); + ColumnMetadata colSchema = TupleSchema.fromField(vector.getField()); + ArrayObjectWriter arrayWriter = ScalarArrayWriter.build(colSchema, vector, colWriter); + TestWriterIndex index = new TestWriterIndex(); + arrayWriter.events().bindIndex(index); + arrayWriter.events().startWrite(); + timer.start(); + for ( ; index.index < ROW_COUNT / 5; index.index++) { + arrayWriter.events().startRow(); + colWriter.setInt(12341); + colWriter.setInt(12342); + colWriter.setInt(12343); + colWriter.setInt(12344); + colWriter.setInt(12345); + arrayWriter.events().endArrayValue(); + } + timer.stop(); + arrayWriter.events().endWrite(); + } + } + } + + public static void main(String args[]) { + try (OperatorFixture fixture = OperatorFixture.standardFixture();) { + for (int i = 0; i < 2; i++) { + System.out.println((i==0) ? 
"Warmup" : "Test run"); + new RequiredVectorTester(fixture).runTest(); + new RequiredWriterTester(fixture).runTest(); + new NullableVectorTester(fixture).runTest(); + new NullableWriterTester(fixture).runTest(); + new RepeatedVectorTester(fixture).runTest(); + new ArrayWriterTester(fixture).runTest(); + } + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + @SuppressWarnings("unused") + private static void testWriter2(TupleMetadata rowSchema, + OperatorFixture fixture, Stopwatch timer) { + ExtendableRowSet rs = fixture.rowSet(rowSchema); + RowSetWriter writer = rs.writer(4096); + ScalarWriter colWriter = writer.scalar(0); + timer.start(); + for (int i = 0; i < ROW_COUNT; i++) { + colWriter.setInt(i); + writer.save(); + } + timer.stop(); + writer.done().clear(); + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java index af35cdf..4db4d09 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java @@ -19,362 +19,508 @@ package org.apache.drill.test.rowSet.test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.UnsupportedEncodingException; +import java.util.Arrays; -import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.accessor.ArrayReader; import org.apache.drill.exec.vector.accessor.ArrayWriter; -import org.apache.drill.exec.vector.accessor.TupleAccessor.TupleSchema; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.TupleReader; +import org.apache.drill.exec.vector.accessor.TupleWriter; +import org.apache.drill.exec.vector.accessor.ValueType; +import org.apache.drill.exec.vector.complex.MapVector; +import org.apache.drill.exec.vector.complex.RepeatedMapVector; import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; -import org.apache.drill.test.rowSet.RowSet.RowSetReader; -import org.apache.drill.test.rowSet.RowSet.RowSetWriter; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; import org.apache.drill.test.rowSet.RowSetComparison; -import org.apache.drill.test.rowSet.RowSetSchema; -import org.apache.drill.test.rowSet.RowSetSchema.FlattenedSchema; -import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema; +import org.apache.drill.test.rowSet.RowSetReader; +import org.apache.drill.test.rowSet.RowSetWriter; import org.apache.drill.test.rowSet.SchemaBuilder; import org.junit.Test; -import 
com.google.common.base.Splitter; +/** + * Test row sets. Since row sets are a thin wrapper around vectors, + * readers and writers, this is also a test of those constructs. + * <p> + * Tests basic protocol of the writers: <pre><code> + * row : tuple + * tuple : column * + * column : scalar obj | array obj | tuple obj + * scalar obj : scalar + * array obj : array writer + * array writer : element + * element : column + * tuple obj : tuple</code></pre> + */ public class RowSetTest extends SubOperatorTest { /** - * Test a simple physical schema with no maps. + * Test the simplest constructs: a row with top-level scalar + * columns. + * <p> + * The focus here is the structure of the readers and writers, along + * with the row set loader and verifier that use those constructs. + * That is, while this test uses the int vector, this test is not + * focused on that vector. */ -// @Test -// public void testSchema() { -// BatchSchema batchSchema = new SchemaBuilder() -// .add("c", MinorType.INT) -// .add("a", MinorType.INT, DataMode.REPEATED) -// .addNullable("b", MinorType.VARCHAR) -// .build(); -// -// assertEquals("c", batchSchema.getColumn(0).getName()); -// assertEquals("a", batchSchema.getColumn(1).getName()); -// assertEquals("b", batchSchema.getColumn(2).getName()); -// -// RowSetSchema schema = new RowSetSchema(batchSchema); -// TupleSchema access = schema.hierarchicalAccess(); -// assertEquals(3, access.count()); -// -// crossCheck(access, 0, "c", MinorType.INT); -// assertEquals(DataMode.REQUIRED, access.column(0).getDataMode()); -// assertEquals(DataMode.REQUIRED, access.column(0).getType().getMode()); -// assertTrue(! access.column(0).isNullable()); -// -// crossCheck(access, 1, "a", MinorType.INT); -// assertEquals(DataMode.REPEATED, access.column(1).getDataMode()); -// assertEquals(DataMode.REPEATED, access.column(1).getType().getMode()); -// assertTrue(! access.column(1).isNullable()); -// -// crossCheck(access, 2, "b", MinorType.VARCHAR); -// assertEquals(MinorType.VARCHAR, access.column(2).getType().getMinorType()); -// assertEquals(DataMode.OPTIONAL, access.column(2).getDataMode()); -// assertEquals(DataMode.OPTIONAL, access.column(2).getType().getMode()); -// assertTrue(access.column(2).isNullable()); -// -// // No maps: physical schema is the same as access schema. -// -// PhysicalSchema physical = schema.physical(); -// assertEquals(3, physical.count()); -// assertEquals("c", physical.column(0).field().getName()); -// assertEquals("a", physical.column(1).field().getName()); -// assertEquals("b", physical.column(2).field().getName()); -// } + @Test + public void testScalarStructure() { + TupleMetadata schema = new SchemaBuilder() + .add("a", MinorType.INT) + .buildSchema(); + ExtendableRowSet rowSet = fixture.rowSet(schema); + RowSetWriter writer = rowSet.writer(); - /** - * Validate that the actual column metadata is as expected by - * cross-checking: validate that the column at the index and - * the column at the column name are both correct. 
- * - * @param schema the schema for the row set - * @param index column index - * @param fullName expected column name - * @param type expected type - */ + // Required Int + // Verify the invariants of the "full" and "simple" access paths -// public void crossCheck(TupleSchema schema, int index, String fullName, MinorType type) { -// String name = null; -// for (String part : Splitter.on(".").split(fullName)) { -// name = part; -// } -// assertEquals(name, schema.column(index).getName()); -// assertEquals(index, schema.columnIndex(fullName)); -// assertSame(schema.column(index), schema.column(fullName)); -// assertEquals(type, schema.column(index).getType().getMinorType()); -// } + assertEquals(ObjectType.SCALAR, writer.column("a").type()); + assertSame(writer.column("a"), writer.column(0)); + assertSame(writer.scalar("a"), writer.scalar(0)); + assertSame(writer.column("a").scalar(), writer.scalar("a")); + assertSame(writer.column(0).scalar(), writer.scalar(0)); + assertEquals(ValueType.INTEGER, writer.scalar(0).valueType()); - /** - * Verify that a nested map schema works as expected. - */ + // Sanity checks -// @Test -// public void testMapSchema() { -// BatchSchema batchSchema = new SchemaBuilder() -// .add("c", MinorType.INT) -// .addMap("a") -// .addNullable("b", MinorType.VARCHAR) -// .add("d", MinorType.INT) -// .addMap("e") -// .add("f", MinorType.VARCHAR) -// .buildMap() -// .add("g", MinorType.INT) -// .buildMap() -// .add("h", MinorType.BIGINT) -// .build(); -// -// RowSetSchema schema = new RowSetSchema(batchSchema); -// -// // Access schema: flattened with maps removed -// -// FlattenedSchema access = schema.flatAccess(); -// assertEquals(6, access.count()); -// crossCheck(access, 0, "c", MinorType.INT); -// crossCheck(access, 1, "a.b", MinorType.VARCHAR); -// crossCheck(access, 2, "a.d", MinorType.INT); -// crossCheck(access, 3, "a.e.f", MinorType.VARCHAR); -// crossCheck(access, 4, "a.g", MinorType.INT); -// crossCheck(access, 5, "h", MinorType.BIGINT); -// -// // Should have two maps. -// -// assertEquals(2, access.mapCount()); -// assertEquals("a", access.map(0).getName()); -// assertEquals("e", access.map(1).getName()); -// assertEquals(0, access.mapIndex("a")); -// assertEquals(1, access.mapIndex("a.e")); -// -// // Verify physical schema: should mirror the schema created above. 
-// -// PhysicalSchema physical = schema.physical(); -// assertEquals(3, physical.count()); -// assertEquals("c", physical.column(0).field().getName()); -// assertEquals("c", physical.column(0).fullName()); -// assertFalse(physical.column(0).isMap()); -// assertNull(physical.column(0).mapSchema()); -// -// assertEquals("a", physical.column(1).field().getName()); -// assertEquals("a", physical.column(1).fullName()); -// assertTrue(physical.column(1).isMap()); -// assertNotNull(physical.column(1).mapSchema()); -// -// assertEquals("h", physical.column(2).field().getName()); -// assertEquals("h", physical.column(2).fullName()); -// assertFalse(physical.column(2).isMap()); -// assertNull(physical.column(2).mapSchema()); -// -// PhysicalSchema aSchema = physical.column(1).mapSchema(); -// assertEquals(4, aSchema.count()); -// assertEquals("b", aSchema.column(0).field().getName()); -// assertEquals("a.b", aSchema.column(0).fullName()); -// assertEquals("d", aSchema.column(1).field().getName()); -// assertEquals("e", aSchema.column(2).field().getName()); -// assertEquals("g", aSchema.column(3).field().getName()); -// -// PhysicalSchema eSchema = aSchema.column(2).mapSchema(); -// assertEquals(1, eSchema.count()); -// assertEquals("f", eSchema.column(0).field().getName()); -// assertEquals("a.e.f", eSchema.column(0).fullName()); -// } + try { + writer.column(0).array(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } + try { + writer.column(0).tuple(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } - /** - * Verify that simple scalar (non-repeated) column readers - * and writers work as expected. This is for tiny ints. - */ + // Test the various ways to get at the scalar writer. - @Test - public void testTinyIntRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.TINYINT) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0) - .add(Byte.MAX_VALUE) - .add(Byte.MIN_VALUE) - .build(); - assertEquals(3, rs.rowCount()); - RowSetReader reader = rs.reader(); + writer.column("a").scalar().setInt(10); + writer.save(); + writer.scalar("a").setInt(20); + writer.save(); + writer.column(0).scalar().setInt(30); + writer.save(); + writer.scalar(0).setInt(40); + writer.save(); + + // Finish the row set and get a reader. 
+ + SingleRowSet actual = writer.done(); + RowSetReader reader = actual.reader(); + + // Verify invariants + + assertEquals(ObjectType.SCALAR, reader.column(0).type()); + assertSame(reader.column("a"), reader.column(0)); + assertSame(reader.scalar("a"), reader.scalar(0)); + assertSame(reader.column("a").scalar(), reader.scalar("a")); + assertSame(reader.column(0).scalar(), reader.scalar(0)); + assertEquals(ValueType.INTEGER, reader.scalar(0).valueType()); + + // Test various accessors: full and simple + + assertTrue(reader.next()); + assertEquals(10, reader.column("a").scalar().getInt()); assertTrue(reader.next()); - assertEquals(0, reader.column(0).getInt()); + assertEquals(20, reader.scalar("a").getInt()); assertTrue(reader.next()); - assertEquals(Byte.MAX_VALUE, reader.column(0).getInt()); - assertEquals((int) Byte.MAX_VALUE, reader.column(0).getObject()); + assertEquals(30, reader.column(0).scalar().getInt()); assertTrue(reader.next()); - assertEquals(Byte.MIN_VALUE, reader.column(0).getInt()); + assertEquals(40, reader.scalar(0).getInt()); assertFalse(reader.next()); - rs.clear(); - } - @Test - public void testSmallIntRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.SMALLINT) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0) - .add(Short.MAX_VALUE) - .add(Short.MIN_VALUE) + // Test the above again via the writer and reader + // utility classes. + + SingleRowSet expected = fixture.rowSetBuilder(schema) + .addRow(10) + .addRow(20) + .addRow(30) + .addRow(40) .build(); - RowSetReader reader = rs.reader(); - assertTrue(reader.next()); - assertEquals(0, reader.column(0).getInt()); - assertTrue(reader.next()); - assertEquals(Short.MAX_VALUE, reader.column(0).getInt()); - assertEquals((int) Short.MAX_VALUE, reader.column(0).getObject()); - assertTrue(reader.next()); - assertEquals(Short.MIN_VALUE, reader.column(0).getInt()); - assertFalse(reader.next()); - rs.clear(); + new RowSetComparison(expected).verifyAndClearAll(actual); } + /** + * Test a record with a top level array. The focus here is on the + * scalar array structure. 
+ * + * @throws VectorOverflowException should never occur + */ + @Test - public void testIntRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.INT) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0) - .add(Integer.MAX_VALUE) - .add(Integer.MIN_VALUE) - .build(); - RowSetReader reader = rs.reader(); + public void testScalarArrayStructure() { + TupleMetadata schema = new SchemaBuilder() + .addArray("a", MinorType.INT) + .buildSchema(); + ExtendableRowSet rowSet = fixture.rowSet(schema); + RowSetWriter writer = rowSet.writer(); + + // Repeated Int + // Verify the invariants of the "full" and "simple" access paths + + assertEquals(ObjectType.ARRAY, writer.column("a").type()); + + assertSame(writer.column("a"), writer.column(0)); + assertSame(writer.array("a"), writer.array(0)); + assertSame(writer.column("a").array(), writer.array("a")); + assertSame(writer.column(0).array(), writer.array(0)); + + assertEquals(ObjectType.SCALAR, writer.column("a").array().entry().type()); + assertEquals(ObjectType.SCALAR, writer.column("a").array().entryType()); + assertSame(writer.array(0).entry().scalar(), writer.array(0).scalar()); + assertEquals(ValueType.INTEGER, writer.array(0).scalar().valueType()); + + // Sanity checks + + try { + writer.column(0).scalar(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } + try { + writer.column(0).tuple(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } + + // Write some data + + ScalarWriter intWriter = writer.array("a").scalar(); + intWriter.setInt(10); + intWriter.setInt(11); + writer.save(); + intWriter.setInt(20); + intWriter.setInt(21); + intWriter.setInt(22); + writer.save(); + intWriter.setInt(30); + writer.save(); + intWriter.setInt(40); + intWriter.setInt(41); + writer.save(); + + // Finish the row set and get a reader. 
+ + SingleRowSet actual = writer.done(); + RowSetReader reader = actual.reader(); + + // Verify the invariants of the "full" and "simple" access paths + + assertEquals(ObjectType.ARRAY, writer.column("a").type()); + + assertSame(reader.column("a"), reader.column(0)); + assertSame(reader.array("a"), reader.array(0)); + assertSame(reader.column("a").array(), reader.array("a")); + assertSame(reader.column(0).array(), reader.array(0)); + + assertEquals(ObjectType.SCALAR, reader.column("a").array().entryType()); + assertEquals(ValueType.INTEGER, reader.array(0).elements().valueType()); + + // Read and verify the rows + + ScalarElementReader intReader = reader.array(0).elements(); + assertTrue(reader.next()); + assertEquals(2, intReader.size()); + assertEquals(10, intReader.getInt(0)); + assertEquals(11, intReader.getInt(1)); assertTrue(reader.next()); - assertEquals(0, reader.column(0).getInt()); + assertEquals(3, intReader.size()); + assertEquals(20, intReader.getInt(0)); + assertEquals(21, intReader.getInt(1)); + assertEquals(22, intReader.getInt(2)); assertTrue(reader.next()); - assertEquals(Integer.MAX_VALUE, reader.column(0).getInt()); - assertEquals(Integer.MAX_VALUE, reader.column(0).getObject()); + assertEquals(1, intReader.size()); + assertEquals(30, intReader.getInt(0)); assertTrue(reader.next()); - assertEquals(Integer.MIN_VALUE, reader.column(0).getInt()); + assertEquals(2, intReader.size()); + assertEquals(40, intReader.getInt(0)); + assertEquals(41, intReader.getInt(1)); assertFalse(reader.next()); - rs.clear(); - } - @Test - public void testLongRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.BIGINT) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0L) - .add(Long.MAX_VALUE) - .add(Long.MIN_VALUE) + // Test the above again via the writer and reader + // utility classes. + + SingleRowSet expected = fixture.rowSetBuilder(schema) + .addSingleCol(new int[] {10, 11}) + .addSingleCol(new int[] {20, 21, 22}) + .addSingleCol(new int[] {30}) + .addSingleCol(new int[] {40, 41}) .build(); - RowSetReader reader = rs.reader(); - assertTrue(reader.next()); - assertEquals(0, reader.column(0).getLong()); - assertTrue(reader.next()); - assertEquals(Long.MAX_VALUE, reader.column(0).getLong()); - assertEquals(Long.MAX_VALUE, reader.column(0).getObject()); - assertTrue(reader.next()); - assertEquals(Long.MIN_VALUE, reader.column(0).getLong()); - assertFalse(reader.next()); - rs.clear(); + new RowSetComparison(expected) + .verifyAndClearAll(actual); } + /** + * Test a simple map structure at the top level of a row. 
+ * + * @throws VectorOverflowException should never occur + */ + @Test - public void testFloatRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.FLOAT4) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0F) - .add(Float.MAX_VALUE) - .add(Float.MIN_VALUE) - .build(); - RowSetReader reader = rs.reader(); + public void testMapStructure() { + TupleMetadata schema = new SchemaBuilder() + .add("a", MinorType.INT) + .addMap("m") + .addArray("b", MinorType.INT) + .buildMap() + .buildSchema(); + ExtendableRowSet rowSet = fixture.rowSet(schema); + RowSetWriter writer = rowSet.writer(); + + // Map and Int + // Test Invariants + + assertEquals(ObjectType.SCALAR, writer.column("a").type()); + assertEquals(ObjectType.SCALAR, writer.column(0).type()); + assertEquals(ObjectType.TUPLE, writer.column("m").type()); + assertEquals(ObjectType.TUPLE, writer.column(1).type()); + assertSame(writer.column(1).tuple(), writer.tuple(1)); + + TupleWriter mapWriter = writer.column(1).tuple(); + assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entry().type()); + assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entryType()); + + ScalarWriter aWriter = writer.column("a").scalar(); + ScalarWriter bWriter = writer.column("m").tuple().column("b").array().entry().scalar(); + assertSame(bWriter, writer.tuple(1).array(0).scalar()); + assertEquals(ValueType.INTEGER, bWriter.valueType()); + + // Sanity checks + + try { + writer.column(1).scalar(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } + try { + writer.column(1).array(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } + + // Write data + + aWriter.setInt(10); + bWriter.setInt(11); + bWriter.setInt(12); + writer.save(); + aWriter.setInt(20); + bWriter.setInt(21); + bWriter.setInt(22); + writer.save(); + aWriter.setInt(30); + bWriter.setInt(31); + bWriter.setInt(32); + writer.save(); + + // Finish the row set and get a reader. 
+ + SingleRowSet actual = writer.done(); + RowSetReader reader = actual.reader(); + + assertEquals(ObjectType.SCALAR, reader.column("a").type()); + assertEquals(ObjectType.SCALAR, reader.column(0).type()); + assertEquals(ObjectType.TUPLE, reader.column("m").type()); + assertEquals(ObjectType.TUPLE, reader.column(1).type()); + assertSame(reader.column(1).tuple(), reader.tuple(1)); + + ScalarReader aReader = reader.column(0).scalar(); + TupleReader mReader = reader.column(1).tuple(); + assertEquals(ObjectType.SCALAR, mReader.column("b").array().entryType()); + ScalarElementReader bReader = mReader.column(0).elements(); + assertEquals(ValueType.INTEGER, bReader.valueType()); + assertTrue(reader.next()); - assertEquals(0, reader.column(0).getDouble(), 0.000001); + assertEquals(10, aReader.getInt()); + assertEquals(11, bReader.getInt(0)); + assertEquals(12, bReader.getInt(1)); assertTrue(reader.next()); - assertEquals((double) Float.MAX_VALUE, reader.column(0).getDouble(), 0.000001); - assertEquals((double) Float.MAX_VALUE, (double) reader.column(0).getObject(), 0.000001); + assertEquals(20, aReader.getInt()); + assertEquals(21, bReader.getInt(0)); + assertEquals(22, bReader.getInt(1)); assertTrue(reader.next()); - assertEquals((double) Float.MIN_VALUE, reader.column(0).getDouble(), 0.000001); + assertEquals(30, aReader.getInt()); + assertEquals(31, bReader.getInt(0)); + assertEquals(32, bReader.getInt(1)); assertFalse(reader.next()); - rs.clear(); - } - @Test - public void testDoubleRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.FLOAT8) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0D) - .add(Double.MAX_VALUE) - .add(Double.MIN_VALUE) + // Verify that the map accessor's value count was set. + + @SuppressWarnings("resource") + MapVector mapVector = (MapVector) actual.container().getValueVector(1).getValueVector(); + assertEquals(actual.rowCount(), mapVector.getAccessor().getValueCount()); + + SingleRowSet expected = fixture.rowSetBuilder(schema) + .addRow(10, new Object[] {new int[] {11, 12}}) + .addRow(20, new Object[] {new int[] {21, 22}}) + .addRow(30, new Object[] {new int[] {31, 32}}) .build(); - RowSetReader reader = rs.reader(); - assertTrue(reader.next()); - assertEquals(0, reader.column(0).getDouble(), 0.000001); - assertTrue(reader.next()); - assertEquals(Double.MAX_VALUE, reader.column(0).getDouble(), 0.000001); - assertEquals(Double.MAX_VALUE, (double) reader.column(0).getObject(), 0.000001); - assertTrue(reader.next()); - assertEquals(Double.MIN_VALUE, reader.column(0).getDouble(), 0.000001); - assertFalse(reader.next()); - rs.clear(); + new RowSetComparison(expected) + .verifyAndClearAll(actual); } @Test - public void testStringRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.VARCHAR) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add("") - .add("abcd") - .build(); - RowSetReader reader = rs.reader(); + public void testRepeatedMapStructure() { + TupleMetadata schema = new SchemaBuilder() + .add("a", MinorType.INT) + .addMapArray("m") + .add("b", MinorType.INT) + .add("c", MinorType.INT) + .buildMap() + .buildSchema(); + ExtendableRowSet rowSet = fixture.rowSet(schema); + RowSetWriter writer = rowSet.writer(); + + // Map and Int + // Pick out components and lightly test. (Assumes structure + // tested earlier is still valid, so no need to exhaustively + // test again.) 
+ + assertEquals(ObjectType.SCALAR, writer.column("a").type()); + assertEquals(ObjectType.ARRAY, writer.column("m").type()); + + ArrayWriter maWriter = writer.column(1).array(); + assertEquals(ObjectType.TUPLE, maWriter.entryType()); + + TupleWriter mapWriter = maWriter.tuple(); + assertEquals(ObjectType.SCALAR, mapWriter.column("b").type()); + assertEquals(ObjectType.SCALAR, mapWriter.column("c").type()); + + ScalarWriter aWriter = writer.column("a").scalar(); + ScalarWriter bWriter = mapWriter.scalar("b"); + ScalarWriter cWriter = mapWriter.scalar("c"); + assertEquals(ValueType.INTEGER, aWriter.valueType()); + assertEquals(ValueType.INTEGER, bWriter.valueType()); + assertEquals(ValueType.INTEGER, cWriter.valueType()); + + // Write data + + aWriter.setInt(10); + bWriter.setInt(101); + cWriter.setInt(102); + maWriter.save(); // Advance to next array position + bWriter.setInt(111); + cWriter.setInt(112); + maWriter.save(); + writer.save(); + + aWriter.setInt(20); + bWriter.setInt(201); + cWriter.setInt(202); + maWriter.save(); + bWriter.setInt(211); + cWriter.setInt(212); + maWriter.save(); + writer.save(); + + aWriter.setInt(30); + bWriter.setInt(301); + cWriter.setInt(302); + maWriter.save(); + bWriter.setInt(311); + cWriter.setInt(312); + maWriter.save(); + writer.save(); + + // Finish the row set and get a reader. + + SingleRowSet actual = writer.done(); + RowSetReader reader = actual.reader(); + + // Verify reader structure + + assertEquals(ObjectType.SCALAR, reader.column("a").type()); + assertEquals(ObjectType.ARRAY, reader.column("m").type()); + + ArrayReader maReader = reader.column(1).array(); + assertEquals(ObjectType.TUPLE, maReader.entryType()); + + TupleReader mapReader = maReader.tuple(); + assertEquals(ObjectType.SCALAR, mapReader.column("b").type()); + assertEquals(ObjectType.SCALAR, mapReader.column("c").type()); + + ScalarReader aReader = reader.column("a").scalar(); + ScalarReader bReader = mapReader.scalar("b"); + ScalarReader cReader = mapReader.scalar("c"); + assertEquals(ValueType.INTEGER, aReader.valueType()); + assertEquals(ValueType.INTEGER, bReader.valueType()); + assertEquals(ValueType.INTEGER, cReader.valueType()); + + // Row 1: use index accessors + + assertTrue(reader.next()); + assertEquals(10, aReader.getInt()); + TupleReader ixReader = maReader.tuple(0); + assertEquals(101, ixReader.scalar(0).getInt()); + assertEquals(102, ixReader.scalar(1).getInt()); + ixReader = maReader.tuple(1); + assertEquals(111, ixReader.scalar(0).getInt()); + assertEquals(112, ixReader.scalar(1).getInt()); + + // Row 2: use common accessor with explicit positioning, + // but access scalars through the map reader. 
+ assertTrue(reader.next()); - assertEquals("", reader.column(0).getString()); + assertEquals(20, aReader.getInt()); + maReader.setPosn(0); + assertEquals(201, mapReader.scalar(0).getInt()); + assertEquals(202, mapReader.scalar(1).getInt()); + maReader.setPosn(1); + assertEquals(211, mapReader.scalar(0).getInt()); + assertEquals(212, mapReader.scalar(1).getInt()); + + // Row 3: use common accessor for scalars + assertTrue(reader.next()); - assertEquals("abcd", reader.column(0).getString()); - assertEquals("abcd", reader.column(0).getObject()); + assertEquals(30, aReader.getInt()); + maReader.setPosn(0); + assertEquals(301, bReader.getInt()); + assertEquals(302, cReader.getInt()); + maReader.setPosn(1); + assertEquals(311, bReader.getInt()); + assertEquals(312, cReader.getInt()); + assertFalse(reader.next()); - rs.clear(); - } - /** - * Test writing to and reading from a row set with nested maps. - * Map fields are flattened into a logical schema. - */ + // Verify that the map accessor's value count was set. -// @Test -// public void testMap() { -// BatchSchema batchSchema = new SchemaBuilder() -// .add("a", MinorType.INT) -// .addMap("b") -// .add("c", MinorType.INT) -// .add("d", MinorType.INT) -// .buildMap() -// .build(); -// SingleRowSet rs = fixture.rowSetBuilder(batchSchema) -// .add(10, 20, 30) -// .add(40, 50, 60) -// .build(); -// RowSetReader reader = rs.reader(); -// assertTrue(reader.next()); -// assertEquals(10, reader.column(0).getInt()); -// assertEquals(20, reader.column(1).getInt()); -// assertEquals(30, reader.column(2).getInt()); -// assertEquals(10, reader.column("a").getInt()); -// assertEquals(30, reader.column("b.d").getInt()); -// assertTrue(reader.next()); -// assertEquals(40, reader.column(0).getInt()); -// assertEquals(50, reader.column(1).getInt()); -// assertEquals(60, reader.column(2).getInt()); -// assertFalse(reader.next()); -// rs.clear(); -// } + @SuppressWarnings("resource") + RepeatedMapVector mapVector = (RepeatedMapVector) actual.container().getValueVector(1).getValueVector(); + assertEquals(3, mapVector.getAccessor().getValueCount()); + + // Verify the readers and writers again using the testing tools. 
+ + SingleRowSet expected = fixture.rowSetBuilder(schema) + .addRow(10, new Object[] {new Object[] {101, 102}, new Object[] {111, 112}}) + .addRow(20, new Object[] {new Object[] {201, 202}, new Object[] {211, 212}}) + .addRow(30, new Object[] {new Object[] {301, 302}, new Object[] {311, 312}}) + .build(); + new RowSetComparison(expected) + .verifyAndClearAll(actual); + } /** * Test an array of ints (as an example fixed-width type) @@ -382,7 +528,7 @@ public class RowSetTest extends SubOperatorTest { */ @Test - public void TestTopFixedWidthArray() { + public void testTopFixedWidthArray() { BatchSchema batchSchema = new SchemaBuilder() .add("c", MinorType.INT) .addArray("a", MinorType.INT) @@ -390,49 +536,131 @@ public class RowSetTest extends SubOperatorTest { ExtendableRowSet rs1 = fixture.rowSet(batchSchema); RowSetWriter writer = rs1.writer(); - writer.column(0).setInt(10); - ArrayWriter array = writer.column(1).array(); + writer.scalar(0).setInt(10); + ScalarWriter array = writer.array(1).scalar(); array.setInt(100); array.setInt(110); writer.save(); - writer.column(0).setInt(20); - array = writer.column(1).array(); + writer.scalar(0).setInt(20); array.setInt(200); array.setInt(120); array.setInt(220); writer.save(); - writer.column(0).setInt(30); + writer.scalar(0).setInt(30); writer.save(); - writer.done(); - RowSetReader reader = rs1.reader(); + SingleRowSet result = writer.done(); + + RowSetReader reader = result.reader(); assertTrue(reader.next()); - assertEquals(10, reader.column(0).getInt()); - ArrayReader arrayReader = reader.column(1).array(); + assertEquals(10, reader.scalar(0).getInt()); + ScalarElementReader arrayReader = reader.array(1).elements(); assertEquals(2, arrayReader.size()); assertEquals(100, arrayReader.getInt(0)); assertEquals(110, arrayReader.getInt(1)); assertTrue(reader.next()); - assertEquals(20, reader.column(0).getInt()); - arrayReader = reader.column(1).array(); + assertEquals(20, reader.scalar(0).getInt()); assertEquals(3, arrayReader.size()); assertEquals(200, arrayReader.getInt(0)); assertEquals(120, arrayReader.getInt(1)); assertEquals(220, arrayReader.getInt(2)); assertTrue(reader.next()); - assertEquals(30, reader.column(0).getInt()); - arrayReader = reader.column(1).array(); + assertEquals(30, reader.scalar(0).getInt()); assertEquals(0, arrayReader.size()); assertFalse(reader.next()); SingleRowSet rs2 = fixture.rowSetBuilder(batchSchema) - .add(10, new int[] {100, 110}) - .add(20, new int[] {200, 120, 220}) - .add(30, null) + .addRow(10, new int[] {100, 110}) + .addRow(20, new int[] {200, 120, 220}) + .addRow(30, null) .build(); new RowSetComparison(rs1) .verifyAndClearAll(rs2); } + /** + * Test filling a row set up to the maximum number of rows. + * Values are small enough to prevent filling to the + * maximum buffer size. + */ + + @Test + public void testRowBounds() { + BatchSchema batchSchema = new SchemaBuilder() + .add("a", MinorType.INT) + .build(); + + ExtendableRowSet rs = fixture.rowSet(batchSchema); + RowSetWriter writer = rs.writer(); + int count = 0; + while (! writer.isFull()) { + writer.scalar(0).setInt(count++); + writer.save(); + } + writer.done(); + + assertEquals(ValueVector.MAX_ROW_COUNT, count); + // The writer index points past the writable area. + // But, this is fine, the valid() method says we can't + // write at this location. 
+ assertEquals(ValueVector.MAX_ROW_COUNT, writer.rowIndex()); + assertEquals(ValueVector.MAX_ROW_COUNT, rs.rowCount()); + rs.clear(); + } + + /** + * Test filling a row set up to the maximum vector size. + * Values in the first column are small enough to prevent filling to the + * maximum buffer size, but values in the second column + * will reach the maximum buffer size before the maximum row count. + * The result should be the number of rows that fit, with the + * partial last row not counting. (A complete application would + * reload the partial row into a new row set.) + */ + + @Test + public void testBufferBounds() { + BatchSchema batchSchema = new SchemaBuilder() + .add("a", MinorType.INT) + .add("b", MinorType.VARCHAR) + .build(); + + String varCharValue; + try { + byte rawValue[] = new byte[512]; + Arrays.fill(rawValue, (byte) 'X'); + varCharValue = new String(rawValue, "UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new IllegalStateException(e); + } + + ExtendableRowSet rs = fixture.rowSet(batchSchema); + RowSetWriter writer = rs.writer(); + int count = 0; + try { + + // Test overflow. This is not a typical use case: a writer should not + // hit overflow without overflow handling in place. In this case, we throw + // away the last row because the row set abstraction does not + // implement vector overflow other than throwing an exception. + + for (;;) { + writer.scalar(0).setInt(count); + writer.scalar(1).setString(varCharValue); + + // Won't get here on overflow. + writer.save(); + count++; + } + } catch (IndexOutOfBoundsException e) { + assertTrue(e.getMessage().contains("Overflow")); + } + writer.done(); + + assertTrue(count < ValueVector.MAX_ROW_COUNT); + assertEquals(count, writer.rowIndex()); + assertEquals(count, rs.rowCount()); + rs.clear(); + } } http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java new file mode 100644 index 0000000..147b713 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.drill.test.rowSet.test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.ValueType; +import org.apache.drill.test.SubOperatorTest; +import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; +import org.apache.drill.test.rowSet.RowSet.SingleRowSet; +import org.apache.drill.test.rowSet.RowSetReader; +import org.apache.drill.test.rowSet.RowSetUtilities; +import org.apache.drill.test.rowSet.RowSetWriter; +import org.apache.drill.test.rowSet.SchemaBuilder; +import org.junit.Test; + +/** + * Test the "fill empties" logic for all types and modes. + * This test exploits the dynamic typing ability of the + * accessors. Creating an object per value is too slow for + * production code, but very handy for tests such as this. + * <p> + * Note that this test also has the handy side-effect of testing + * null handling in the accessor classes. + */ + +public class TestFillEmpties extends SubOperatorTest { + + public static final int ROW_COUNT = 1000; + + /** + * Test "fill empties" for required types. Here, the fill value + * is more of a convention: 0 (fixed-width) or an empty + * entry (variable width.) Some fill value is required to avoid + * the alternatives, which are to either 1) leave the value as + * garbage, or 2) raise an exception about the missing value. + */ + + @Test + public void testFillEmptiesRequired() { + testFillEmpties(DataMode.REQUIRED); + } + + /** + * Test "fill empties" for nullable types, which are the most + * "natural" type for omitted values. + * Nullable vectors fill empties with nulls. + */ + + @Test + public void testFillEmptiesNullable() { + testFillEmpties(DataMode.OPTIONAL); + } + + /** + * Test "fill empties" for repeated types. + * Drill defines a null (omitted) array as the same thing as + * a zero-length array.
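+ * <p>
+ * For example, a column written as {[1, 2], (omitted), [5, 6]} reads
+ * back as {[1, 2], [], [5, 6]}: the omitted row reports a size() of
+ * zero rather than a null array.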
+ */ + + @Test + public void testFillEmptiesRepeated() { + testFillEmpties(DataMode.REPEATED); + } + + private void testFillEmpties(DataMode mode) { + for (MinorType type : MinorType.values()) { + switch (type) { + case DECIMAL28DENSE: + case DECIMAL38DENSE: + // Not yet supported + break; + case GENERIC_OBJECT: + case LATE: + case LIST: + case MAP: + case NULL: + case UNION: + // Writer N/A + break; + case BIT: + case FIXEDBINARY: + case FIXEDCHAR: + case FIXED16CHAR: + case MONEY: + case TIMESTAMPTZ: + case TIMETZ: + // Not supported in Drill + break; + case DECIMAL18: + case DECIMAL28SPARSE: + case DECIMAL9: + case DECIMAL38SPARSE: + doFillEmptiesTest(type, mode, 9, 2); + break; + default: + doFillEmptiesTest(type, mode); + } + } + } + + private void doFillEmptiesTest(MinorType type, DataMode mode, int prec, int scale) { + MajorType majorType = MajorType.newBuilder() + .setMinorType(type) + .setMode(mode) + .setPrecision(prec) + .setScale(scale) + .build(); + doFillEmptiesTest(majorType); + } + + private void doFillEmptiesTest(MinorType type, DataMode mode) { + MajorType majorType = MajorType.newBuilder() + .setMinorType(type) + .setMode(mode) + .build(); + doFillEmptiesTest(majorType); + } + + private void doFillEmptiesTest(MajorType majorType) { + if (majorType.getMode() == DataMode.REPEATED) { + dofillEmptiesRepeated(majorType); + } else { + doFillEmptiesScalar(majorType); + } + } + + private void doFillEmptiesScalar(MajorType majorType) { + TupleMetadata schema = new SchemaBuilder() + .add("a", majorType) + .buildSchema(); + ExtendableRowSet rs = fixture.rowSet(schema); + RowSetWriter writer = rs.writer(); + ScalarWriter colWriter = writer.scalar(0); + ValueType valueType = colWriter.valueType(); + boolean nullable = majorType.getMode() == DataMode.OPTIONAL; + for (int i = 0; i < ROW_COUNT; i++) { + if (i % 5 == 0) { + colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i)); + } + writer.save(); + } + SingleRowSet result = writer.done(); + RowSetReader reader = result.reader(); + ScalarReader colReader = reader.scalar(0); + MinorType type = majorType.getMinorType(); + boolean isVariable = (type == MinorType.VARCHAR || + type == MinorType.VAR16CHAR || + type == MinorType.VARBINARY); + for (int i = 0; i < ROW_COUNT; i++) { + assertTrue(reader.next()); + if (i % 5 != 0) { + if (nullable) { + // Nullable types fill with nulls. + + assertTrue(colReader.isNull()); + continue; + } + if (isVariable) { + // Variable width types fill with a zero-length value. + + assertEquals(0, colReader.getBytes().length); + continue; + } + } + + // All other types fill with zero-bytes, interpreted as some form + // of zero for each type. + + Object actual = colReader.getObject(); + Object expected = RowSetUtilities.testDataFromInt(valueType, majorType, + i % 5 == 0 ? i : 0); + RowSetUtilities.assertEqualValues( + majorType.toString().replace('\n', ' ') + "[" + i + "]", + valueType, expected, actual); + } + result.clear(); + } + + private void dofillEmptiesRepeated(MajorType majorType) { + TupleMetadata schema = new SchemaBuilder() + .add("a", majorType) + .buildSchema(); + ExtendableRowSet rs = fixture.rowSet(schema); + RowSetWriter writer = rs.writer(); + ScalarWriter colWriter = writer.array(0).scalar(); + ValueType valueType = colWriter.valueType(); + for (int i = 0; i < ROW_COUNT; i++) { + if (i % 5 == 0) { + // Write two values so we can exercise a bit of the array logic. 
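+ // Each setObject() call below appends one element to the current
+ // row's array; rows where the writer is skipped read back as
+ // zero-length arrays.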
+ + colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i)); + colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i+1)); + } + writer.save(); + } + SingleRowSet result = writer.done(); + RowSetReader reader = result.reader(); + ScalarElementReader colReader = reader.array(0).elements(); + for (int i = 0; i < ROW_COUNT; i++) { + assertTrue(reader.next()); + if (i % 5 != 0) { + // Empty arrays are defined to be the same as a zero-length array. + + assertEquals(0, colReader.size()); + } else { + for (int j = 0; j < 2; j++) { + Object actual = colReader.getObject(j); + Object expected = RowSetUtilities.testDataFromInt(valueType, majorType, i + j); + RowSetUtilities.assertEqualValues( + majorType.toString().replace('\n', ' ') + "[" + i + "][" + j + "]", + valueType, expected, actual); + } + } + } + result.clear(); + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFixedWidthWriter.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFixedWidthWriter.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFixedWidthWriter.java new file mode 100644 index 0000000..a27fdf4 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFixedWidthWriter.java @@ -0,0 +1,444 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet.test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.vector.IntVector; +import org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener; +import org.apache.drill.exec.vector.accessor.ValueType; +import org.apache.drill.test.SubOperatorTest; +import org.apache.drill.test.rowSet.SchemaBuilder; +import org.junit.Test; + +/** + * Test the int writer as a typical example of a fixed-width + * writer. Exercises normal writing, writing after a (simulated) + * overflow, and filling in empty values. 
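+ * <p>
+ * The tests bypass the row-set layer and drive the writer directly
+ * through the simple ColumnWriterIndex stub defined below, so each
+ * test positions the vector index explicitly.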
+ */ + +public class TestFixedWidthWriter extends SubOperatorTest { + + public static class TestIndex implements ColumnWriterIndex { + + public int index; + + @Override + public int vectorIndex() { return index; } + + @Override + public void nextElement() { } + + @Override + public void rollover() { } + + @Override + public int rowStartIndex() { return index; } + + @Override + public ColumnWriterIndex outerIndex() { return null; } + } + + /** + * Basic test to write a contiguous set of values, enough to cause + * the vector to double in size twice, then read back the values. + */ + + @Test + public void testWrite() { + try (IntVector vector = allocVector(1000)) { + TestIndex index = new TestIndex(); + IntColumnWriter writer = makeWriter(vector, index); + + writer.startWrite(); + + // Write integers. + // Write enough that the vector is resized. + + long origAddr = vector.getBuffer().addr(); + for (int i = 0; i < 3000; i++) { + index.index = i; + writer.setInt(i * 10); + } + writer.endWrite(); + + // Should have been reallocated. + + assertNotEquals(origAddr, vector.getBuffer().addr()); + + // Verify values + + for (int i = 0; i < 3000; i++) { + assertEquals(i * 10, vector.getAccessor().get(i)); + } + } + } + + @Test + public void testRestartRow() { + try (IntVector vector = allocVector(1000)) { + TestIndex index = new TestIndex(); + IntColumnWriter writer = makeWriter(vector, index); + writer.startWrite(); + + // Write rows, rewriting every other row. + + writer.startRow(); + index.index = 0; + for (int i = 0; i < 50; i++) { + writer.setInt(i); + if (i % 2 == 0) { + writer.saveRow(); + writer.startRow(); + index.index++; + } else { + writer.restartRow(); + } + } + writer.endWrite(); + + // Verify values + + for (int i = 0; i < 25; i++) { + assertEquals(2 * i, vector.getAccessor().get(i)); + } + } + } + + /** + * Required, fixed-width vectors are back-filled with 0 to fill in missing + * values. While using zero is not strictly SQL compliant, it is better + * than failing. (The SQL solution would be to fill with nulls, but a + * required vector does not support nulls...) + */ + + @Test + public void testFillEmpties() { + try (IntVector vector = allocVector(1000)) { + TestIndex index = new TestIndex(); + IntColumnWriter writer = makeWriter(vector, index); + writer.startWrite(); + + // Write values, skipping four out of five positions, + // forcing backfill. + // The number of values is odd, forcing the writer to + // back-fill at the end as well as between values. + // Keep the number of values below the allocation so + // that we know all values were initially garbage-filled. + + for (int i = 0; i < 501; i += 5) { + index.index = i; + writer.startRow(); + writer.setInt(i); + writer.saveRow(); + } + // At the end, the vector index is defined to point one past the + // last row. That is, the vector index gives the row count. + + index.index = 504; + writer.endWrite(); + + // Verify values + + for (int i = 0; i < 504; i++) { + assertEquals("Mismatch on " + i, + (i%5) == 0 ? i : 0, + vector.getAccessor().get(i)); + } + } + } + + /** + * The rollover method is used during vector overflow. + */ + + @Test + public void testRollover() { + try (IntVector vector = allocVector(1000)) { + TestIndex index = new TestIndex(); + IntColumnWriter writer = makeWriter(vector, index); + writer.startWrite(); + + // Simulate doing an overflow of ten values.
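+ // The rollover protocol exercised here: write until the batch is
+ // full, call preRollover() to finish the current buffer, copy the
+ // in-flight row to the new buffer (simulated below via the mutator),
+ // then call postRollover() and resume writing at index 0.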
+ + for (int i = 0; i < 10; i++) { + index.index = i; + writer.startRow(); + writer.setInt(i); + writer.saveRow(); + } + + // Overflow occurs while writing the 11th row + + index.index = 10; + writer.startRow(); + writer.setInt(10); + + // Overflow occurs + + writer.preRollover(); + + // Simulate rollover + + for (int i = 0; i < 15; i++) { + vector.getMutator().set(i, 0xdeadbeef); + } + vector.getMutator().set(0, 10); + + writer.postRollover(); + index.index = 0; + writer.saveRow(); + + // Simulate resuming with a few more values. + + for (int i = 1; i < 5; i++) { + index.index = i; + writer.startRow(); + writer.setInt(10 + i); + writer.saveRow(); + } + writer.endWrite(); + + // Verify the results + + for (int i = 0; i < 5; i++) { + assertEquals(10 + i, vector.getAccessor().get(i)); + } + } + } + + /** + * Simulate the case in which the tail end of an overflow + * batch has empties. <tt>preRollover()</tt> should back-fill + * them (with zero, for this fixed-width vector) prior to rollover. + */ + + @Test + public void testRolloverWithEmpties() { + try (IntVector vector = allocVector(1000)) { + TestIndex index = new TestIndex(); + IntColumnWriter writer = makeWriter(vector, index); + writer.startWrite(); + + // Simulate doing an overflow of 15 values, + // of which 5 are empty. + + for (int i = 0; i < 10; i++) { + index.index = i; + writer.startRow(); + writer.setInt(i); + writer.saveRow(); + } + + for (int i = 10; i < 15; i++) { + index.index = i; + writer.startRow(); + writer.saveRow(); + } + + // Overflow occurs before writing the 16th row + + index.index = 15; + writer.startRow(); + + // Overflow occurs. This should back-fill the empty slots. + + writer.preRollover(); + + // Verify the first "batch" results + + for (int i = 0; i < 10; i++) { + assertEquals(i, vector.getAccessor().get(i)); + } + for (int i = 10; i < 15; i++) { + assertEquals(0, vector.getAccessor().get(i)); + } + + // Simulate rollover + + for (int i = 0; i < 20; i++) { + vector.getMutator().set(i, 0xdeadbeef); + } + + writer.postRollover(); + index.index = 0; + writer.saveRow(); + + // Skip more values. + + for (int i = 1; i < 5; i++) { + index.index = i; + writer.startRow(); + writer.saveRow(); + } + + // Simulate resuming with a few more values. + + for (int i = 5; i < 10; i++) { + index.index = i; + writer.startRow(); + writer.setInt(i + 20); + writer.saveRow(); + } + writer.endWrite(); + + // Verify the results + + for (int i = 0; i < 5; i++) { + assertEquals(0, vector.getAccessor().get(i)); + } + for (int i = 5; i < 10; i++) { + assertEquals(i + 20, vector.getAccessor().get(i)); + } + } + } + + /** + * Test the case in which a scalar vector is used in conjunction + * with a nullable bits vector. The nullable vector will call the + * <tt>skipNulls()</tt> method to avoid writing values for null + * entries. (Without the call, the scalar writer will fill the + * empty values with zeros.) + */ + + @Test + public void testSkipNulls() { + try (IntVector vector = allocVector(1000)) { + TestIndex index = new TestIndex(); + IntColumnWriter writer = makeWriter(vector, index); + writer.startWrite(); + + // Write values, skipping four out of five positions, + // skipping nulls. + // The loop will cause the vector to double in size. + // The number of values is odd, forcing the writer to + // skip nulls at the end as well as between values.
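+ // With skipNulls(), skipped positions are left untouched: they keep
+ // whatever bytes the buffer already held, which is why allocVector()
+ // pre-fills the vector with a known garbage value.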
+ + long origAddr = vector.getBuffer().addr(); + for (int i = 0; i < 3000; i += 5) { + index.index = i; + writer.startRow(); + writer.skipNulls(); + writer.setInt(i); + writer.saveRow(); + } + index.index = 3003; + writer.startRow(); + writer.skipNulls(); + writer.saveRow(); + writer.endWrite(); + + // Should have been reallocated. + + assertNotEquals(origAddr, vector.getBuffer().addr()); + + // Verify values. First 1000 were filled with known + // garbage values. + + for (int i = 0; i < 1000; i++) { + assertEquals("Mismatch at " + i, + (i%5) == 0 ? i : 0xdeadbeef, + vector.getAccessor().get(i)); + } + + // Positions beyond the initial allocation hold unknown values: + // whatever was left in the buffer allocated by Netty. So, check + // only the positions that were actually written. + + for (int i = 1005; i < 3000; i+= 5) { + assertEquals(i, vector.getAccessor().get(i)); + } + } + } + + /** + * Test resize monitoring. Add a listener to an int writer, + * capture each resize, and refuse a resize when the number + * of ints exceeds 8K values. This triggers an overflow, + * which throws an exception that we then check for. + */ + + @Test + public void testSizeLimit() { + try (IntVector vector = allocVector(1000)) { + TestIndex index = new TestIndex(); + IntColumnWriter writer = makeWriter(vector, index); + writer.bindListener(new ColumnWriterListener() { + int totalAlloc = 4096; + + @Override + public void overflowed(ScalarWriter writer) { + throw new IllegalStateException("overflow called"); + } + + @Override + public boolean canExpand(ScalarWriter writer, int delta) { +// System.out.println("Delta: " + delta); + totalAlloc += delta; + return totalAlloc < 16_384 * 4; + } + }); + writer.startWrite(); + try { + for (int i = 0; ; i++ ) { + index.index = i; + writer.startRow(); + writer.setInt(i); + writer.saveRow(); + } + } + catch(IllegalStateException e) { + assertTrue(e.getMessage().contains("overflow called")); + } + + // Should have failed on index 8192, where the vector would have + // doubled to 16K values; that expansion was rejected. + + assertEquals(8192, index.index); + } + } + + private IntVector allocVector(int size) { + MaterializedField field = + SchemaBuilder.columnSchema("x", MinorType.INT, DataMode.REQUIRED); + IntVector vector = new IntVector(field, fixture.allocator()); + vector.allocateNew(size); + + // Party on the bytes of the vector so we start dirty + + for (int i = 0; i < size; i++) { + vector.getMutator().set(i, 0xdeadbeef); + } + return vector; + } + + private IntColumnWriter makeWriter(IntVector vector, TestIndex index) { + IntColumnWriter writer = new IntColumnWriter(vector); + writer.bindIndex(index); + + assertEquals(ValueType.INTEGER, writer.valueType()); + return writer; + } +}
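Taken together, these tests all drive the same writer lifecycle. As a summary, here is a minimal sketch of that protocol, reusing the allocVector() and makeWriter() helpers defined above (the ten-row count is arbitrary):

  try (IntVector vector = allocVector(1000)) {
    TestIndex index = new TestIndex();
    IntColumnWriter writer = makeWriter(vector, index);

    writer.startWrite();                 // once per batch
    for (int i = 0; i < 10; i++) {
      index.index = i;                   // position the writer
      writer.startRow();
      writer.setInt(i * 10);             // set the row's value
      writer.saveRow();                  // commit the row
    }
    index.index = 10;                    // one past the last row
    writer.endWrite();                   // back-fills any trailing empties

    assertEquals(90, vector.getAccessor().get(9));
  }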