This is an automated email from the ASF dual-hosted git repository. arina pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit 8fb85cd4370e6143641cda1ad5b998caca0b6bf7 Author: Paul Rogers <prog...@cloudera.com> AuthorDate: Sun Jan 27 10:32:46 2019 -0800 DRILL-7006: Add type conversion to row writers Modifies the column metadata and writer abstractions to allow a type conversion "shim" to be specified as part of the schema, then inserted as part of the row set writer. Allows, say, setting an Int or Date from a string, parsing the string to obtain the proper data type to store in the vector. Type conversion not yet supported in the result set loader: some additional complexity needs to be resolved. Adds unit tests for this functionality. Refactors some existing tests to remove rough edges. closes #1623 --- .../record/metadata/AbstractColumnMetadata.java | 16 ++ .../record/metadata/PrimitiveColumnMetadata.java | 56 ++++++- .../rowSet/impl/TestResultSetLoaderProtocol.java | 54 ++++++ .../test/rowSet/test/TestColumnConvertor.java | 145 ++++++++++++++++ .../test/{RowSetTest.java => TestRowSet.java} | 45 +++-- .../rowSet/{ => test}/TestRowSetComparison.java | 5 +- .../drill/exec/record/metadata/ColumnMetadata.java | 40 +++++ .../vector/accessor/ColumnConversionFactory.java | 40 +++++ .../accessor/UnsupportedConversionError.java | 2 +- .../accessor/writer/AbstractScalarWriter.java | 52 ++---- .../accessor/writer/AbstractWriteConvertor.java | 186 +++++++++++++++++++++ .../vector/accessor/writer/ConcreteWriter.java | 69 ++++++++ .../vector/accessor/writer/ScalarArrayWriter.java | 24 +-- 13 files changed, 654 insertions(+), 80 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java index 595c5c4..63dda07 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java @@ -21,6 +21,8 @@ import 
org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.vector.accessor.ColumnConversionFactory; +import org.apache.drill.exec.vector.accessor.UnsupportedConversionError; /** * Abstract definition of column metadata. Allows applications to create @@ -179,6 +181,20 @@ public abstract class AbstractColumnMetadata implements ColumnMetadata { public boolean isProjected() { return projected; } @Override + public void setDefaultValue(Object value) { } + + @Override + public Object defaultValue() { return null; } + + @Override + public void setTypeConverter(ColumnConversionFactory factory) { + throw new UnsupportedConversionError("Type conversion not supported for non-scalar writers"); + } + + @Override + public ColumnConversionFactory typeConverter() { return null; } + + @Override public String toString() { final StringBuilder buf = new StringBuilder() .append("[") diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java index dfbd4a9..ead6134 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java @@ -23,16 +23,52 @@ import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.common.types.Types; import org.apache.drill.exec.expr.TypeHelper; import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.vector.accessor.ColumnConversionFactory; /** - * Primitive (non-map) column. Describes non-nullable, nullable and - * array types (which differ only in mode, but not in metadata structure.) + * Primitive (non-map) column. 
Describes non-nullable, nullable and array types + * (which differ only in mode, but not in metadata structure.) + * <p> + * Metadata is of two types: + * <ul> + * <li>Storage metadata that describes how the column is materialized in a + * vector. Storage metadata is immutable because revising an existing vector is + * a complex operation.</li> + * <li>Supplemental metadata used when reading or writing the column. + * Supplemental metadata can be changed after the column is created, though it + * should generally be set before invoking code that uses the metadata.</li> + * </ul> */ public class PrimitiveColumnMetadata extends AbstractColumnMetadata { + /** + * Expected (average) width for variable-width columns. + */ + private int expectedWidth; + /** + * Default value to use for filling a vector when no real data is + * available, such as for columns added in new files but which do not + * exist in existing files. The ultimate default value is the SQL null + * value, which works only for nullable columns. + */ + + private Object defaultValue; + + /** + * Factory for an optional shim writer that translates from the type of + * data available to the code that creates the vectors on the one hand, + * and the actual type of the column on the other. For example, a shim + * might parse a string form of a date into the form stored in vectors. + * <p> + * The default is to use the "natural" type: that is, to insert no + * conversion shim.
+ */ + + private ColumnConversionFactory shimFactory; + public PrimitiveColumnMetadata(MaterializedField schema) { super(schema); expectedWidth = estimateWidth(schema.getType()); @@ -99,6 +135,22 @@ public class PrimitiveColumnMetadata extends AbstractColumnMetadata { } @Override + public void setDefaultValue(Object value) { + defaultValue = value; + } + + @Override + public Object defaultValue() { return defaultValue; } + + @Override + public void setTypeConverter(ColumnConversionFactory factory) { + shimFactory = factory; + } + + @Override + public ColumnConversionFactory typeConverter() { return shimFactory; } + + @Override public ColumnMetadata cloneEmpty() { return new PrimitiveColumnMetadata(this); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java index 088e8f4..8fb600d 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java @@ -17,6 +17,7 @@ */ package org.apache.drill.exec.physical.rowSet.impl; +import static org.apache.drill.test.rowSet.RowSetUtilities.intArray; import static org.apache.drill.test.rowSet.RowSetUtilities.strArray; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -43,8 +44,10 @@ import org.apache.drill.exec.vector.accessor.TupleWriter.UndefinedColumnExceptio import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; +import org.apache.drill.test.rowSet.test.TestColumnConvertor.TestConvertor; import org.apache.drill.test.rowSet.RowSetReader; import org.apache.drill.test.rowSet.RowSetUtilities; +import org.junit.Ignore; import org.junit.Test; /** @@ -602,4 +605,55 @@ 
public class TestResultSetLoaderProtocol extends SubOperatorTest { rsLoader.close(); } + + /** + * Test the use of a column type converter in the result set loader for + * required, nullable and repeated columns. + */ + + @Ignore("Not yet") + @Test + public void testTypeConversion() { + TupleMetadata schema = new SchemaBuilder() + .add("n1", MinorType.INT) + .addNullable("n2", MinorType.INT) + .addArray("n3", MinorType.INT) + .buildSchema(); + + // Add a type convertor. Passed in as a factory + // since we must create a new one for each row set writer. + + schema.metadata("n1").setTypeConverter(TestConvertor.factory()); + schema.metadata("n2").setTypeConverter(TestConvertor.factory()); + schema.metadata("n3").setTypeConverter(TestConvertor.factory()); + + ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder() + .setSchema(schema) + .setRowCountLimit(ValueVector.MAX_ROW_COUNT) + .build(); + ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options); + + // Write data as both a string and an integer + + RowSetLoader rootWriter = rsLoader.writer(); + rootWriter.addRow("123", "12", strArray("123", "124")); + rootWriter.addRow(234, 23, intArray(234, 235)); + RowSet actual = fixture.wrap(rsLoader.harvest()); + + // Build the expected vector without a type convertor.
+ + TupleMetadata expectedSchema = new SchemaBuilder() + .add("n1", MinorType.INT) + .addNullable("n2", MinorType.INT) + .addArray("n3", MinorType.INT) + .buildSchema(); + final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addRow(123, 12, intArray(123, 124)) + .addRow(234, 23, intArray(234, 235)) + .build(); + + // Compare + + RowSetUtilities.verify(expected, actual); + } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestColumnConvertor.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestColumnConvertor.java new file mode 100644 index 0000000..b7d865d --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestColumnConvertor.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.test.rowSet.test; + +import static org.apache.drill.test.rowSet.RowSetUtilities.strArray; +import static org.apache.drill.test.rowSet.RowSetUtilities.intArray; + +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.SchemaBuilder; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.apache.drill.exec.vector.accessor.ColumnConversionFactory; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.writer.AbstractWriteConvertor; +import org.apache.drill.exec.vector.accessor.writer.ConcreteWriter; +import org.apache.drill.test.SubOperatorTest; +import org.apache.drill.test.rowSet.RowSet; +import org.apache.drill.test.rowSet.RowSetBuilder; +import org.apache.drill.test.rowSet.RowSetUtilities; +import org.apache.drill.test.rowSet.RowSet.SingleRowSet; +import org.junit.Test; + +/** + * Tests the column type convertor feature of the column metadata + * and of the RowSetWriter. + */ + +public class TestColumnConvertor extends SubOperatorTest { + + /** + * Simple type converter that allows string-to-int conversions. + * Inherits usual int value support from the base writer. 
+ */ + public static class TestConvertor extends AbstractWriteConvertor { + + public TestConvertor(ScalarWriter baseWriter) { + super(baseWriter); + } + + @Override + public void setString(String value) { + setInt(Integer.parseInt(value)); + } + + public static ColumnConversionFactory factory() { + return new ColumnConversionFactory() { + @Override + public ConcreteWriter newWriter(ColumnMetadata colDefn, + ConcreteWriter baseWriter) { + return new TestConvertor(baseWriter); + } + }; + } + } + + @Test + public void testScalarConvertor() { + + // Create the schema + + TupleMetadata schema = new SchemaBuilder() + .add("n1", MinorType.INT) + .addNullable("n2", MinorType.INT) + .buildSchema(); + + // Add a type convertor. Passed in as a factory + // since we must create a new one for each row set writer. + + schema.metadata("n1").setTypeConverter(TestConvertor.factory()); + schema.metadata("n2").setTypeConverter(TestConvertor.factory()); + + // Write data as both a string and an integer + + RowSet actual = new RowSetBuilder(fixture.allocator(), schema) + .addRow("123", "12") + .addRow(234, 23) + .build(); + + // Build the expected vector without a type convertor. + + TupleMetadata expectedSchema = new SchemaBuilder() + .add("n1", MinorType.INT) + .addNullable("n2", MinorType.INT) + .buildSchema(); + final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addRow(123, 12) + .addRow(234, 23) + .build(); + + // Compare + + RowSetUtilities.verify(expected, actual); + } + + @Test + public void testArrayConvertor() { + + // Create the schema + + TupleMetadata schema = new SchemaBuilder() + .addArray("n", MinorType.INT) + .buildSchema(); + + // Add a type convertor. Passed in as a factory + // since we must create a new one for each row set writer.
+ + schema.metadata("n").setTypeConverter(TestConvertor.factory()); + + // Write data as both a string and an integer + + RowSet actual = new RowSetBuilder(fixture.allocator(), schema) + .addSingleCol(strArray("123", "124")) + .addSingleCol(intArray(234, 235)) + .build(); + + // Build the expected vector without a type convertor. + + TupleMetadata expectedSchema = new SchemaBuilder() + .addArray("n", MinorType.INT) + .buildSchema(); + final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addSingleCol(intArray(123, 124)) + .addSingleCol(intArray(234, 235)) + .build(); + + // Compare + + RowSetUtilities.verify(expected, actual); + } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSet.java similarity index 96% rename from exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java rename to exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSet.java index ee5c599..b660672 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSet.java @@ -29,7 +29,6 @@ import java.io.UnsupportedEncodingException; import java.util.Arrays; import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.vector.ValueVector; @@ -47,8 +46,8 @@ import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.DirectRowSet; import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; -import org.apache.drill.test.rowSet.RowSetComparison; import org.apache.drill.test.rowSet.RowSetReader; +import org.apache.drill.test.rowSet.RowSetUtilities; import
org.apache.drill.test.rowSet.RowSetWriter; import org.junit.Test; @@ -72,8 +71,8 @@ import org.junit.Test; * A list is an array of variants. Variants are tested elsewhere. */ -public class RowSetTest extends SubOperatorTest { - private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(RowSetTest.class); +public class TestRowSet extends SubOperatorTest { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestRowSet.class); /** * Test the simplest constructs: a row with top-level scalar @@ -169,7 +168,7 @@ public class RowSetTest extends SubOperatorTest { .addRow(30) .addRow(40) .build(); - new RowSetComparison(expected).verifyAndClearAll(actual); + RowSetUtilities.verify(expected, actual); } /** @@ -303,8 +302,7 @@ public class RowSetTest extends SubOperatorTest { .addSingleCol(intArray(30)) .addSingleCol(intArray(40, 41)) .build(); - new RowSetComparison(expected) - .verifyAndClearAll(actual); + RowSetUtilities.verify(expected, actual); } /** @@ -438,8 +436,7 @@ public class RowSetTest extends SubOperatorTest { .addRow(20, objArray(intArray(21, 22))) .addRow(30, objArray(intArray(31, 32))) .build(); - new RowSetComparison(expected) - .verifyAndClearAll(actual); + RowSetUtilities.verify(expected, actual); } @Test @@ -583,8 +580,7 @@ public class RowSetTest extends SubOperatorTest { .addRow(20, objArray(objArray(201, 202), objArray(211, 212))) .addRow(30, objArray(objArray(301, 302), objArray(311, 312))) .build(); - new RowSetComparison(expected) - .verifyAndClearAll(actual); + RowSetUtilities.verify(expected, actual); } /** @@ -594,12 +590,12 @@ public class RowSetTest extends SubOperatorTest { @Test public void testTopFixedWidthArray() { - final BatchSchema batchSchema = new SchemaBuilder() + final TupleMetadata schema = new SchemaBuilder() .add("c", MinorType.INT) .addArray("a", MinorType.INT) - .build(); + .buildSchema(); - final ExtendableRowSet rs1 = fixture.rowSet(batchSchema); + final ExtendableRowSet rs1 
= fixture.rowSet(schema); final RowSetWriter writer = rs1.writer(); writer.scalar(0).setInt(10); final ScalarWriter array = writer.array(1).scalar(); @@ -644,14 +640,13 @@ public class RowSetTest extends SubOperatorTest { assertEquals(0, arrayReader.size()); assertFalse(reader.next()); - final SingleRowSet rs2 = fixture.rowSetBuilder(batchSchema) + final SingleRowSet rs2 = fixture.rowSetBuilder(schema) .addRow(10, intArray(100, 110)) .addRow(20, intArray(200, 120, 220)) .addRow(30, null) .build(); - new RowSetComparison(rs1) - .verifyAndClearAll(rs2); + RowSetUtilities.verify(rs1, rs2); } /** * Test filling a row set up to the maximum number of rows. @@ -661,11 +656,11 @@ public class RowSetTest extends SubOperatorTest { @Test public void testRowBounds() { - final BatchSchema batchSchema = new SchemaBuilder() + final TupleMetadata schema = new SchemaBuilder() .add("a", MinorType.INT) - .build(); + .buildSchema(); - final ExtendableRowSet rs = fixture.rowSet(batchSchema); + final ExtendableRowSet rs = fixture.rowSet(schema); final RowSetWriter writer = rs.writer(); int count = 0; while (! writer.isFull()) { @@ -695,10 +690,10 @@ public class RowSetTest extends SubOperatorTest { @Test public void testBufferBounds() { - final BatchSchema batchSchema = new SchemaBuilder() + final TupleMetadata schema = new SchemaBuilder() .add("a", MinorType.INT) .add("b", MinorType.VARCHAR) - .build(); + .buildSchema(); String varCharValue; try { @@ -709,7 +704,7 @@ public class RowSetTest extends SubOperatorTest { throw new IllegalStateException(e); } - final ExtendableRowSet rs = fixture.rowSet(batchSchema); + final ExtendableRowSet rs = fixture.rowSet(schema); final RowSetWriter writer = rs.writer(); int count = 0; try { @@ -751,14 +746,14 @@ public class RowSetTest extends SubOperatorTest { // will be provided by a reader, by an incoming batch, // etc. 
- final BatchSchema schema = new SchemaBuilder() + final TupleMetadata schema = new SchemaBuilder() .add("a", MinorType.VARCHAR) .addArray("b", MinorType.INT) .addMap("c") .add("c1", MinorType.INT) .add("c2", MinorType.VARCHAR) .resumeSchema() - .build(); + .buildSchema(); // Step 2: Create a batch. Done here because this is // a batch-oriented test. Done automatically in the diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/TestRowSetComparison.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSetComparison.java similarity index 96% rename from exec/java-exec/src/test/java/org/apache/drill/test/rowSet/TestRowSetComparison.java rename to exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSetComparison.java index 062dd56..867f61f 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/TestRowSetComparison.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSetComparison.java @@ -15,13 +15,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.drill.test.rowSet; +package org.apache.drill.test.rowSet.test; import org.apache.drill.common.types.TypeProtos; import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.memory.RootAllocator; import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.apache.drill.test.rowSet.RowSet; +import org.apache.drill.test.rowSet.RowSetBuilder; +import org.apache.drill.test.rowSet.RowSetComparison; import org.junit.After; import org.junit.Before; import org.junit.Test; diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java index 0e0fb49..85d7d25 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java @@ -21,6 +21,7 @@ import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.vector.accessor.ColumnConversionFactory; /** * Metadata description of a column including names, types and structure @@ -182,6 +183,45 @@ public interface ColumnMetadata { int expectedElementCount(); /** + * Set the default value to use for filling a vector when no real data is + * available, such as for columns added in new files but which do not + * exist in existing files. The "default default" is null, which works + * only for nullable columns. + * + * @param value column value, represented as a Java object, acceptable + * to the {@link ColumnWriter#setObject(Object)} method for this column's writer. + */ + void setDefaultValue(Object value); + + /** + * Returns the default value for this column.
+ * + * @return the default value, or null if no default value has been set + */ + Object defaultValue(); + + /** + * Set the factory for an optional shim writer that translates from the type of + * data available to the code that creates the vectors on the one hand, + * and the actual type of the column on the other. For example, a shim + * might parse a string form of a date into the form stored in vectors. + * <p> + * The shim must write to the base vector for this column using one of + * the supported base writer "set" methods. + * <p> + * The default is to use the "natural" type: that is, to insert no + * conversion shim. + */ + void setTypeConverter(ColumnConversionFactory factory); + + /** + * Returns the type conversion shim for this column. + * + * @return the type conversion factory, or null if none is set + */ + ColumnConversionFactory typeConverter(); + + /** * Create an empty version of this column. If the column is a scalar, * produces a simple copy. If a map, produces a clone without child * columns. diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnConversionFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnConversionFactory.java new file mode 100644 index 0000000..02efd6d --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnConversionFactory.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor; + +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.vector.accessor.writer.ConcreteWriter; + +/** + * Create a column type converter for the given column and base writer. + * The new writer is expected to be a "shim" writer that implements + * additional "set" methods to convert data from the type that the + * client requires to the type required by the underlying vector as + * represented by the base writer. + */ +public interface ColumnConversionFactory { + /** + * Create a type conversion writer for the given column, converting data + * to the type needed by the base writer. 
+ * @param colDefn column metadata definition + * @param baseWriter base column writer for the column's vector + * @return a new scalar writer to insert between the client and + * the base vector + */ + ConcreteWriter newWriter(ColumnMetadata colDefn, ConcreteWriter baseWriter); +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java index dee2612..68ed5e0 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java @@ -28,7 +28,7 @@ public class UnsupportedConversionError extends UnsupportedOperationException { private static final long serialVersionUID = 1L; - private UnsupportedConversionError(String message) { + public UnsupportedConversionError(String message) { super(message); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java index 08e4ac3..9c2e986 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java @@ -17,16 +17,13 @@ */ package org.apache.drill.exec.vector.accessor.writer; -import java.math.BigDecimal; - import org.apache.drill.exec.record.metadata.ColumnMetadata; import org.apache.drill.exec.vector.BaseDataValueVector; +import org.apache.drill.exec.vector.accessor.ColumnConversionFactory; import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; import org.apache.drill.exec.vector.accessor.ObjectType; import org.apache.drill.exec.vector.accessor.ScalarWriter; -import org.apache.drill.exec.vector.accessor.UnsupportedConversionError; 
import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter; -import org.joda.time.Period; /** * Column writer implementation that acts as the basis for the @@ -35,14 +32,20 @@ import org.joda.time.Period; * method(s). */ -public abstract class AbstractScalarWriter implements ScalarWriter, WriterEvents { +public abstract class AbstractScalarWriter extends ConcreteWriter { public static class ScalarObjectWriter extends AbstractObjectWriter { - private AbstractScalarWriter scalarWriter; + private ConcreteWriter scalarWriter; - public ScalarObjectWriter(AbstractScalarWriter scalarWriter) { - this.scalarWriter = scalarWriter; + public ScalarObjectWriter(ConcreteWriter scalarWriter) { + final ColumnMetadata metadata = scalarWriter.schema(); + final ColumnConversionFactory factory = metadata.typeConverter(); + if (factory == null) { + this.scalarWriter = scalarWriter; + } else { + this.scalarWriter = factory.newWriter(metadata, scalarWriter); + } } @Override @@ -111,40 +114,7 @@ public abstract class AbstractScalarWriter implements ScalarWriter, WriterEvents @Override public void saveRow() { } - protected UnsupportedConversionError conversionError(String javaType) { - return UnsupportedConversionError.writeError(schema(), javaType); - } - @Override - public void setObject(Object value) { - if (value == null) { - setNull(); - } else if (value instanceof Integer) { - setInt((Integer) value); - } else if (value instanceof Long) { - setLong((Long) value); - } else if (value instanceof String) { - setString((String) value); - } else if (value instanceof BigDecimal) { - setDecimal((BigDecimal) value); - } else if (value instanceof Period) { - setPeriod((Period) value); - } else if (value instanceof byte[]) { - byte[] bytes = (byte[]) value; - setBytes(bytes, bytes.length); - } else if (value instanceof Byte) { - setInt((Byte) value); - } else if (value instanceof Short) { - setInt((Short) value); - } else if (value instanceof Double) { - setDouble((Double) value); 
- } else if (value instanceof Float) { - setDouble((Float) value); - } else { - throw conversionError(value.getClass().getSimpleName()); - } - } - public void dump(HierarchicalFormatter format) { format .startObject(this) diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractWriteConvertor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractWriteConvertor.java new file mode 100644 index 0000000..0d0bc88 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractWriteConvertor.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +import java.math.BigDecimal; + +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.ValueType; +import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter; +import org.joda.time.Period; + +/** + * Base class for type converting scalar column writers. 
All methods + * pass through to the base writer. Override selected "set" methods to + * perform the type conversion, such as overriding "setString" to convert + * from a string representation of a value to the actual format. + * <p> + * The {@link #setObject(Object)} method works here: the object is passed + * to this class's set methods, allowing, say, setting a string object + * for an int column in the case above. + */ + +// TODO: This organization works fine, but is a bit heavy-weight. +// It may be time to think about separating the pure writer aspect of +// a column writer from its plumbing aspects. That is, the base +// ConcreteWriter class combines the public API (ScalarWriter) with +// the internal implementation (WriterEvents) into a single class. +// Might be worth using composition rather than inheritance to keep +// these aspects distinct. + +public class AbstractWriteConvertor extends ConcreteWriter { + + private final ConcreteWriter baseWriter; + + public AbstractWriteConvertor(ScalarWriter baseWriter) { + this.baseWriter = (ConcreteWriter) baseWriter; + } + + @Override + public ValueType valueType() { + return baseWriter.valueType(); + } + + @Override + public int lastWriteIndex() { + return baseWriter.lastWriteIndex(); + } + + @Override + public void restartRow() { + baseWriter.restartRow(); + } + + @Override + public void endWrite() { + baseWriter.endWrite(); + } + + @Override + public void preRollover() { + baseWriter.preRollover(); + } + + @Override + public void postRollover() { + baseWriter.postRollover(); + } + + @Override + public ObjectType type() { + return baseWriter.type(); + } + + @Override + public boolean nullable() { + return baseWriter.nullable(); + } + + @Override + public ColumnMetadata schema() { + return baseWriter.schema(); + } + + @Override + public void setNull() { + baseWriter.setNull(); + } + + @Override + public int rowStartIndex() { + return baseWriter.rowStartIndex(); + } + + @Override + public int writeIndex() { + return
baseWriter.writeIndex(); + } + + @Override + public void bindListener(ColumnWriterListener listener) { + baseWriter.bindListener(listener); + } + + @Override + public void bindIndex(ColumnWriterIndex index) { + baseWriter.bindIndex(index); + } + + @Override + public void startWrite() { + baseWriter.startWrite(); + } + + @Override + public void startRow() { + baseWriter.startRow(); + } + + @Override + public void endArrayValue() { + baseWriter.endArrayValue(); + } + + @Override + public void saveRow() { + baseWriter.saveRow(); + } + + @Override + public void setInt(int value) { + baseWriter.setInt(value); + } + + @Override + public void setLong(long value) { + baseWriter.setLong(value); + } + + @Override + public void setDouble(double value) { + baseWriter.setDouble(value); + } + + @Override + public void setString(String value) { + baseWriter.setString(value); + } + + @Override + public void setBytes(byte[] value, int len) { + baseWriter.setBytes(value, len); + } + + @Override + public void setDecimal(BigDecimal value) { + baseWriter.setDecimal(value); + } + + @Override + public void setPeriod(Period value) { + baseWriter.setPeriod(value); + } + + @Override + public void dump(HierarchicalFormatter format) { + baseWriter.dump(format); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ConcreteWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ConcreteWriter.java new file mode 100644 index 0000000..549431f --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ConcreteWriter.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +import java.math.BigDecimal; + +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.UnsupportedConversionError; +import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter; +import org.joda.time.Period; + +/** + * Base class for concrete scalar column writers including actual vector + * writers, wrappers for nullable types, and shims used to convert types. 
+ */ + +public abstract class ConcreteWriter implements ScalarWriter, WriterEvents { + + @Override + public void setObject(Object value) { + if (value == null) { + setNull(); + } else if (value instanceof Integer) { + setInt((Integer) value); + } else if (value instanceof Long) { + setLong((Long) value); + } else if (value instanceof String) { + setString((String) value); + } else if (value instanceof BigDecimal) { + setDecimal((BigDecimal) value); + } else if (value instanceof Period) { + setPeriod((Period) value); + } else if (value instanceof byte[]) { + final byte[] bytes = (byte[]) value; + setBytes(bytes, bytes.length); + } else if (value instanceof Byte) { + setInt((Byte) value); + } else if (value instanceof Short) { + setInt((Short) value); + } else if (value instanceof Double) { + setDouble((Double) value); + } else if (value instanceof Float) { + setDouble((Float) value); + } else { + throw conversionError(value.getClass().getSimpleName()); + } + } + + protected UnsupportedConversionError conversionError(String javaType) { + return UnsupportedConversionError.writeError(schema(), javaType); + } + + abstract void dump(HierarchicalFormatter format); +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java index 2ac7d45..f271bfa 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java @@ -60,13 +60,17 @@ public class ScalarArrayWriter extends BaseArrayWriter { public final void nextElement() { next(); } } - private final BaseScalarWriter elementWriter; + private final ConcreteWriter elementWriter; public ScalarArrayWriter(ColumnMetadata schema, RepeatedValueVector vector, BaseScalarWriter elementWriter) { super(schema, vector.getOffsetVector(), new 
ScalarObjectWriter(elementWriter)); - this.elementWriter = elementWriter; + + // Save the writer from the scalar object writer created above + // which may have wrapped the element writer in a type convertor. + + this.elementWriter = (ConcreteWriter) elementObjWriter.scalar(); } public static ArrayObjectWriter build(ColumnMetadata schema, @@ -110,7 +114,7 @@ public class ScalarArrayWriter extends BaseArrayWriter { return; } - String objClass = array.getClass().getName(); + final String objClass = array.getClass().getName(); if (! objClass.startsWith("[")) { throw new IllegalArgumentException( String.format("Argument must be an array. Column `%s`, value = %s", @@ -119,12 +123,12 @@ public class ScalarArrayWriter extends BaseArrayWriter { // Figure out type - char second = objClass.charAt(1); + final char second = objClass.charAt(1); switch ( second ) { case '[': // bytes is represented as an array of byte arrays. - char third = objClass.charAt(2); + final char third = objClass.charAt(2); switch (third) { case 'B': setBytesArray((byte[][]) array); @@ -157,11 +161,11 @@ public class ScalarArrayWriter extends BaseArrayWriter { setBooleanArray((boolean[]) array); break; case 'L': - int posn = objClass.indexOf(';'); + final int posn = objClass.indexOf(';'); // If the array is of type Object, then we have no type info. 
- String memberClassName = objClass.substring(2, posn); + final String memberClassName = objClass.substring(2, posn); if (memberClassName.equals(String.class.getName())) { setStringArray((String[]) array); } else if (memberClassName.equals(Period.class.getName())) { @@ -215,7 +219,7 @@ public class ScalarArrayWriter extends BaseArrayWriter { public void setIntObjectArray(Integer[] value) { for (int i = 0; i < value.length; i++) { - Integer element = value[i]; + final Integer element = value[i]; if (element == null) { elementWriter.setNull(); } else { @@ -232,7 +236,7 @@ public class ScalarArrayWriter extends BaseArrayWriter { public void setLongObjectArray(Long[] value) { for (int i = 0; i < value.length; i++) { - Long element = value[i]; + final Long element = value[i]; if (element == null) { elementWriter.setNull(); } else { @@ -255,7 +259,7 @@ public class ScalarArrayWriter extends BaseArrayWriter { public void setDoubleObjectArray(Double[] value) { for (int i = 0; i < value.length; i++) { - Double element = value[i]; + final Double element = value[i]; if (element == null) { elementWriter.setNull(); } else {