This is an automated email from the ASF dual-hosted git repository. progers pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push: new 69d397c DRILL-7703: Support for 3+D arrays in EVF JSON loader 69d397c is described below commit 69d397c053f4afb89371e7926ce75815ac78bebd Author: Paul Rogers <par0...@yahoo.com> AuthorDate: Wed Apr 15 11:02:47 2020 -0700 DRILL-7703: Support for 3+D arrays in EVF JSON loader Revises the EVF-based JSON loader to support nested repeated lists. --- .../store/easy/json/loader/JsonLoaderImpl.java | 17 +- .../store/easy/json/loader/JsonLoaderOptions.java | 2 + .../json/loader/RepeatedListValueListener.java | 67 ++-- .../exec/store/easy/json/loader/TupleListener.java | 370 ++++++++++++++------- .../easy/json/loader/UnknownFieldListener.java | 10 +- .../easy/json/parser/JsonStructureParser.java | 45 +-- .../exec/store/easy/json/parser/TokenIterator.java | 8 + .../exec/store/easy/json/loader/TestObjects.java | 3 + .../store/easy/json/loader/TestRepeatedList.java | 165 +++++++-- .../store/easy/json/loader/TestScalarArrays.java | 3 + .../exec/store/easy/json/loader/TestScalars.java | 3 + .../exec/store/easy/json/loader/TestUnknowns.java | 27 ++ .../exec/store/easy/json/loader/TestVariant.java | 3 + .../drill/exec/record/metadata/MetadataUtils.java | 5 + 14 files changed, 529 insertions(+), 199 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderImpl.java index ecfaf4b..b434750 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderImpl.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderImpl.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.drill.common.exceptions.CustomErrorContext; +import org.apache.drill.common.exceptions.EmptyErrorContext; import org.apache.drill.common.exceptions.UserException; import 
org.apache.drill.exec.physical.resultSet.ResultSetLoader; import org.apache.drill.exec.physical.resultSet.RowSetLoader; @@ -183,6 +184,13 @@ public class JsonLoaderImpl implements JsonLoader, ErrorFactory { } public JsonLoader build() { + // Defaults, primarily for testing. + if (options == null) { + options = new JsonLoaderOptions(); + } + if (errorContext == null) { + errorContext = EmptyErrorContext.INSTANCE; + } return new JsonLoaderImpl(this); } } @@ -313,6 +321,7 @@ public class JsonLoaderImpl implements JsonLoader, ErrorFactory { public RuntimeException syntaxError(JsonParseException e) { throw buildError( UserException.dataReadError(e) + .message("Error parsing JSON - %s", e.getMessage()) .addContext("Syntax error")); } @@ -378,14 +387,6 @@ public class JsonLoaderImpl implements JsonLoader, ErrorFactory { .addContext("JSON type", jsonType.toString())); } - public UserException unsupportedArrayException(String key, int dims) { - return buildError( - UserException.validationError() - .message("JSON reader does not arrays deeper than two levels") - .addContext("Field", key) - .addContext("Array nesting", dims)); - } - @Override public RuntimeException messageParseError(MessageContextException e) { return buildError( diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderOptions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderOptions.java index d982e11..a8b221d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderOptions.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoaderOptions.java @@ -29,6 +29,7 @@ import org.apache.drill.exec.store.easy.json.parser.JsonStructureOptions; public class JsonLoaderOptions extends JsonStructureOptions { public boolean readNumbersAsDouble; + public boolean unionEnabled; /** * Drill prior to version 1.18 would read a null string @@ -49,5 +50,6 @@ public class 
JsonLoaderOptions extends JsonStructureOptions { public JsonLoaderOptions(OptionSet options) { super(options); this.readNumbersAsDouble = options.getBoolean(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + this.unionEnabled = options.getBoolean(ExecConstants.ENABLE_UNION_TYPE_KEY); } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/RepeatedListValueListener.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/RepeatedListValueListener.java index 22bc68d..ba1647d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/RepeatedListValueListener.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/RepeatedListValueListener.java @@ -17,6 +17,8 @@ */ package org.apache.drill.exec.store.easy.json.loader; +import java.util.function.Function; + import org.apache.drill.exec.record.metadata.ColumnMetadata; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.store.easy.json.loader.AbstractArrayListener.ObjectArrayListener; @@ -48,44 +50,65 @@ public class RepeatedListValueListener extends AbstractValueListener { private RepeatedListValueListener(JsonLoaderImpl loader, ObjectWriter writer, ValueListener elementListener) { + this(loader, writer, + new RepeatedArrayListener(loader, writer.schema(), + writer.array(), elementListener)); + } + + private RepeatedListValueListener(JsonLoaderImpl loader, ObjectWriter writer, + RepeatedArrayListener outerArrayListener) { super(loader); this.repeatedListWriter = writer; - this.outerArrayListener = new RepeatedArrayListener(loader, writer.schema(), - writer.array(), elementListener); + this.outerArrayListener = outerArrayListener; } /** * Create a repeated list listener for a scalar value. 
*/ - public static ValueListener repeatedListFor(JsonLoaderImpl loader, ObjectWriter writer) { - ColumnMetadata elementSchema = writer.schema().childSchema(); - return wrapInnerArray(loader, writer, - new ScalarArrayListener(loader, elementSchema, - ScalarListener.listenerFor(loader, writer.array().entry()))); + public static ValueListener multiDimScalarArrayFor(JsonLoaderImpl loader, ObjectWriter writer, int dims) { + return buildOuterArrays(loader, writer, dims, + innerWriter -> + new ScalarArrayListener(loader, innerWriter.schema(), + ScalarListener.listenerFor(loader, innerWriter)) + ); } /** * Create a repeated list listener for a Map. */ - public static ValueListener repeatedObjectListFor(JsonLoaderImpl loader, - ObjectWriter writer, TupleMetadata providedSchema) { - ArrayWriter outerArrayWriter = writer.array(); - ArrayWriter innerArrayWriter = outerArrayWriter.array(); - return wrapInnerArray(loader, writer, - new ObjectArrayListener(loader, innerArrayWriter, - new ObjectValueListener(loader, outerArrayWriter.entry().schema(), - new TupleListener(loader, innerArrayWriter.tuple(), providedSchema)))); + public static ValueListener multiDimObjectArrayFor(JsonLoaderImpl loader, + ObjectWriter writer, int dims, TupleMetadata providedSchema) { + return buildOuterArrays(loader, writer, dims, + innerWriter -> + new ObjectArrayListener(loader, innerWriter.array(), + new ObjectValueListener(loader, innerWriter.array().entry().schema(), + new TupleListener(loader, innerWriter.array().tuple(), providedSchema)))); } /** - * Given the inner array, wrap it to produce the repeated list. + * Create layers of repeated list listeners around the type-specific + * array. If the JSON has three array levels, the outer two are repeated + * lists, the inner is type-specific: say an array of {@code BIGINT} or + * a map array. 
*/ - private static ValueListener wrapInnerArray(JsonLoaderImpl loader, ObjectWriter writer, - ArrayListener innerArrayListener) { - return new RepeatedListValueListener(loader, writer, - new RepeatedListElementListener(loader, - writer.schema(), writer.array().array(), - innerArrayListener)); + public static ValueListener buildOuterArrays(JsonLoaderImpl loader, ObjectWriter writer, int dims, + Function<ObjectWriter, ArrayListener> innerCreator) { + ColumnMetadata colSchema = writer.schema(); + ObjectWriter writers[] = new ObjectWriter[dims]; + writers[0] = writer; + for (int i = 1; i < dims; i++) { + writers[i] = writers[i-1].array().entry(); + } + ArrayListener prevArrayListener = innerCreator.apply(writers[dims - 1]); + RepeatedArrayListener innerArrayListener = null; + for (int i = dims - 2; i >= 0; i--) { + innerArrayListener = new RepeatedArrayListener(loader, colSchema, + writers[i].array(), + new RepeatedListElementListener(loader, colSchema, + writers[i+1].array(), prevArrayListener)); + prevArrayListener = innerArrayListener; + } + return new RepeatedListValueListener(loader, writer, innerArrayListener); } /** diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/TupleListener.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/TupleListener.java index 53ad5c6..493bea9 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/TupleListener.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/TupleListener.java @@ -17,11 +17,11 @@ */ package org.apache.drill.exec.store.easy.json.loader; +import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.common.types.Types; import org.apache.drill.exec.record.metadata.ColumnMetadata; import org.apache.drill.exec.record.metadata.MetadataUtils; -import org.apache.drill.exec.record.metadata.RepeatedListBuilder; 
import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.store.easy.json.loader.AbstractArrayListener.ObjectArrayListener; import org.apache.drill.exec.store.easy.json.loader.AbstractArrayListener.ScalarArrayListener; @@ -36,6 +36,7 @@ import org.apache.drill.exec.store.easy.json.parser.ValueListener; import org.apache.drill.exec.vector.accessor.ArrayWriter; import org.apache.drill.exec.vector.accessor.ObjectWriter; import org.apache.drill.exec.vector.accessor.TupleWriter; +import org.apache.drill.shaded.guava.com.google.common.base.Preconditions; /** * Accepts { name : value ... } @@ -164,11 +165,11 @@ public class TupleListener implements ObjectListener { */ @Override public ValueListener addField(String key, ValueDef valueDef) { - ColumnMetadata colSchema = providedColumn(key); - if (colSchema != null) { - return listenerFor(colSchema); + ColumnMetadata providedCol = providedColumn(key); + if (providedCol != null) { + return listenerForSchema(providedCol); } else { - return listenerFor(key, valueDef); + return listenerForValue(key, valueDef); } } @@ -177,101 +178,173 @@ public class TupleListener implements ObjectListener { } /** - * Build a column and its listener based on a provided schema. + * Build a column and its listener based a provided schema. + * The user is responsible to ensure that the provided schema + * accurately reflects the structure of the JSON being parsed. 
*/ - private ValueListener listenerFor(ColumnMetadata colSchema) { - switch (colSchema.structureType()) { - case PRIMITIVE: - if (colSchema.isArray()) { + private ValueListener listenerForSchema(ColumnMetadata providedCol) { + switch (providedCol.structureType()) { + + case PRIMITIVE: { + ColumnMetadata colSchema = providedCol.copy(); + if (providedCol.isArray()) { return scalarArrayListenerFor(colSchema); } else { return scalarListenerFor(colSchema); } - case TUPLE: - if (colSchema.isArray()) { - return objectArrayListenerFor(colSchema); + } + + case TUPLE: { + // Propagate the provided map schema into the object + // listener as a provided tuple schema. + ColumnMetadata colSchema = providedCol.cloneEmpty(); + TupleMetadata providedSchema = providedCol.tupleSchema(); + if (providedCol.isArray()) { + return objectArrayListenerFor(colSchema, providedSchema); } else { - return objectListenerFor(colSchema); + return objectListenerFor(colSchema, providedSchema); } - case VARIANT: - if (colSchema.isArray()) { + } + + case VARIANT: { + // A variant can contain multiple types. The schema does not + // declare the types; rather they are discovered by the reader. + // That is, there is no VARIANT<INT, DOUBLE>, there is just VARIANT. + ColumnMetadata colSchema = providedCol.cloneEmpty(); + if (providedCol.isArray()) { return variantArrayListenerFor(colSchema); } else { return variantListenerFor(colSchema); } + } + case MULTI_ARRAY: - return repeatedListListenerFor(colSchema); + return multiDimArrayListenerForSchema(providedCol); + default: + throw loader.unsupportedType(providedCol); } - throw loader.unsupportedType(colSchema); } /** * Build a column and its listener based on a look-ahead hint. 
*/ - protected ValueListener listenerFor(String key, ValueDef valueDef) { + protected ValueListener listenerForValue(String key, ValueDef valueDef) { if (!valueDef.isArray()) { if (valueDef.type().isUnknown()) { return unknownListenerFor(key); } else if (valueDef.type().isObject()) { - return objectListenerFor(key, null); + return objectListenerForValue(key); } else { - return scalarListenerFor(key, valueDef.type()); + return scalarListenerForValue(key, valueDef.type()); } } else if (valueDef.dimensions() == 1) { if (valueDef.type().isUnknown()) { return unknownArrayListenerFor(key, valueDef); } else if (valueDef.type().isObject()) { - return objectArrayListenerFor(key, null); + return objectArrayListenerForValue(key); } else { - return arrayListenerFor(key, valueDef.type()); + return scalarArrayListenerForValue(key, valueDef.type()); } - } else if (valueDef.dimensions() == 2) { + } else { if (valueDef.type().isUnknown()) { return unknownArrayListenerFor(key, valueDef); } else if (valueDef.type().isObject()) { - return repeatedListOfObjectsListenerFor(key, null); + return multiDimObjectArrayListenerForValue(key, valueDef); } else { - return repeatedListListenerFor(key, valueDef); + return multiDimScalarArrayListenerForValue(key, valueDef); } - } else { - throw loader.unsupportedArrayException(key, valueDef.dimensions()); } } - public ScalarListener scalarListenerFor(String key, JsonType jsonType) { - ColumnMetadata colSchema = MetadataUtils.newScalar(key, - Types.optional(scalarTypeFor(key, jsonType))); - return scalarListenerFor(colSchema); - } - - private ObjectWriter addFieldWriter(ColumnMetadata colSchema) { - int index = tupleWriter.addColumn(colSchema); - return tupleWriter.column(index); + /** + * Create a scalar column and listener given the definition of a JSON + * scalar value. 
+ */ + public ScalarListener scalarListenerForValue(String key, JsonType jsonType) { + return scalarListenerFor(MetadataUtils.newScalar(key, + Types.optional(scalarTypeFor(key, jsonType)))); } + /** + * Create a scalar column and listener given the column schema. + */ public ScalarListener scalarListenerFor(ColumnMetadata colSchema) { return ScalarListener.listenerFor(loader, addFieldWriter(colSchema)); } - public ObjectValueListener objectListenerFor(ColumnMetadata providedCol) { - return objectListenerFor(providedCol.name(), providedCol.tupleSchema()); + /** + * Create a scalar array column and listener given the definition of a JSON + * array of scalars. + */ + public ArrayValueListener scalarArrayListenerForValue(String key, JsonType jsonType) { + return scalarArrayListenerFor(MetadataUtils.newScalar(key, + Types.repeated(scalarTypeFor(key, jsonType)))); + } + + /** + * Create a multi- (2+) dimensional scalar array from a JSON value description. + */ + private ValueListener multiDimScalarArrayListenerForValue(String key, ValueDef valueDef) { + return multiDimScalarArrayListenerFor( + repeatedListSchemaFor(key, valueDef.dimensions(), + MetadataUtils.newScalar(key, scalarTypeFor(key, valueDef.type()), DataMode.REPEATED)), + valueDef.dimensions()); + } + + /** + * Create a multi- (2+) dimensional scalar array from a column schema and dimension + * count hint. + */ + private ValueListener multiDimScalarArrayListenerFor(ColumnMetadata colSchema, int dims) { + return RepeatedListValueListener.multiDimScalarArrayFor(loader, + addFieldWriter(colSchema), dims); + } + + /** + * Create a scalar array column and array listener for the given column + * schema. 
+ */ + public ArrayValueListener scalarArrayListenerFor(ColumnMetadata colSchema) { + return new ScalarArrayValueListener(loader, colSchema, + new ScalarArrayListener(loader, colSchema, + scalarListenerFor(colSchema))); } - public ObjectValueListener objectListenerFor(String key, TupleMetadata providedSchema) { + /** + * Create a map column and its associated object value listener for the + * a JSON object value given the value's key. + */ + public ObjectValueListener objectListenerForValue(String key) { ColumnMetadata colSchema = MetadataUtils.newMap(key); + return objectListenerFor(colSchema, colSchema.tupleSchema()); + } + + /** + * Create a map column and its associated object value listener for the + * given key and optional provided schema. + */ + public ObjectValueListener objectListenerFor(ColumnMetadata colSchema, TupleMetadata providedSchema) { return new ObjectValueListener(loader, colSchema, new TupleListener(loader, addFieldWriter(colSchema).tuple(), providedSchema)); } - public ArrayValueListener objectArrayListenerFor(ColumnMetadata providedCol) { - return objectArrayListenerFor(providedCol.name(), providedCol.tupleSchema()); + /** + * Create a map array column and its associated object array listener + * for the given key. + */ + public ArrayValueListener objectArrayListenerForValue(String key) { + ColumnMetadata colSchema = MetadataUtils.newMapArray(key); + return objectArrayListenerFor(colSchema, colSchema.tupleSchema()); } + /** + * Create a map array column and its associated object array listener + * for the given column schema and optional provided schema. 
+ */ public ArrayValueListener objectArrayListenerFor( - String key, TupleMetadata providedSchema) { - ColumnMetadata colSchema = MetadataUtils.newMapArray(key); + ColumnMetadata colSchema, TupleMetadata providedSchema) { ArrayWriter arrayWriter = addFieldWriter(colSchema).array(); return new ObjectArrayValueListener(loader, colSchema, new ObjectArrayListener(loader, arrayWriter, @@ -279,10 +352,125 @@ public class TupleListener implements ObjectListener { new TupleListener(loader, arrayWriter.tuple(), providedSchema)))); } - public ArrayValueListener arrayListenerFor(String key, JsonType jsonType) { - ColumnMetadata colSchema = MetadataUtils.newScalar(key, - Types.repeated(scalarTypeFor(key, jsonType))); - return scalarArrayListenerFor(colSchema); + /** + * Create a RepeatedList which contains (empty) Map objects using the provided + * schema. That is, create a multi-dimensional array of maps. + * The map fields are created on the fly, optionally using the provided schema. + */ + private ValueListener multiDimObjectArrayListenerForValue(String key, ValueDef valueDef) { + return multiDimObjectArrayListenerFor( + repeatedListSchemaFor(key, valueDef.dimensions(), + MetadataUtils.newMapArray(key)), + valueDef.dimensions(), null); + } + + /** + * Create a multi- (2+) dimensional scalar array from a column schema, dimension + * count hint, and optional provided schema. + */ + private ValueListener multiDimObjectArrayListenerFor(ColumnMetadata colSchema, + int dims, TupleMetadata providedSchema) { + return RepeatedListValueListener.multiDimObjectArrayFor(loader, + addFieldWriter(colSchema), dims, providedSchema); + } + + /** + * Create a variant (UNION) column and its associated listener given + * a column schema. + */ + private ValueListener variantListenerFor(ColumnMetadata colSchema) { + return new VariantListener(loader, addFieldWriter(colSchema).variant()); + } + + /** + * Create a variant array (LIST) column and its associated listener given + * a column schema. 
+ */ + private ValueListener variantArrayListenerFor(ColumnMetadata colSchema) { + return new ListListener(loader, addFieldWriter(colSchema)); + } + + /** + * Create a RepeatedList which contains Unions. (Actually, this is an + * array of List objects internally.) The variant is variable, it makes no + * sense to specify a schema for the variant. Also, omitting the schema + * save a large amount of complexity that will likely never be needed. + */ + @SuppressWarnings("unused") + private ValueListener repeatedListOfVariantListenerFor(String key, ValueDef valueDef) { + return multiDimVariantArrayListenerFor( + MetadataUtils.newVariant(key, DataMode.REPEATED), + valueDef.dimensions()); + } + + /** + * Create a multi- (2+) dimensional variant array from a column schema and dimension + * count hint. This is actually an (n-1) dimensional array of lists, where a LISt + * is a repeated UNION. + */ + private ValueListener multiDimVariantArrayListenerFor(ColumnMetadata colSchema, int dims) { + return RepeatedListValueListener.repeatedVariantListFor(loader, + addFieldWriter(colSchema)); + } + + /** + * Create a repeated list column and its multiple levels of inner structure + * from a provided schema. Repeated lists can nest to any number of levels to + * provide any number of dimensions. In general, if an array is <i>n</i>-dimensional, + * then there are <i>n</i>-1 repeated lists with some array type as the + * innermost dimension. + */ + private ValueListener multiDimArrayListenerForSchema(ColumnMetadata providedSchema) { + // Parse the stack of repeated lists to count the "outer" dimensions and + // to locate the innermost array (the "list" which is "repeated"). 
+ int dims = 1; // For inner array + ColumnMetadata elementSchema = providedSchema; + while (MetadataUtils.isRepeatedList(elementSchema)) { + dims++; + elementSchema = elementSchema.childSchema(); + Preconditions.checkArgument(elementSchema != null); + } + + ColumnMetadata colSchema = repeatedListSchemaFor(providedSchema.name(), dims, + elementSchema.cloneEmpty()); + switch (elementSchema.structureType()) { + + case PRIMITIVE: + return multiDimScalarArrayListenerFor(colSchema, dims); + + case TUPLE: + return multiDimObjectArrayListenerFor(colSchema, + dims, elementSchema.tupleSchema()); + + case VARIANT: + return multiDimVariantArrayListenerFor(colSchema, dims); + + default: + throw loader.unsupportedType(providedSchema); + } + } + + /** + * Create a listener when we don't have type information. For the case + * {@code null} appears before other values. + */ + private ValueListener unknownListenerFor(String key) { + return new UnknownFieldListener(this, key); + } + + /** + * Create a listener when we don't have type information. For the case + * {@code []} appears before other values. + */ + private ValueListener unknownArrayListenerFor(String key, ValueDef valueDef) { + UnknownFieldListener fieldListener = new UnknownFieldListener(this, key); + fieldListener.array(valueDef); + return fieldListener; + } + + private ObjectWriter addFieldWriter(ColumnMetadata colSchema) { + int index = tupleWriter.addColumn(colSchema); + return tupleWriter.column(index); } /** @@ -321,85 +509,17 @@ public class TupleListener implements ObjectListener { } } - public ArrayValueListener scalarArrayListenerFor(ColumnMetadata colSchema) { - return new ScalarArrayValueListener(loader, colSchema, - new ScalarArrayListener(loader, colSchema, - scalarListenerFor(colSchema))); - } - /** - * Create a listener when we don't have type information. For the case - * {@code null} appears before other values. 
+ * Build up a repeated list column definition given a specification of the + * number of dimensions and the JSON type. Creation of the element type is + * via a closure that builds the needed schema. */ - private ValueListener unknownListenerFor(String key) { - return new UnknownFieldListener(this, key); - } - - /** - * Create a listener when we don't have type information. For the case - * {@code []} appears before other values. - */ - private ValueListener unknownArrayListenerFor(String key, ValueDef valueDef) { - UnknownFieldListener fieldListener = new UnknownFieldListener(this, key); - fieldListener.array(valueDef); - return fieldListener; - } - - private ValueListener variantListenerFor(ColumnMetadata colSchema) { - return new VariantListener(loader, addFieldWriter(colSchema).variant()); - } - - private ValueListener variantArrayListenerFor(ColumnMetadata colSchema) { - return new ListListener(loader, addFieldWriter(colSchema)); - } - - private ValueListener repeatedListListenerFor(String key, ValueDef valueDef) { - ColumnMetadata colSchema = new RepeatedListBuilder(key) - .addArray(scalarTypeFor(key, valueDef.type())) - .buildColumn(); - return repeatedListListenerFor(colSchema); - } - - /** - * Create a RepeatedList which contains (empty) Map objects using the provided - * schema. The map fields are created on the fly from the provided schema. - */ - private ValueListener repeatedListOfObjectsListenerFor(String key, ColumnMetadata providedCol) { - ColumnMetadata colSchema = new RepeatedListBuilder(key) - .addMapArray() - .resumeList() - .buildColumn(); - TupleMetadata providedSchema = providedCol == null ? null - : providedCol.childSchema().tupleSchema(); - return RepeatedListValueListener.repeatedObjectListFor(loader, - addFieldWriter(colSchema), providedSchema); - } - - /** - * Create a RepeatedList which contains Unions. (Actually, this is an - * array of List objects internally.) 
The variant is variable, it makes no - * sense to specify a schema for the variant. Also, omitting the schema - * save a large amount of complexity that will likely never be needed. - */ - private ValueListener repeatedListOfVariantListenerFor(String key) { - ColumnMetadata colSchema = new RepeatedListBuilder(key) - .addList() - .resumeList() - .buildColumn(); - return RepeatedListValueListener.repeatedVariantListFor(loader, - addFieldWriter(colSchema)); - } - - private ValueListener repeatedListListenerFor(ColumnMetadata colSchema) { - ColumnMetadata childSchema = colSchema.childSchema(); - if (childSchema != null) { - if (childSchema.isMap()) { - return repeatedListOfObjectsListenerFor(colSchema.name(), colSchema); - } - if (childSchema.isVariant()) { - return repeatedListOfVariantListenerFor(colSchema.name()); - } + private ColumnMetadata repeatedListSchemaFor(String key, int dims, + ColumnMetadata innerArray) { + ColumnMetadata prev = innerArray; + for (int i = 1; i < dims; i++) { + prev = MetadataUtils.newRepeatedList(key, prev); } - return RepeatedListValueListener.repeatedListFor(loader, addFieldWriter(colSchema)); + return prev; } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/UnknownFieldListener.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/UnknownFieldListener.java index 0a2ca34..530342d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/UnknownFieldListener.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/UnknownFieldListener.java @@ -105,7 +105,7 @@ public class UnknownFieldListener extends AbstractValueListener implements NullT @Override public ObjectListener object() { - return resolveScalar(JsonType.OBJECT).object(); + return resolveTo(parentTuple.objectListenerForValue(key)).object(); } /** @@ -115,7 +115,7 @@ public class UnknownFieldListener extends AbstractValueListener implements NullT */ protected 
ValueListener resolveScalar(JsonType type) { if (unknownArray == null) { - return resolveTo(parentTuple.scalarListenerFor(key, type)); + return resolveTo(parentTuple.scalarListenerForValue(key, type)); } else { // Saw {a: []}, {a: 10}. Since we infer that 10 is a @@ -154,11 +154,11 @@ public class UnknownFieldListener extends AbstractValueListener implements NullT if (unknownArray == null) { logger.warn("Ambiguous type! JSON field {}" + " contains all nulls. Assuming VARCHAR.", key); - resolveTo(parentTuple.scalarListenerFor(key, JsonType.STRING)); + resolveTo(parentTuple.scalarListenerForValue(key, JsonType.STRING)); } else { logger.warn("Ambiguous type! JSON array field {}" + " contains all empty arrays. Assuming repeated VARCHAR.", key); - resolveTo(parentTuple.arrayListenerFor(key, JsonType.STRING)); + resolveTo(parentTuple.scalarArrayListenerForValue(key, JsonType.STRING)); } } @@ -168,7 +168,7 @@ public class UnknownFieldListener extends AbstractValueListener implements NullT " starts with null element. 
Assuming repeated VARCHAR.", key); valueDef = new ValueDef(JsonType.STRING, valueDef.dimensions()); } - return resolveTo(parentTuple.listenerFor(key, valueDef)); + return resolveTo(parentTuple.listenerForValue(key, valueDef)); } /** diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java index 2b814ac..5c4425d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java @@ -36,28 +36,33 @@ import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.databind.ObjectMapper; /** - * Parser for JSON that converts a stream of tokens from the Jackson JSON - * parser into a set of events on listeners structured to follow the - * data structure of the incoming data. JSON can assume many forms. This - * class assumes that the data is in a tree structure that corresponds - * to the Drill row structure: a series of object with (mostly) the - * same schema. Members of the top-level object can be Drill types: - * scalars, arrays, nested objects (Drill "MAP"s), and so on. + * Parser for a subset of the <a href="http://jsonlines.org/">jsonlines</a> + * format. In particular, supports line-delimited JSON objects, or a single + * array which holds a list of JSON objects. * <p> - * The structure parser follows the structure of the incoming data, - * whatever it might be. This class imposes no semantic rules on that - * data, it just "calls 'em as it sees 'em" as they say. The listeners - * are responsible for deciding if the data data makes sense, and if - * so, how it should be handled. + * Alternatively, a message parser can provide a path to an array of JSON + * objects within a message such as a REST response.
* <p> - * The root listener will receive an event to fields in the top-level - * object as those fields first appear. Each field is a value object - * and can correspond to a scalar, array, another object, etc. The - * type of the value is declared when known, but sometimes it is not - * known, such as if the value is {@code null}. And, of course, according - * to JSON, the value is free to change from one row to the next. The - * listener decides it if wants to handle such "schema change", and if - * so, how. + * Implemented as a parser which converts a stream of tokens from the Jackson + * JSON parser into a set of events on listeners structured to follow the data + * structure of the incoming data. JSON can assume many forms. This class + * assumes that the data is in a tree structure that corresponds to the Drill + * row structure: a series of object with (mostly) the same schema. Members of + * the top-level object can be Drill types: scalars, arrays, nested objects + * (Drill "MAP"s), and so on. + * <p> + * The structure parser follows the structure of the incoming data, whatever it + * might be. This class imposes no semantic rules on that data, it just "calls + * 'em as it sees 'em" as they say. The listeners are responsible for deciding + * if the data data makes sense, and if so, how it should be handled. + * <p> + * The root listener will receive an event to fields in the top-level object as + * those fields first appear. Each field is a value object and can correspond to + * a scalar, array, another object, etc. The type of the value is declared when + * known, but sometimes it is not known, such as if the value is {@code null}. + * And, of course, according to JSON, the value is free to change from one row + * to the next. The listener decides it if wants to handle such "schema change", + * and if so, how. 
*/ public class JsonStructureParser { protected static final Logger logger = LoggerFactory.getLogger(JsonStructureParser.class); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java index ecd5b29..5fbcc25 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java @@ -128,6 +128,8 @@ public class TokenIterator { public String textValue() { try { return parser.getText(); + } catch (JsonParseException e) { + throw errorFactory.syntaxError(e); } catch (IOException e) { throw errorFactory.ioException(e); } @@ -136,6 +138,8 @@ public class TokenIterator { public long longValue() { try { return parser.getLongValue(); + } catch (JsonParseException e) { + throw errorFactory.syntaxError(e); } catch (IOException e) { throw errorFactory.ioException(e); } catch (UnsupportedConversionError e) { @@ -146,6 +150,8 @@ public class TokenIterator { public String stringValue() { try { return parser.getValueAsString(); + } catch (JsonParseException e) { + throw errorFactory.syntaxError(e); } catch (IOException e) { throw errorFactory.ioException(e); } catch (UnsupportedConversionError e) { @@ -156,6 +162,8 @@ public class TokenIterator { public double doubleValue() { try { return parser.getValueAsDouble(); + } catch (JsonParseException e) { + throw errorFactory.syntaxError(e); } catch (IOException e) { throw errorFactory.ioException(e); } catch (UnsupportedConversionError e) { diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestObjects.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestObjects.java index 420c2fa..7c6475d 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestObjects.java +++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestObjects.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.exceptions.UserException; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.rowSet.RowSet; @@ -32,7 +33,9 @@ import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.rowSet.RowSetUtilities; import org.junit.Test; +import org.junit.experimental.categories.Category; +@Category(RowSetTests.class) public class TestObjects extends BaseJsonLoaderTest { @Test diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestRepeatedList.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestRepeatedList.java index 8a54863..76d48f3 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestRepeatedList.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestRepeatedList.java @@ -24,20 +24,20 @@ import static org.apache.drill.test.rowSet.RowSetUtilities.singleObjArray; import static org.apache.drill.test.rowSet.RowSetUtilities.strArray; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.rowSet.RowSet; import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.rowSet.RowSetUtilities; import org.junit.Test; 
+import org.junit.experimental.categories.Category; /** - * Tests repeated lists to form a 2D array of various data types. + * Tests repeated lists to form a 2D or 3D array of various data types. */ +@Category(RowSetTests.class) public class TestRepeatedList extends BaseJsonLoaderTest { @Test @@ -173,21 +173,6 @@ public class TestRepeatedList extends BaseJsonLoaderTest { } @Test - public void test3DScalars() { - String json = - "{a: [[[1, 2]]]]}"; - JsonLoaderFixture loader = new JsonLoaderFixture(); - loader.open(json); - try { - loader.next(); - fail(); - } catch (UserException e) { - assertTrue(e.getMessage().contains("arrays deeper than two levels")); - } - loader.close(); - } - - @Test public void test2DObjects() { String json = "{a: [[{b: 1}, {b: 2}], [{b: 3}, {b: 4}, {b: 5}]]}\n" + @@ -298,4 +283,146 @@ public class TestRepeatedList extends BaseJsonLoaderTest { assertNull(loader.next()); loader.close(); } + + @Test + public void test3DScalars() { + String json = + "{a: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]}"; + JsonLoaderFixture loader = new JsonLoaderFixture(); + loader.open(json); + RowSet results = loader.next(); + assertNotNull(results); + + TupleMetadata expectedSchema = new SchemaBuilder() + .addRepeatedList("a") + .addDimension() + .addArray(MinorType.BIGINT) + .resumeList() + .resumeSchema() + .build(); + RowSet expected = fixture.rowSetBuilder(expectedSchema) + .addSingleCol(objArray( + objArray(longArray(1L, 2L), longArray(3L, 4L)), + objArray(longArray(5L, 6L), longArray(7L, 8L)))) + .build(); + RowSetUtilities.verify(expected, results); + assertNull(loader.next()); + loader.close(); + } + + @Test + public void testNullTo3DScalars() { + String json = + "{a: null}\n" + + "{a: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]}"; + JsonLoaderFixture loader = new JsonLoaderFixture(); + loader.open(json); + RowSet results = loader.next(); + assertNotNull(results); + + TupleMetadata expectedSchema = new SchemaBuilder() + .addRepeatedList("a") + .addDimension() + 
.addArray(MinorType.BIGINT) + .resumeList() + .resumeSchema() + .build(); + RowSet expected = fixture.rowSetBuilder(expectedSchema) + .addSingleCol(objArray()) + .addSingleCol(objArray( + objArray(longArray(1L, 2L), longArray(3L, 4L)), + objArray(longArray(5L, 6L), longArray(7L, 8L)))) + .build(); + RowSetUtilities.verify(expected, results); + assertNull(loader.next()); + loader.close(); + } + + @Test + public void testUnknownTo3DScalars() { + String json = + "{a: []}\n" + + "{a: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]}"; + JsonLoaderFixture loader = new JsonLoaderFixture(); + loader.open(json); + RowSet results = loader.next(); + assertNotNull(results); + + TupleMetadata expectedSchema = new SchemaBuilder() + .addRepeatedList("a") + .addDimension() + .addArray(MinorType.BIGINT) + .resumeList() + .resumeSchema() + .build(); + RowSet expected = fixture.rowSetBuilder(expectedSchema) + .addSingleCol(objArray()) + .addSingleCol(objArray( + objArray(longArray(1L, 2L), longArray(3L, 4L)), + objArray(longArray(5L, 6L), longArray(7L, 8L)))) + .build(); + RowSetUtilities.verify(expected, results); + assertNull(loader.next()); + loader.close(); + } + + @Test + public void test3DObjects() { + String json = + "{a: [[[{n: 1}, {n: 2}], [{n: 3}, {n: 4}]], " + + "[[{n: 5}, {n: 6}], [{n: 7}, {n: 8}]]]}"; + JsonLoaderFixture loader = new JsonLoaderFixture(); + loader.open(json); + RowSet results = loader.next(); + assertNotNull(results); + + TupleMetadata expectedSchema = new SchemaBuilder() + .addRepeatedList("a") + .addDimension() + .addMapArray() + .addNullable("n", MinorType.BIGINT) + .resumeList() + .resumeList() + .resumeSchema() + .build(); + RowSet expected = fixture.rowSetBuilder(expectedSchema) + .addSingleCol(objArray( + objArray(objArray(mapValue(1L), mapValue(2L)), objArray(mapValue(3L), mapValue(4L))), + objArray(objArray(mapValue(5L), mapValue(6L)), objArray(mapValue(7L), mapValue(8L))))) + .build(); + RowSetUtilities.verify(expected, results); + 
assertNull(loader.next()); + loader.close(); + } + + @Test + public void testUnknownTo3DObjects() { + String json = + "{a: []}\n" + + "{a: [[[{n: 1}, {n: 2}], [{n: 3}, {n: 4}]], " + + "[[{n: 5}, {n: 6}], [{n: 7}, {n: 8}]]]}"; + JsonLoaderFixture loader = new JsonLoaderFixture(); + loader.open(json); + RowSet results = loader.next(); + assertNotNull(results); + + TupleMetadata expectedSchema = new SchemaBuilder() + .addRepeatedList("a") + .addDimension() + .addMapArray() + .addNullable("n", MinorType.BIGINT) + .resumeList() + .resumeList() + .resumeSchema() + .build(); + RowSet expected = fixture.rowSetBuilder(expectedSchema) + .addSingleCol(objArray()) + .addSingleCol(objArray( + objArray(objArray(mapValue(1L), mapValue(2L)), objArray(mapValue(3L), mapValue(4L))), + objArray(objArray(mapValue(5L), mapValue(6L)), objArray(mapValue(7L), mapValue(8L))))) + .build(); + RowSetUtilities.verify(expected, results); + assertNull(loader.next()); + loader.close(); + } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalarArrays.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalarArrays.java index 8d82878..ea6b0e9 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalarArrays.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalarArrays.java @@ -26,6 +26,7 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.exceptions.UserException; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.rowSet.RowSet; @@ -33,6 +34,7 @@ import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.rowSet.RowSetUtilities; import org.junit.Test; +import 
org.junit.experimental.categories.Category; /** * Test scalar arrays. Without a schema, the first array token @@ -44,6 +46,7 @@ import org.junit.Test; * Verifies that null array elements are converted to a default * value for the type (false, 0 or empty string.) */ +@Category(RowSetTests.class) public class TestScalarArrays extends BaseJsonLoaderTest { @Test diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalars.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalars.java index 0b19366..09ba62f 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalars.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestScalars.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.exceptions.UserException; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.resultSet.project.Projections; @@ -31,6 +32,7 @@ import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.rowSet.RowSetUtilities; import org.junit.Test; +import org.junit.experimental.categories.Category; /** * Tests JSON scalar handling. Without a schema, the first non-null value @@ -44,6 +46,7 @@ import org.junit.Test; * to a few messy rows a billion rows in, or due to the order that the scanners * see the data. 
*/ +@Category(RowSetTests.class) public class TestScalars extends BaseJsonLoaderTest { /** diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestUnknowns.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestUnknowns.java index 0ac5ec6..501c1a7 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestUnknowns.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestUnknowns.java @@ -24,12 +24,14 @@ import static org.apache.drill.test.rowSet.RowSetUtilities.strArray; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.rowSet.RowSet; import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.rowSet.RowSetUtilities; import org.junit.Test; +import org.junit.experimental.categories.Category; /** * Tests the ability of the JSON reader to "wait out" a set of leading @@ -37,6 +39,7 @@ import org.junit.Test; * deciding on the column type. Hitting the end of batch, or an array * that contains only null values, forces resolution to VARCHAR. 
*/ +@Category(RowSetTests.class) public class TestUnknowns extends BaseJsonLoaderTest { @Test @@ -84,6 +87,30 @@ public class TestUnknowns extends BaseJsonLoaderTest { loader.close(); } + @Test + public void testNullToObject() { + String json = + "{a: null} {a: {b: 20, c: 220}}"; + JsonLoaderFixture loader = new JsonLoaderFixture(); + loader.open(json); + RowSet results = loader.next(); + assertNotNull(results); + + TupleMetadata expectedSchema = new SchemaBuilder() + .addMap("a") + .addNullable("b", MinorType.BIGINT) + .addNullable("c", MinorType.BIGINT) + .resumeSchema() + .build(); + RowSet expected = fixture.rowSetBuilder(expectedSchema) + .addSingleCol(mapValue(null, null)) + .addSingleCol(mapValue(20, 220)) + .build(); + RowSetUtilities.verify(expected, results); + assertNull(loader.next()); + loader.close(); + } + /** * Input contains all nulls. The loader will force resolve to a * type, and will choose VARCHAR as all scalar types which diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestVariant.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestVariant.java index 480aab3..66bc4be 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestVariant.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/loader/TestVariant.java @@ -22,13 +22,16 @@ import static org.apache.drill.test.rowSet.RowSetUtilities.objArray; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.rowSet.RowSet; import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.rowSet.RowSetUtilities; import org.junit.Test; +import org.junit.experimental.categories.Category; +@Category(RowSetTests.class) 
public class TestVariant extends BaseJsonLoaderTest { @Test diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java index 0dc469d..c9d2294 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java @@ -304,4 +304,9 @@ public class MetadataUtils { } return false; } + + public static boolean isRepeatedList(ColumnMetadata col) { + return col.type() == MinorType.LIST && + col.mode() == DataMode.REPEATED; + } }