This is an automated email from the ASF dual-hosted git repository. siddteotia pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push: new 66ed883 Complex FieldSpec (#5422) 66ed883 is described below commit 66ed883627271611bdf36d63177e51ef80a2e53d Author: Sidd <siddharthteo...@gmail.com> AuthorDate: Fri May 22 09:27:32 2020 -0700 Complex FieldSpec (#5422) * new * Complex field spec * cleanup Co-authored-by: Siddharth Teotia <steo...@steotia-mn1.linkedin.biz> --- .../apache/pinot/spi/data/ComplexFieldSpec.java | 92 ++++++++++++++++++++++ .../java/org/apache/pinot/spi/data/FieldSpec.java | 6 +- .../java/org/apache/pinot/spi/data/Schema.java | 31 ++++++++ 3 files changed, 127 insertions(+), 2 deletions(-) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java new file mode 100644 index 0000000..3b29fc9 --- /dev/null +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.spi.data; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.google.common.base.Preconditions; +import java.util.HashMap; +import java.util.Map; +import javax.annotation.Nonnull; + + +/** + * FieldSpec for complex fields. The {@link org.apache.pinot.spi.data.FieldSpec.FieldType} + * is COMPLEX and the inner data type represents the root data type of the field. + * It could be STRUCT, MAP or LIST. A complex field is composable with a single root type + * and a number of child types. Although we have multi-value primitive columns, LIST + * is for representing lists of both complex and primitive types inside a complex field. + * + * Consider a person JSON whose root type is STRUCT and which is composed of inner members: + * STRUCT( + * name: STRING + * age: INT + * salary: INT + * addresses: LIST (STRUCT + * apt: INT + * street: STRING + * city: STRING + * zip: INT + * ) + * ) + * + * The fieldspec would be COMPLEX with type as STRUCT and 4 inner members + * to model the hierarchy + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public final class ComplexFieldSpec extends FieldSpec { + + private final Map<String, FieldSpec> _childFieldSpecs; + + // Default constructor required by JSON de-serializer + public ComplexFieldSpec() { + super(); + _childFieldSpecs = new HashMap<>(); + } + + public ComplexFieldSpec(@Nonnull String name, DataType dataType, boolean isSingleValueField) { + super(name, dataType, isSingleValueField); + Preconditions.checkArgument(dataType == DataType.STRUCT || dataType == DataType.MAP || dataType == DataType.LIST); + _childFieldSpecs = new HashMap<>(); + } + + public FieldSpec getChildFieldSpec(String child) { + return _childFieldSpecs.get(child); + } + + public void addChildFieldSpec(String child, FieldSpec fieldSpec) { + _childFieldSpecs.put(child, fieldSpec); + } + + public Map<String, FieldSpec> getChildFieldSpecs() { + return 
_childFieldSpecs; + } + + @JsonIgnore + @Nonnull + @Override + public FieldType getFieldType() { + return FieldType.COMPLEX; + } + + @Override + public String toString() { + return "field type: COMPLEX, field name: " + _name + ", root data type: " + _dataType; + } +} \ No newline at end of file diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java index d40a2b4..de3e3ee 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java @@ -318,14 +318,16 @@ public abstract class FieldSpec implements Comparable<FieldSpec> { * segments, otherwise treated the same as <code>DIMENSION</code> field. */ public enum FieldType { - DIMENSION, METRIC, TIME, DATE_TIME + DIMENSION, METRIC, TIME, DATE_TIME, COMPLEX } /** * The <code>DataType</code> enum is used to demonstrate the data type of a field. */ public enum DataType { - INT, LONG, FLOAT, DOUBLE, BOOLEAN/* Stored as STRING */, STRING, BYTES; + // LIST is for complex lists which is different from multi-value column of primitives + // STRUCT, MAP and LIST are composable to form a COMPLEX field + INT, LONG, FLOAT, DOUBLE, BOOLEAN/* Stored as STRING */, STRING, BYTES, STRUCT, MAP, LIST; /** * Returns the data type stored in Pinot. 
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java index b227ad5..fb6330e 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java @@ -65,6 +65,7 @@ public final class Schema { private final List<MetricFieldSpec> _metricFieldSpecs = new ArrayList<>(); private TimeFieldSpec _timeFieldSpec; private final List<DateTimeFieldSpec> _dateTimeFieldSpecs = new ArrayList<>(); + private final List<ComplexFieldSpec> _complexFieldSpecs = new ArrayList<>(); // Json ignored fields private transient final Map<String, FieldSpec> _fieldSpecMap = new HashMap<>(); @@ -188,6 +189,9 @@ public final class Schema { _dateTimeNames.add(columnName); _dateTimeFieldSpecs.add((DateTimeFieldSpec) fieldSpec); break; + case COMPLEX: + _complexFieldSpecs.add((ComplexFieldSpec) fieldSpec); + break; default: throw new UnsupportedOperationException("Unsupported field type: " + fieldType); } @@ -364,6 +368,13 @@ public final class Schema { } jsonObject.set("dateTimeFieldSpecs", jsonArray); } + if (!_complexFieldSpecs.isEmpty()) { + ArrayNode jsonArray = JsonUtils.newArrayNode(); + for (ComplexFieldSpec complexFieldSpec : _complexFieldSpecs) { + jsonArray.add(complexFieldSpec.toJsonObject()); + } + jsonObject.set("complexFieldSpecs", jsonArray); + } return jsonObject; } @@ -436,6 +447,16 @@ public final class Schema { return false; } break; + case COMPLEX: + switch (dataType) { + case STRUCT: + case MAP: + case LIST: + break; + default: + ctxLogger.info("Unsupported data type: {} in COMPLEX field: {}", dataType, fieldName); + return false; + } default: ctxLogger.info("Unsupported field type: {} for field: {}", dataType, fieldName); return false; @@ -540,6 +561,16 @@ public final class Schema { return this; } + /** + * Add complex field spec + * @param name name of complex (nested) field + * @param dataType root data type of complex 
field + */ + public SchemaBuilder addComplex(String name, DataType dataType) { + _schema.addField(new ComplexFieldSpec(name, dataType, /* single value field */ true)); + return this; + } + public Schema build() { if (!_schema.validate(LOGGER)) { throw new RuntimeException("Invalid schema"); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org