Repository: arrow Updated Branches: refs/heads/master 3f85cee51 -> 2d8e82056
ARROW-319: Add canonical Arrow Schema json representation Author: Julien Le Dem <jul...@dremio.com> Closes #158 from julienledem/json and squashes the following commits: 796cc6d [Julien Le Dem] add json documentation f0b2a39 [Julien Le Dem] add sanity checks 7dd6d45 [Julien Le Dem] fix typo 248d3ec [Julien Le Dem] more tests f2bc3fb [Julien Le Dem] ARROW-319: Add canonical Arrow Schema json representation Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2d8e8205 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2d8e8205 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2d8e8205 Branch: refs/heads/master Commit: 2d8e82056afdcf125e6e512f96007389ce79c1c7 Parents: 3f85cee Author: Julien Le Dem <jul...@dremio.com> Authored: Fri Oct 7 12:13:58 2016 -0700 Committer: Julien Le Dem <jul...@dremio.com> Committed: Fri Oct 7 12:13:58 2016 -0700 ---------------------------------------------------------------------- format/Metadata.md | 81 +++++++++ .../src/main/codegen/templates/ArrowType.java | 165 +++++++++++++++++-- .../arrow/vector/schema/ArrowVectorType.java | 43 ++++- .../apache/arrow/vector/schema/TypeLayout.java | 11 +- .../arrow/vector/schema/VectorLayout.java | 5 +- .../apache/arrow/vector/types/pojo/Field.java | 23 ++- .../apache/arrow/vector/types/pojo/Schema.java | 90 ++++++++-- .../arrow/vector/types/pojo/TestSchema.java | 119 +++++++++++++ 8 files changed, 501 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8e8205/format/Metadata.md ---------------------------------------------------------------------- diff --git a/format/Metadata.md b/format/Metadata.md index 3388a7e..653a4c7 100644 --- a/format/Metadata.md +++ b/format/Metadata.md @@ -63,6 +63,87 @@ table Field { The `type` is the logical type of the field. 
Nested types, such as List, Struct, and Union, have a sequence of child fields. +A JSON representation of the schema is also provided: +Field: +``` +{ + "name" : "name_of_the_field", + "nullable" : false, + "type" : /* Type */, + "children" : [ /* Field */ ], + "typeLayout" : { + "vectors" : [ /* VectorLayout */ ] + } +} +``` +VectorLayout: +``` +{ + "type" : "DATA|OFFSET|VALIDITY|TYPE", + "typeBitWidth" : /* int */ +} +``` +Type: +``` +{ + "name" : "null|struct|list|union|int|floatingpoint|utf8|binary|bool|decimal|date|time|timestamp|interval" + // fields as defined in the FlatBuffers schema, depending on the type name +} +``` +Union: +``` +{ + "name" : "union", + "mode" : "Sparse|Dense", + "typeIds" : [ /* integer */ ] +} +``` +Int: +``` +{ + "name" : "int", + "bitWidth" : /* integer */, + "isSigned" : /* boolean */ +} +``` +FloatingPoint: +``` +{ + "name" : "floatingpoint", + "precision" : "HALF|SINGLE|DOUBLE" +} +``` +Decimal: +``` +{ + "name" : "decimal", + "precision" : /* integer */, + "scale" : /* integer */ +} +``` +Timestamp: +``` +{ + "name" : "timestamp", + "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND" +} +``` +Interval: +``` +{ + "name" : "interval", + "unit" : "YEAR_MONTH|DAY_TIME" +} +``` +Schema: +``` +{ + "fields" : [ + /* Field */ + ] +} +``` + ## Record data headers A record batch is a collection of top-level named, equal length Arrow arrays http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8e8205/java/vector/src/main/codegen/templates/ArrowType.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java index 30f2c68..4069e60 100644 --- a/java/vector/src/main/codegen/templates/ArrowType.java +++ b/java/vector/src/main/codegen/templates/ArrowType.java @@ -16,12 +16,6 @@ * limitations under the License. 
*/ -import org.apache.arrow.flatbuf.Field; -import org.apache.arrow.flatbuf.Type; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; - -import java.util.Objects; - <@pp.dropOutputFile /> <@pp.changeOutputFile name="/org/apache/arrow/vector/types/pojo/ArrowType.java" /> <#include "/@includes/license.ftl" /> @@ -31,13 +25,150 @@ package org.apache.arrow.vector.types.pojo; import com.google.flatbuffers.FlatBufferBuilder; import org.apache.arrow.flatbuf.Type; +import java.io.IOException; import java.util.Objects; +import org.apache.arrow.flatbuf.Precision; +import org.apache.arrow.flatbuf.UnionMode; +import org.apache.arrow.flatbuf.TimeUnit; +import org.apache.arrow.flatbuf.IntervalUnit; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; + /** * Arrow types **/ +@JsonTypeInfo( + use = JsonTypeInfo.Id.NAME, + include = JsonTypeInfo.As.PROPERTY, + property = "name") +@JsonSubTypes({ +<#list arrowTypes.types as type> + @JsonSubTypes.Type(value = ArrowType.${type.name}.class, name = "${type.name?remove_ending("_")?lower_case}"), +</#list> +}) public abstract class ArrowType { + private static class FloatingPointPrecisionSerializer extends JsonSerializer<Short> { + @Override + public void serialize(Short precision, + JsonGenerator 
jsonGenerator, + SerializerProvider serializerProvider) + throws IOException, JsonProcessingException { + jsonGenerator.writeObject(Precision.name(precision)); + } + } + + private static class FloatingPointPrecisionDeserializer extends JsonDeserializer<Short> { + @Override + public Short deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException { + String name = p.getText(); + switch(name) { + case "HALF": + return Precision.HALF; + case "SINGLE": + return Precision.SINGLE; + case "DOUBLE": + return Precision.DOUBLE; + default: + throw new IllegalArgumentException("unknown precision: " + name); + } + } + } + + private static class UnionModeSerializer extends JsonSerializer<Short> { + @Override + public void serialize(Short mode, + JsonGenerator jsonGenerator, + SerializerProvider serializerProvider) + throws IOException, JsonProcessingException { + jsonGenerator.writeObject(UnionMode.name(mode)); + } + } + + private static class UnionModeDeserializer extends JsonDeserializer<Short> { + @Override + public Short deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException { + String name = p.getText(); + switch(name) { + case "Sparse": + return UnionMode.Sparse; + case "Dense": + return UnionMode.Dense; + default: + throw new IllegalArgumentException("unknown union mode: " + name); + } + } + } + + private static class TimestampUnitSerializer extends JsonSerializer<Short> { + @Override + public void serialize(Short unit, + JsonGenerator jsonGenerator, + SerializerProvider serializerProvider) + throws IOException, JsonProcessingException { + jsonGenerator.writeObject(TimeUnit.name(unit)); + } + } + + private static class TimestampUnitDeserializer extends JsonDeserializer<Short> { + @Override + public Short deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException { + String name = p.getText(); + switch(name) { + case "SECOND": + return 
TimeUnit.SECOND; + case "MILLISECOND": + return TimeUnit.MILLISECOND; + case "MICROSECOND": + return TimeUnit.MICROSECOND; + case "NANOSECOND": + return TimeUnit.NANOSECOND; + default: + throw new IllegalArgumentException("unknown time unit: " + name); + } + } + } + + private static class IntervalUnitSerializer extends JsonSerializer<Short> { + @Override + public void serialize(Short unit, + JsonGenerator jsonGenerator, + SerializerProvider serializerProvider) + throws IOException, JsonProcessingException { + jsonGenerator.writeObject(IntervalUnit.name(unit)); + } + } + + private static class IntervalUnitDeserializer extends JsonDeserializer<Short> { + @Override + public Short deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException { + String name = p.getText(); + switch(name) { + case "YEAR_MONTH": + return IntervalUnit.YEAR_MONTH; + case "DAY_TIME": + return IntervalUnit.DAY_TIME; + default: + throw new IllegalArgumentException("unknown interval unit: " + name); + } + } + } + + @JsonIgnore public abstract byte getTypeType(); public abstract int getType(FlatBufferBuilder builder); public abstract <T> T accept(ArrowTypeVisitor<T> visitor); @@ -70,7 +201,12 @@ public abstract class ArrowType { </#list> <#if type.fields?size != 0> - public ${type.name}(<#list type.fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) { + @JsonCreator + public ${type.name}( + <#list type.fields as field> + <#if field.type == "short"> @JsonDeserialize(using = ${type.name}${field.name?cap_first}Deserializer.class) </#if>@JsonProperty("${field.name}") ${field.type} ${field.name}<#if field_has_next>, </#if> + </#list> + ) { <#list type.fields as field> this.${field.name} = ${field.name}; </#list> @@ -86,20 +222,29 @@ public abstract class ArrowType { public int getType(FlatBufferBuilder builder) { <#list type.fields as field> <#if field.type == "String"> - int ${field.name} = builder.createString(this.${field.name}); + 
int ${field.name} = this.${field.name} == null ? -1 : builder.createString(this.${field.name}); </#if> <#if field.type == "int[]"> - int ${field.name} = org.apache.arrow.flatbuf.${type.name}.create${field.name?cap_first}Vector(builder, this.${field.name}); + int ${field.name} = this.${field.name} == null ? -1 : org.apache.arrow.flatbuf.${type.name}.create${field.name?cap_first}Vector(builder, this.${field.name}); </#if> </#list> org.apache.arrow.flatbuf.${type.name}.start${type.name}(builder); <#list type.fields as field> - org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, ${field.name}); + <#if field.type == "String" || field.type == "int[]"> + if (this.${field.name} != null) { + org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, ${field.name}); + } + <#else> + org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, this.${field.name}); + </#if> </#list> return org.apache.arrow.flatbuf.${type.name}.end${type.name}(builder); } <#list fields as field> + <#if field.type == "short"> + @JsonSerialize(using = ${type.name}${field.name?cap_first}Serializer.class) + </#if> public ${field.type} get${field.name?cap_first}() { return ${field.name}; } http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8e8205/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java index 9b7fa45..8fe8e48 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java @@ -17,8 +17,15 @@ */ package org.apache.arrow.vector.schema; +import java.util.Map; + import org.apache.arrow.flatbuf.VectorType; +import com.fasterxml.jackson.annotation.JsonCreator; +import 
com.fasterxml.jackson.annotation.JsonValue; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableMap.Builder; + public class ArrowVectorType { public static final ArrowVectorType DATA = new ArrowVectorType(VectorType.DATA); @@ -26,22 +33,52 @@ public class ArrowVectorType { public static final ArrowVectorType VALIDITY = new ArrowVectorType(VectorType.VALIDITY); public static final ArrowVectorType TYPE = new ArrowVectorType(VectorType.TYPE); + private static final Map<String, ArrowVectorType> typeByName; + static { + ArrowVectorType[] types = { DATA, OFFSET, VALIDITY, TYPE }; + Builder<String, ArrowVectorType> builder = ImmutableMap.builder(); + for (ArrowVectorType type: types) { + builder.put(type.getName(), type); + } + typeByName = builder.build(); + } + + public static ArrowVectorType fromName(String name) { + ArrowVectorType type = typeByName.get(name); + if (type == null) { + throw new IllegalArgumentException("Unknown type " + name); + } + return type; + } + private final short type; public ArrowVectorType(short type) { this.type = type; + // validate that the type is valid + getName(); + } + + @JsonCreator + private ArrowVectorType(String name) { + this.type = fromName(name).type; } public short getType() { return type; } - @Override - public String toString() { + @JsonValue + public String getName() { try { return VectorType.name(type); } catch (ArrayIndexOutOfBoundsException e) { - return "Unlnown type " + type; + throw new IllegalArgumentException("Unknown type " + type); } } + + @Override + public String toString() { + return getName(); + } } http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8e8205/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java index 
072385a..06ae203 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java @@ -19,6 +19,7 @@ package org.apache.arrow.vector.schema; import static java.util.Arrays.asList; import static org.apache.arrow.flatbuf.Precision.DOUBLE; +import static org.apache.arrow.flatbuf.Precision.HALF; import static org.apache.arrow.flatbuf.Precision.SINGLE; import static org.apache.arrow.vector.schema.VectorLayout.booleanVector; import static org.apache.arrow.vector.schema.VectorLayout.byteVector; @@ -49,6 +50,9 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; /** @@ -110,6 +114,9 @@ public class TypeLayout { @Override public TypeLayout visit(FloatingPoint type) { int bitWidth; switch (type.getPrecision()) { + case HALF: + bitWidth = 16; + break; case SINGLE: bitWidth = 32; break; @@ -184,7 +191,8 @@ public class TypeLayout { private final List<VectorLayout> vectors; - public TypeLayout(List<VectorLayout> vectors) { + @JsonCreator + public TypeLayout(@JsonProperty("vectors") List<VectorLayout> vectors) { super(); this.vectors = Preconditions.checkNotNull(vectors); } @@ -198,6 +206,7 @@ public class TypeLayout { return vectors; } + @JsonIgnore public List<ArrowVectorType> getVectorTypes() { List<ArrowVectorType> types = new ArrayList<>(vectors.size()); for (VectorLayout vector : vectors) { http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8e8205/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java ---------------------------------------------------------------------- diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java index 532e9d2..931c00a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java @@ -22,6 +22,8 @@ import static org.apache.arrow.vector.schema.ArrowVectorType.OFFSET; import static org.apache.arrow.vector.schema.ArrowVectorType.TYPE; import static org.apache.arrow.vector.schema.ArrowVectorType.VALIDITY; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.flatbuffers.FlatBufferBuilder; @@ -75,7 +77,8 @@ public class VectorLayout implements FBSerializable { private final ArrowVectorType type; - private VectorLayout(ArrowVectorType type, int typeBitWidth) { + @JsonCreator + private VectorLayout(@JsonProperty("type") ArrowVectorType type, @JsonProperty("typeBitWidth") int typeBitWidth) { super(); this.type = Preconditions.checkNotNull(type); this.typeBitWidth = (short)typeBitWidth; http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8e8205/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index cfa1ed4..49ba524 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector.types.pojo; +import static com.google.common.base.Preconditions.checkNotNull; import static org.apache.arrow.vector.types.pojo.ArrowType.getTypeForField; import java.util.List; @@ -26,6 +27,8 @@ import java.util.Objects; import 
org.apache.arrow.vector.schema.TypeLayout; import org.apache.arrow.vector.schema.VectorLayout; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; import com.google.flatbuffers.FlatBufferBuilder; @@ -36,20 +39,26 @@ public class Field { private final List<Field> children; private final TypeLayout typeLayout; - private Field(String name, boolean nullable, ArrowType type, List<Field> children, TypeLayout typeLayout) { + @JsonCreator + private Field( + @JsonProperty("name") String name, + @JsonProperty("nullable") boolean nullable, + @JsonProperty("type") ArrowType type, + @JsonProperty("children") List<Field> children, + @JsonProperty("typeLayout") TypeLayout typeLayout) { this.name = name; this.nullable = nullable; - this.type = type; + this.type = checkNotNull(type); if (children == null) { this.children = ImmutableList.of(); } else { this.children = children; } - this.typeLayout = typeLayout; + this.typeLayout = checkNotNull(typeLayout); } public Field(String name, boolean nullable, ArrowType type, List<Field> children) { - this(name, nullable, type, children, TypeLayout.getTypeLayout(type)); + this(name, nullable, type, children, TypeLayout.getTypeLayout(checkNotNull(type))); } public static Field convertField(org.apache.arrow.flatbuf.Field field) { @@ -77,7 +86,7 @@ public class Field { } public int getField(FlatBufferBuilder builder) { - int nameOffset = builder.createString(name); + int nameOffset = name == null ? 
-1 : builder.createString(name); int typeOffset = type.getType(builder); int[] childrenData = new int[children.size()]; for (int i = 0; i < children.size(); i++) { @@ -91,7 +100,9 @@ public class Field { } int layoutOffset = org.apache.arrow.flatbuf.Field.createLayoutVector(builder, buffersData); org.apache.arrow.flatbuf.Field.startField(builder); - org.apache.arrow.flatbuf.Field.addName(builder, nameOffset); + if (name != null) { + org.apache.arrow.flatbuf.Field.addName(builder, nameOffset); + } org.apache.arrow.flatbuf.Field.addNullable(builder, nullable); org.apache.arrow.flatbuf.Field.addTypeType(builder, type.getTypeType()); org.apache.arrow.flatbuf.Field.addType(builder, typeOffset); http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8e8205/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java index 231be9b..44b877e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java @@ -18,19 +18,91 @@ package org.apache.arrow.vector.types.pojo; +import static com.google.common.base.Preconditions.checkNotNull; import static org.apache.arrow.vector.types.pojo.Field.convertField; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Objects; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.fasterxml.jackson.databind.ObjectWriter; import com.google.common.collect.ImmutableList; import 
com.google.flatbuffers.FlatBufferBuilder; +/** + * An Arrow Schema + */ public class Schema { - private List<Field> fields; - public Schema(List<Field> fields) { - this.fields = ImmutableList.copyOf(fields); + /** + * @param fields the list of the fields + * @param name the name of the field to return + * @return the corresponding field + * @throws IllegalArgumentException if the field was not found + */ + public static Field findField(List<Field> fields, String name) { + for (Field field : fields) { + if (field.getName().equals(name)) { + return field; + } + } + throw new IllegalArgumentException(String.format("field %s not found in %s", name, fields)); + } + + private static final ObjectMapper mapper = new ObjectMapper(); + private static final ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter(); + private static final ObjectReader reader = mapper.readerFor(Schema.class); + + public static Schema fromJSON(String json) throws IOException { + return reader.readValue(checkNotNull(json)); + } + + public static Schema convertSchema(org.apache.arrow.flatbuf.Schema schema) { + ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder(); + for (int i = 0; i < schema.fieldsLength(); i++) { + childrenBuilder.add(convertField(schema.fields(i))); + } + List<Field> fields = childrenBuilder.build(); + return new Schema(fields); + } + + private final List<Field> fields; + + @JsonCreator + public Schema(@JsonProperty("fields") Iterable<Field> fields) { + List<Field> fieldList = new ArrayList<>(); + for (Field field : fields) { + fieldList.add(field); + } + this.fields = Collections.unmodifiableList(fieldList); + } + + public List<Field> getFields() { + return fields; + } + + /** + * @param name the name of the field to return + * @return the corresponding field + */ + public Field findField(String name) { + return findField(getFields(), name); + } + + public String toJson() { + try { + return writer.writeValueAsString(this); + } catch (JsonProcessingException e) { 
+ // this should not happen + throw new RuntimeException(e); + } } public int getSchema(FlatBufferBuilder builder) { @@ -44,9 +116,6 @@ public class Schema { return org.apache.arrow.flatbuf.Schema.endSchema(builder); } - public List<Field> getFields() { - return fields; - } @Override public int hashCode() { @@ -61,15 +130,6 @@ public class Schema { return Objects.equals(this.fields, ((Schema) obj).fields); } - public static Schema convertSchema(org.apache.arrow.flatbuf.Schema schema) { - ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder(); - for (int i = 0; i < schema.fieldsLength(); i++) { - childrenBuilder.add(convertField(schema.fields(i))); - } - List<Field> fields = childrenBuilder.build(); - return new Schema(fields); - } - @Override public String toString() { return "Schema" + fields; http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8e8205/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java new file mode 100644 index 0000000..0ef8be7 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.types.pojo; + +import static java.util.Arrays.asList; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; + +import org.apache.arrow.flatbuf.IntervalUnit; +import org.apache.arrow.flatbuf.Precision; +import org.apache.arrow.flatbuf.TimeUnit; +import org.apache.arrow.flatbuf.UnionMode; +import org.junit.Test; + +public class TestSchema { + + private static Field field(String name, boolean nullable, ArrowType type, Field... children) { + return new Field(name, nullable, type, asList(children)); + } + + private static Field field(String name, ArrowType type, Field... 
children) { + return field(name, true, type, children); + } + + @Test + public void testAll() throws IOException { + Schema schema = new Schema(asList( + field("a", false, new ArrowType.Null()), + field("b", new ArrowType.Struct_(), field("ba", new ArrowType.Null())), + field("c", new ArrowType.List(), field("ca", new ArrowType.Null())), + field("d", new ArrowType.Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new ArrowType.Null())), + field("e", new ArrowType.Int(8, true)), + field("f", new ArrowType.FloatingPoint(Precision.SINGLE)), + field("g", new ArrowType.Utf8()), + field("h", new ArrowType.Binary()), + field("i", new ArrowType.Bool()), + field("j", new ArrowType.Decimal(5, 5)), + field("k", new ArrowType.Date()), + field("l", new ArrowType.Time()), + field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND)), + field("n", new ArrowType.Interval(IntervalUnit.DAY_TIME)) + )); + roundTrip(schema); + } + + @Test + public void testUnion() throws IOException { + Schema schema = new Schema(asList( + field("d", new ArrowType.Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new ArrowType.Null())) + )); + roundTrip(schema); + contains(schema, "Sparse"); + } + + @Test + public void testTS() throws IOException { + Schema schema = new Schema(asList( + field("a", new ArrowType.Timestamp(TimeUnit.SECOND)), + field("b", new ArrowType.Timestamp(TimeUnit.MILLISECOND)), + field("c", new ArrowType.Timestamp(TimeUnit.MICROSECOND)), + field("d", new ArrowType.Timestamp(TimeUnit.NANOSECOND)) + )); + roundTrip(schema); + contains(schema, "SECOND", "MILLISECOND", "MICROSECOND", "NANOSECOND"); + } + + @Test + public void testInterval() throws IOException { + Schema schema = new Schema(asList( + field("a", new ArrowType.Interval(IntervalUnit.YEAR_MONTH)), + field("b", new ArrowType.Interval(IntervalUnit.DAY_TIME)) + )); + roundTrip(schema); + contains(schema, "YEAR_MONTH", "DAY_TIME"); + } + + @Test + public void testFP() throws IOException { + Schema schema = new 
Schema(asList( + field("a", new ArrowType.FloatingPoint(Precision.HALF)), + field("b", new ArrowType.FloatingPoint(Precision.SINGLE)), + field("c", new ArrowType.FloatingPoint(Precision.DOUBLE)) + )); + roundTrip(schema); + contains(schema, "HALF", "SINGLE", "DOUBLE"); + } + + private void roundTrip(Schema schema) throws IOException { + String json = schema.toJson(); + Schema actual = Schema.fromJSON(json); + assertEquals(schema.toJson(), actual.toJson()); + assertEquals(schema, actual); + } + + private void contains(Schema schema, String... s) throws IOException { + String json = schema.toJson(); + for (String string : s) { + assertTrue(json + " contains " + string, json.contains(string)); + } + } + +}