openinx commented on a change in pull request #1096:
URL: https://github.com/apache/iceberg/pull/1096#discussion_r436428372
##########
File path: build.gradle
##########
@@ -235,6 +235,38 @@ project(':iceberg-data') {
}
}
+project(':iceberg-flink') {
+ apply plugin: 'scala'
+
+ dependencies {
+ compile project(':iceberg-api')
+ compile project(':iceberg-common')
+ compile project(':iceberg-core')
+ compile project(':iceberg-data')
+ compile project(':iceberg-orc')
+ compile project(':iceberg-parquet')
+ compile project(':iceberg-arrow')
Review comment:
Seems it was introduced by mistake, will remove this.
##########
File path: build.gradle
##########
@@ -235,6 +235,38 @@ project(':iceberg-data') {
}
}
+project(':iceberg-flink') {
+ apply plugin: 'scala'
Review comment:
OK, although we depend on a few scala jars from the flink modules, we don't
need the scala plugin when building the iceberg-flink module because we won't
have any scala code.
Will remove this, thanks.
##########
File path: flink/src/test/java/org/apache/iceberg/flink/TestFlinkSchemaUtil.java
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.flink;
+
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.TableSchema;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestFlinkSchemaUtil {
+
+ @Test
+ public void testConvertFlinkSchemaToIcebergSchema() {
+ TableSchema flinkSchema = TableSchema.builder()
+ .field("id", DataTypes.INT().notNull())
+ .field("name", DataTypes.STRING()) /* optional by default */
+ .field("salary", DataTypes.DOUBLE().notNull())
+ .field("locations", DataTypes.MAP(DataTypes.STRING(),
+ DataTypes.ROW(DataTypes.FIELD("posX",
DataTypes.DOUBLE().notNull(), "X field"),
+ DataTypes.FIELD("posY", DataTypes.DOUBLE().notNull(), "Y
field"))))
+ .field("strArray", DataTypes.ARRAY(DataTypes.STRING()).nullable())
+ .field("intArray", DataTypes.ARRAY(DataTypes.INT()).nullable())
+ .field("char", DataTypes.CHAR(10).notNull())
+ .field("varchar", DataTypes.VARCHAR(10).notNull())
+ .field("boolean", DataTypes.BOOLEAN().nullable())
+ .field("tinyint", DataTypes.TINYINT())
+ .field("smallint", DataTypes.SMALLINT())
+ .field("bigint", DataTypes.BIGINT())
+ .field("varbinary", DataTypes.VARBINARY(10))
+ .field("binary", DataTypes.BINARY(10))
+ .field("time", DataTypes.TIME())
+ .field("timestamp", DataTypes.TIMESTAMP())
+ .field("date", DataTypes.DATE())
+ .field("decimal", DataTypes.DECIMAL(2, 2))
+ .build();
+
+ Schema actualSchema = FlinkSchemaUtil.convert(flinkSchema);
+ Schema expectedSchema = new Schema(
+ Types.NestedField.required(0, "id", Types.IntegerType.get(), null),
+ Types.NestedField.optional(1, "name", Types.StringType.get(), null),
+ Types.NestedField.required(2, "salary", Types.DoubleType.get(), null),
+ Types.NestedField.optional(3, "locations",
Types.MapType.ofOptional(20, 21,
+ Types.StringType.get(),
+ Types.StructType.of(
+ Types.NestedField.required(18, "posX", Types.DoubleType.get(),
"X field"),
+ Types.NestedField.required(19, "posY", Types.DoubleType.get(),
"Y field")
+ ))),
+ Types.NestedField.optional(4, "strArray",
Types.ListType.ofOptional(22, Types.StringType.get())),
+ Types.NestedField.optional(5, "intArray",
Types.ListType.ofOptional(23, Types.IntegerType.get())),
+ Types.NestedField.required(6, "char", Types.StringType.get()),
+ Types.NestedField.required(7, "varchar", Types.StringType.get()),
+ Types.NestedField.optional(8, "boolean", Types.BooleanType.get()),
+ Types.NestedField.optional(9, "tinyint", Types.IntegerType.get()),
+ Types.NestedField.optional(10, "smallint", Types.IntegerType.get()),
+ Types.NestedField.optional(11, "bigint", Types.LongType.get()),
+ Types.NestedField.optional(12, "varbinary", Types.BinaryType.get()),
+ Types.NestedField.optional(13, "binary", Types.BinaryType.get()),
+ Types.NestedField.optional(14, "time", Types.TimeType.get()),
+ Types.NestedField.optional(15, "timestamp",
Types.TimestampType.withZone()),
+ Types.NestedField.optional(16, "date", Types.DateType.get()),
+ Types.NestedField.optional(17, "decimal", Types.DecimalType.of(2, 2))
+ );
+
+ Assert.assertEquals(expectedSchema.toString(), actualSchema.toString());
+ FlinkSchemaUtil.validate(expectedSchema, actualSchema, true, true);
+ FlinkSchemaUtil.validate(expectedSchema, actualSchema, false, true);
+ FlinkSchemaUtil.validate(expectedSchema, actualSchema, true, false);
+ FlinkSchemaUtil.validate(expectedSchema, actualSchema, false, false);
Review comment:
As we discussed above, we will move this `validate` method into the
common `TypeUtil`. Let me add unit test in `TestTypeUtil`. Thanks.
##########
File path: flink/src/main/java/org/apache/iceberg/flink/FlinkTypeToType.java
##########
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.flink;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.table.types.AtomicDataType;
+import org.apache.flink.table.types.CollectionDataType;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.FieldsDataType;
+import org.apache.flink.table.types.KeyValueDataType;
+import org.apache.flink.table.types.logical.BigIntType;
+import org.apache.flink.table.types.logical.BinaryType;
+import org.apache.flink.table.types.logical.BooleanType;
+import org.apache.flink.table.types.logical.CharType;
+import org.apache.flink.table.types.logical.DateType;
+import org.apache.flink.table.types.logical.DecimalType;
+import org.apache.flink.table.types.logical.DoubleType;
+import org.apache.flink.table.types.logical.FloatType;
+import org.apache.flink.table.types.logical.IntType;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.SmallIntType;
+import org.apache.flink.table.types.logical.TimeType;
+import org.apache.flink.table.types.logical.TimestampType;
+import org.apache.flink.table.types.logical.TinyIntType;
+import org.apache.flink.table.types.logical.VarBinaryType;
+import org.apache.flink.table.types.logical.VarCharType;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+
+public class FlinkTypeToType extends FlinkTypeVisitor<Type> {
+ private final FieldsDataType root;
+ private int nextId = 0;
+
+ FlinkTypeToType(FieldsDataType root) {
+ this.root = root;
+ // the root struct's fields use the first ids
+ this.nextId = root.getFieldDataTypes().size();
+ }
+
+ private int getNextId() {
+ int next = nextId;
+ nextId += 1;
+ return next;
+ }
+
+ @Override
+ public Type fields(FieldsDataType dataType, Map<String, Tuple2<String,
Type>> types) {
+ List<Types.NestedField> newFields =
Lists.newArrayListWithExpectedSize(types.size());
+ boolean isRoot = root == dataType;
+
+ Map<String, DataType> fieldsMap = dataType.getFieldDataTypes();
+ int index = 0;
+ for (String name : types.keySet()) {
+ assert fieldsMap.containsKey(name);
+ DataType field = fieldsMap.get(name);
+ Tuple2<String, Type> tuple2 = types.get(name);
Review comment:
Sounds good.
##########
File path: flink/src/main/java/org/apache/iceberg/flink/FlinkTypeToType.java
##########
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.flink;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.table.types.AtomicDataType;
+import org.apache.flink.table.types.CollectionDataType;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.FieldsDataType;
+import org.apache.flink.table.types.KeyValueDataType;
+import org.apache.flink.table.types.logical.BigIntType;
+import org.apache.flink.table.types.logical.BinaryType;
+import org.apache.flink.table.types.logical.BooleanType;
+import org.apache.flink.table.types.logical.CharType;
+import org.apache.flink.table.types.logical.DateType;
+import org.apache.flink.table.types.logical.DecimalType;
+import org.apache.flink.table.types.logical.DoubleType;
+import org.apache.flink.table.types.logical.FloatType;
+import org.apache.flink.table.types.logical.IntType;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.SmallIntType;
+import org.apache.flink.table.types.logical.TimeType;
+import org.apache.flink.table.types.logical.TimestampType;
+import org.apache.flink.table.types.logical.TinyIntType;
+import org.apache.flink.table.types.logical.VarBinaryType;
+import org.apache.flink.table.types.logical.VarCharType;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+
+public class FlinkTypeToType extends FlinkTypeVisitor<Type> {
+ private final FieldsDataType root;
+ private int nextId = 0;
+
+ FlinkTypeToType(FieldsDataType root) {
+ this.root = root;
+ // the root struct's fields use the first ids
+ this.nextId = root.getFieldDataTypes().size();
+ }
+
+ private int getNextId() {
+ int next = nextId;
+ nextId += 1;
+ return next;
+ }
+
+ @Override
+ public Type fields(FieldsDataType dataType, Map<String, Tuple2<String,
Type>> types) {
+ List<Types.NestedField> newFields =
Lists.newArrayListWithExpectedSize(types.size());
+ boolean isRoot = root == dataType;
+
+ Map<String, DataType> fieldsMap = dataType.getFieldDataTypes();
+ int index = 0;
+ for (String name : types.keySet()) {
+ assert fieldsMap.containsKey(name);
+ DataType field = fieldsMap.get(name);
+ Tuple2<String, Type> tuple2 = types.get(name);
+
+ int id = isRoot ? index : getNextId();
+ if (field.getLogicalType().isNullable()) {
+ newFields.add(Types.NestedField.optional(id, name, tuple2.f1,
tuple2.f0));
+ } else {
+ newFields.add(Types.NestedField.required(id, name, tuple2.f1,
tuple2.f0));
+ }
+ index++;
+ }
+ return Types.StructType.of(newFields);
+ }
+
+ @Override
+ public Type collection(CollectionDataType collection, Type elementType) {
+ if (collection.getElementDataType().getLogicalType().isNullable()) {
+ return Types.ListType.ofOptional(getNextId(), elementType);
+ } else {
+ return Types.ListType.ofRequired(getNextId(), elementType);
+ }
+ }
+
+ @Override
+ public Type map(KeyValueDataType map, Type keyType, Type valueType) {
+ if (map.getValueDataType().getLogicalType().isNullable()) {
+ return Types.MapType.ofOptional(getNextId(), getNextId(), keyType,
valueType);
+ } else {
+ return Types.MapType.ofRequired(getNextId(), getNextId(), keyType,
valueType);
+ }
+ }
+
+ @SuppressWarnings("checkstyle:CyclomaticComplexity")
+ @Override
+ public Type atomic(AtomicDataType type) {
+ LogicalType inner = type.getLogicalType();
+ if (inner instanceof VarCharType ||
+ inner instanceof CharType) {
+ return Types.StringType.get();
+ } else if (inner instanceof BooleanType) {
+ return Types.BooleanType.get();
+ } else if (inner instanceof IntType ||
+ inner instanceof SmallIntType ||
+ inner instanceof TinyIntType) {
+ return Types.IntegerType.get();
+ } else if (inner instanceof BigIntType) {
+ return Types.LongType.get();
+ } else if (inner instanceof VarBinaryType ||
+ inner instanceof BinaryType) {
Review comment:
Thanks for the reminder, will fix this and provide a unit test to
address it. Thanks.
##########
File path: flink/src/main/java/org/apache/iceberg/flink/FlinkTypeVisitor.java
##########
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.flink;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.table.types.AtomicDataType;
+import org.apache.flink.table.types.CollectionDataType;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.FieldsDataType;
+import org.apache.flink.table.types.KeyValueDataType;
+import org.apache.flink.table.types.logical.RowType;
+
+public class FlinkTypeVisitor<T> {
+
+ static <T> T visit(DataType dataType, FlinkTypeVisitor<T> visitor) {
+ if (dataType instanceof FieldsDataType) {
+ FieldsDataType fieldsType = (FieldsDataType) dataType;
+ Map<String, DataType> fields = fieldsType.getFieldDataTypes();
+ Map<String, Tuple2<String, T>> fieldResults = new LinkedHashMap<>();
+ // Make sure that we're traversing the fields in the same order as
constructing the schema's fields.
+ RowType rowType = (RowType) dataType.getLogicalType();
Review comment:
Yes, it's the only one. Providing a Precondition seems good.
##########
File path: flink/src/main/java/org/apache/iceberg/flink/FlinkTypeVisitor.java
##########
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.flink;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.table.types.AtomicDataType;
+import org.apache.flink.table.types.CollectionDataType;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.FieldsDataType;
+import org.apache.flink.table.types.KeyValueDataType;
+import org.apache.flink.table.types.logical.RowType;
+
+public class FlinkTypeVisitor<T> {
+
+ static <T> T visit(DataType dataType, FlinkTypeVisitor<T> visitor) {
+ if (dataType instanceof FieldsDataType) {
+ FieldsDataType fieldsType = (FieldsDataType) dataType;
+ Map<String, DataType> fields = fieldsType.getFieldDataTypes();
+ Map<String, Tuple2<String, T>> fieldResults = new LinkedHashMap<>();
+ // Make sure that we're traversing the fields in the same order as
constructing the schema's fields.
+ RowType rowType = (RowType) dataType.getLogicalType();
+ for (int i = 0; i < fields.size(); i++) {
+ String name = rowType.getFieldNames().get(i);
+ String comment =
rowType.getFields().get(i).getDescription().orElse(null);
+ fieldResults.put(name, Tuple2.of(comment, visit(fields.get(name),
visitor)));
+ }
+ return visitor.fields(fieldsType, fieldResults);
+ } else if (dataType instanceof CollectionDataType) {
+ CollectionDataType collectionType = (CollectionDataType) dataType;
+ return visitor.collection(collectionType,
+ visit(collectionType.getElementDataType(), visitor));
+ } else if (dataType instanceof KeyValueDataType) {
+ KeyValueDataType mapType = (KeyValueDataType) dataType;
+ return visitor.map(mapType,
+ visit(mapType.getKeyDataType(), visitor),
+ visit(mapType.getValueDataType(), visitor));
+ } else if (dataType instanceof AtomicDataType) {
+ AtomicDataType atomic = (AtomicDataType) dataType;
+ return visitor.atomic(atomic);
+ } else {
+ throw new UnsupportedOperationException("Unsupported data type: " +
dataType);
+ }
+ }
+
+ public T fields(FieldsDataType dataType, Map<String, Tuple2<String, T>>
fieldResults) {
Review comment:
The name `fields` is matching the `FieldsDataType` , like the
`collection` is matching the `CollectionDataType`, `map` is matching the
`KeyValueDataType`.
##########
File path: versions.props
##########
@@ -1,5 +1,6 @@
org.slf4j:* = 1.7.25
org.apache.avro:avro = 1.9.2
+org.apache.flink:* = 1.10.0
Review comment:
The flink version can be upgraded to 1.10.1 now.
##########
File path: flink/src/main/java/org/apache/iceberg/flink/FlinkTypeToType.java
##########
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.flink;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.table.types.AtomicDataType;
+import org.apache.flink.table.types.CollectionDataType;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.FieldsDataType;
+import org.apache.flink.table.types.KeyValueDataType;
+import org.apache.flink.table.types.logical.BigIntType;
+import org.apache.flink.table.types.logical.BinaryType;
+import org.apache.flink.table.types.logical.BooleanType;
+import org.apache.flink.table.types.logical.CharType;
+import org.apache.flink.table.types.logical.DateType;
+import org.apache.flink.table.types.logical.DecimalType;
+import org.apache.flink.table.types.logical.DoubleType;
+import org.apache.flink.table.types.logical.FloatType;
+import org.apache.flink.table.types.logical.IntType;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.SmallIntType;
+import org.apache.flink.table.types.logical.TimeType;
+import org.apache.flink.table.types.logical.TimestampType;
+import org.apache.flink.table.types.logical.TinyIntType;
+import org.apache.flink.table.types.logical.VarBinaryType;
+import org.apache.flink.table.types.logical.VarCharType;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+
+public class FlinkTypeToType extends FlinkTypeVisitor<Type> {
+ private final FieldsDataType root;
+ private int nextId = 0;
+
+ FlinkTypeToType(FieldsDataType root) {
+ this.root = root;
+ // the root struct's fields use the first ids
+ this.nextId = root.getFieldDataTypes().size();
+ }
+
+ private int getNextId() {
+ int next = nextId;
+ nextId += 1;
+ return next;
+ }
+
+ @Override
+ public Type fields(FieldsDataType dataType, Map<String, Tuple2<String,
Type>> types) {
+ List<Types.NestedField> newFields =
Lists.newArrayListWithExpectedSize(types.size());
+ boolean isRoot = root == dataType;
+
+ Map<String, DataType> fieldsMap = dataType.getFieldDataTypes();
+ int index = 0;
+ for (String name : types.keySet()) {
+ assert fieldsMap.containsKey(name);
+ DataType field = fieldsMap.get(name);
+ Tuple2<String, Type> tuple2 = types.get(name);
+
+ int id = isRoot ? index : getNextId();
+ if (field.getLogicalType().isNullable()) {
+ newFields.add(Types.NestedField.optional(id, name, tuple2.f1,
tuple2.f0));
+ } else {
+ newFields.add(Types.NestedField.required(id, name, tuple2.f1,
tuple2.f0));
+ }
+ index++;
+ }
+ return Types.StructType.of(newFields);
+ }
+
+ @Override
+ public Type collection(CollectionDataType collection, Type elementType) {
+ if (collection.getElementDataType().getLogicalType().isNullable()) {
+ return Types.ListType.ofOptional(getNextId(), elementType);
+ } else {
+ return Types.ListType.ofRequired(getNextId(), elementType);
+ }
+ }
+
+ @Override
+ public Type map(KeyValueDataType map, Type keyType, Type valueType) {
+ if (map.getValueDataType().getLogicalType().isNullable()) {
+ return Types.MapType.ofOptional(getNextId(), getNextId(), keyType,
valueType);
+ } else {
+ return Types.MapType.ofRequired(getNextId(), getNextId(), keyType,
valueType);
+ }
+ }
+
+ @SuppressWarnings("checkstyle:CyclomaticComplexity")
+ @Override
+ public Type atomic(AtomicDataType type) {
+ LogicalType inner = type.getLogicalType();
+ if (inner instanceof VarCharType ||
+ inner instanceof CharType) {
+ return Types.StringType.get();
+ } else if (inner instanceof BooleanType) {
+ return Types.BooleanType.get();
+ } else if (inner instanceof IntType ||
+ inner instanceof SmallIntType ||
+ inner instanceof TinyIntType) {
+ return Types.IntegerType.get();
+ } else if (inner instanceof BigIntType) {
+ return Types.LongType.get();
+ } else if (inner instanceof VarBinaryType ||
+ inner instanceof BinaryType) {
+ return Types.BinaryType.get();
+ } else if (inner instanceof FloatType) {
+ return Types.FloatType.get();
+ } else if (inner instanceof DoubleType) {
+ return Types.DoubleType.get();
+ } else if (inner instanceof DateType) {
+ return Types.DateType.get();
+ } else if (inner instanceof TimeType) {
+ return Types.TimeType.get();
+ } else if (inner instanceof TimestampType) {
Review comment:
It's a bug here which I forgot to fix before. We have an issue to
address this: https://github.com/generic-datalake/iceberg-pro/issues/30.
Let me address it in this pull request.
##########
File path: flink/src/main/java/org/apache/iceberg/flink/FlinkTypeVisitor.java
##########
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.flink;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.table.types.AtomicDataType;
+import org.apache.flink.table.types.CollectionDataType;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.FieldsDataType;
+import org.apache.flink.table.types.KeyValueDataType;
+import org.apache.flink.table.types.logical.RowType;
+
+public class FlinkTypeVisitor<T> {
+
+ static <T> T visit(DataType dataType, FlinkTypeVisitor<T> visitor) {
+ if (dataType instanceof FieldsDataType) {
+ FieldsDataType fieldsType = (FieldsDataType) dataType;
+ Map<String, DataType> fields = fieldsType.getFieldDataTypes();
+ Map<String, Tuple2<String, T>> fieldResults = new LinkedHashMap<>();
+ // Make sure that we're traversing the fields in the same order as
constructing the schema's fields.
+ RowType rowType = (RowType) dataType.getLogicalType();
+ for (int i = 0; i < fields.size(); i++) {
+ String name = rowType.getFieldNames().get(i);
+ String comment =
rowType.getFields().get(i).getDescription().orElse(null);
+ fieldResults.put(name, Tuple2.of(comment, visit(fields.get(name),
visitor)));
Review comment:
There are two different datatype classes in the flink type system: DataType and
LogicalType. The DataType has the following subtypes:
1. AtomicDataType;
2. CollectionDataType;
3. FieldsDataType;
4. KeyValueDataType;
and the LogicalType has the following kinds: ArrayType/CharType/LongType/MapType
etc.; it describes more details about the type.
The RowType is a LogicalType, while we defined the visit by accepting a
DataType; that's why I need both types here: the logical type to get the name &
comment, and the DataType to pass as the argument.
I think I can write this code more clearly, please see the next patch.
##########
File path: build.gradle
##########
@@ -235,6 +235,38 @@ project(':iceberg-data') {
}
}
+project(':iceberg-flink') {
+ apply plugin: 'scala'
+
+ dependencies {
+ compile project(':iceberg-api')
+ compile project(':iceberg-common')
+ compile project(':iceberg-core')
+ compile project(':iceberg-data')
+ compile project(':iceberg-orc')
+ compile project(':iceberg-parquet')
+ compile project(':iceberg-arrow')
+ compile "org.apache.flink:flink-streaming-java_2.11::tests"
Review comment:
Yes, we have a scala 2.12 version. I'm not sure what specific problem
you mean in 2.11 — do you have more information? Thanks.
##########
File path: flink/src/main/java/org/apache/iceberg/flink/FlinkTypeVisitor.java
##########
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.flink;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.table.types.AtomicDataType;
+import org.apache.flink.table.types.CollectionDataType;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.FieldsDataType;
+import org.apache.flink.table.types.KeyValueDataType;
+import org.apache.flink.table.types.logical.RowType;
+
+public class FlinkTypeVisitor<T> {
+
+ static <T> T visit(DataType dataType, FlinkTypeVisitor<T> visitor) {
+ if (dataType instanceof FieldsDataType) {
+ FieldsDataType fieldsType = (FieldsDataType) dataType;
+ Map<String, DataType> fields = fieldsType.getFieldDataTypes();
+ Map<String, Tuple2<String, T>> fieldResults = new LinkedHashMap<>();
+ // Make sure that we're traversing the fields in the same order as
constructing the schema's fields.
+ RowType rowType = (RowType) dataType.getLogicalType();
+ for (int i = 0; i < fields.size(); i++) {
+ String name = rowType.getFieldNames().get(i);
+ String comment =
rowType.getFields().get(i).getDescription().orElse(null);
+ fieldResults.put(name, Tuple2.of(comment, visit(fields.get(name),
visitor)));
Review comment:
The reason why I added a `Tuple2/Pair` here is that the flink `Type` does
not expose an interface to access the `comment`, so I have to maintain it
myself. I got your point about `before` or `after` hooks. In the worst case,
the hooks may also accept the `Map<String, Pair<String, Type>>` as an argument;
we may be able to do better.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]