This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new d98224a API: Fix schema name conflicts (#1336)
d98224a is described below
commit d98224a82b104888281d4e901ccf948f9642590b
Author: Ryan Blue <[email protected]>
AuthorDate: Wed Sep 2 15:57:18 2020 -0700
API: Fix schema name conflicts (#1336)
---
api/src/main/java/org/apache/iceberg/Schema.java | 27 ++++++---
.../java/org/apache/iceberg/types/IndexByName.java | 66 +++++++++++++++++++---
.../java/org/apache/iceberg/types/TypeUtil.java | 10 +++-
3 files changed, 85 insertions(+), 18 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/Schema.java
b/api/src/main/java/org/apache/iceberg/Schema.java
index 0ecbfd4..fb1073d 100644
--- a/api/src/main/java/org/apache/iceberg/Schema.java
+++ b/api/src/main/java/org/apache/iceberg/Schema.java
@@ -31,6 +31,7 @@ import
org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.BiMap;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableBiMap;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
@@ -47,21 +48,22 @@ public class Schema implements Serializable {
private final StructType struct;
private transient BiMap<String, Integer> aliasToId = null;
private transient Map<Integer, NestedField> idToField = null;
- private transient BiMap<String, Integer> nameToId = null;
- private transient BiMap<String, Integer> lowerCaseNameToId = null;
+ private transient Map<String, Integer> nameToId = null;
+ private transient Map<String, Integer> lowerCaseNameToId = null;
private transient Map<Integer, Accessor<StructLike>> idToAccessor = null;
+ private transient Map<Integer, String> idToName = null;
public Schema(List<NestedField> columns, Map<String, Integer> aliases) {
this.struct = StructType.of(columns);
this.aliasToId = aliases != null ? ImmutableBiMap.copyOf(aliases) : null;
// validate the schema through IndexByName visitor
- lazyNameToId();
+ lazyIdToName();
}
public Schema(List<NestedField> columns) {
this.struct = StructType.of(columns);
- lazyNameToId();
+ lazyIdToName();
}
public Schema(NestedField... columns) {
@@ -75,16 +77,23 @@ public class Schema implements Serializable {
return idToField;
}
- private BiMap<String, Integer> lazyNameToId() {
+ private Map<String, Integer> lazyNameToId() {
if (nameToId == null) {
- this.nameToId = ImmutableBiMap.copyOf(TypeUtil.indexByName(struct));
+ this.nameToId = ImmutableMap.copyOf(TypeUtil.indexByName(struct));
}
return nameToId;
}
- private BiMap<String, Integer> lazyLowerCaseNameToId() {
+ private Map<Integer, String> lazyIdToName() {
+ if (idToName == null) {
+ this.idToName = ImmutableMap.copyOf(TypeUtil.indexNameById(struct));
+ }
+ return idToName;
+ }
+
+ private Map<String, Integer> lazyLowerCaseNameToId() {
if (lowerCaseNameToId == null) {
- this.lowerCaseNameToId =
ImmutableBiMap.copyOf(TypeUtil.indexByLowerCaseName(struct));
+ this.lowerCaseNameToId =
ImmutableMap.copyOf(TypeUtil.indexByLowerCaseName(struct));
}
return lowerCaseNameToId;
}
@@ -206,7 +215,7 @@ public class Schema implements Serializable {
* @return the full column name in this schema that resolves to the id
*/
public String findColumnName(int id) {
- return lazyNameToId().inverse().get(id);
+ return lazyIdToName().get(id);
}
/**
diff --git a/api/src/main/java/org/apache/iceberg/types/IndexByName.java
b/api/src/main/java/org/apache/iceberg/types/IndexByName.java
index 2d61c56..55f0a2d 100644
--- a/api/src/main/java/org/apache/iceberg/types/IndexByName.java
+++ b/api/src/main/java/org/apache/iceberg/types/IndexByName.java
@@ -25,6 +25,7 @@ import java.util.Map;
import org.apache.iceberg.Schema;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
@@ -32,32 +33,71 @@ public class IndexByName extends
TypeUtil.SchemaVisitor<Map<String, Integer>> {
private static final Joiner DOT = Joiner.on(".");
private final Deque<String> fieldNames = Lists.newLinkedList();
+ private final Deque<String> shortFieldNames = Lists.newLinkedList();
private final Map<String, Integer> nameToId = Maps.newHashMap();
+ private final Map<String, Integer> shortNameToId = Maps.newHashMap();
+
+ /**
+ * Returns a mapping from full field name to ID.
+ * <p>
+ * Short names for maps and lists are included for any name that does not
conflict with a canonical name. For example,
+ * a list, 'l', of structs with field 'x' will produce short name 'l.x' in
addition to canonical name 'l.element.x'.
+ *
+ * @return a map from name to field ID
+ */
+ public Map<String, Integer> byName() {
+ ImmutableMap.Builder<String, Integer> builder = ImmutableMap.builder();
+ builder.putAll(nameToId);
+ // add all short names that do not conflict with canonical names
+ shortNameToId.entrySet().stream()
+ .filter(entry -> !nameToId.containsKey(entry.getKey()))
+ .forEach(builder::put);
+ return builder.build();
+ }
+
+ /**
+ * Returns a mapping from field ID to full name.
+ * <p>
+ * Canonical names, not short names are returned, for example
'list.element.field' instead of 'list.field'.
+ *
+ * @return a map from field ID to name
+ */
+ public Map<Integer, String> byId() {
+ ImmutableMap.Builder<Integer, String> builder = ImmutableMap.builder();
+ nameToId.forEach((key, value) -> builder.put(value, key));
+ return builder.build();
+ }
@Override
public void beforeField(Types.NestedField field) {
fieldNames.push(field.name());
+ shortFieldNames.push(field.name());
}
@Override
public void afterField(Types.NestedField field) {
fieldNames.pop();
+ shortFieldNames.pop();
}
@Override
public void beforeListElement(Types.NestedField elementField) {
- // only add "element" to the name if the element is not a struct, so that
names are more natural
+ fieldNames.push(elementField.name());
+
+ // only add "element" to the short name if the element is not a struct, so
that names are more natural
// for example, locations.latitude instead of locations.element.latitude
if (!elementField.type().isStructType()) {
- beforeField(elementField);
+ shortFieldNames.push(elementField.name());
}
}
@Override
public void afterListElement(Types.NestedField elementField) {
+ fieldNames.pop();
+
// only remove "element" if it was added
if (!elementField.type().isStructType()) {
- afterField(elementField);
+ shortFieldNames.pop();
}
}
@@ -73,17 +113,21 @@ public class IndexByName extends
TypeUtil.SchemaVisitor<Map<String, Integer>> {
@Override
public void beforeMapValue(Types.NestedField valueField) {
+ fieldNames.push(valueField.name());
+
// only add "value" to the name if the value is not a struct, so that
names are more natural
if (!valueField.type().isStructType()) {
- beforeField(valueField);
+ shortFieldNames.push(valueField.name());
}
}
@Override
public void afterMapValue(Types.NestedField valueField) {
+ fieldNames.pop();
+
// only remove "value" if it was added
if (!valueField.type().isStructType()) {
- afterField(valueField);
+ shortFieldNames.pop();
}
}
@@ -128,9 +172,15 @@ public class IndexByName extends
TypeUtil.SchemaVisitor<Map<String, Integer>> {
}
Integer existingFieldId = nameToId.put(fullName, fieldId);
- if (existingFieldId != null && !"element".equals(name) &&
!"value".equals(name)) {
- throw new ValidationException(
- "Invalid schema: multiple fields for name %s: %s and %s", fullName,
existingFieldId, fieldId);
+ ValidationException.check(existingFieldId == null,
+ "Invalid schema: multiple fields for name %s: %s and %s", fullName,
existingFieldId, fieldId);
+
+ // also track the short name, if this is a nested field
+ if (!shortFieldNames.isEmpty()) {
+ String shortName =
DOT.join(DOT.join(shortFieldNames.descendingIterator()), name);
+ if (!shortNameToId.containsKey(shortName)) {
+ shortNameToId.put(shortName, fieldId);
+ }
}
}
}
diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
index 95673eb..8918f98 100644
--- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
+++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
@@ -106,7 +106,15 @@ public class TypeUtil {
}
public static Map<String, Integer> indexByName(Types.StructType struct) {
- return visit(struct, new IndexByName());
+ IndexByName indexer = new IndexByName();
+ visit(struct, indexer);
+ return indexer.byName();
+ }
+
+ public static Map<Integer, String> indexNameById(Types.StructType struct) {
+ IndexByName indexer = new IndexByName();
+ visit(struct, indexer);
+ return indexer.byId();
}
public static Map<String, Integer> indexByLowerCaseName(Types.StructType
struct) {