This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new d98224a  API: Fix schema name conflicts (#1336)
d98224a is described below

commit d98224a82b104888281d4e901ccf948f9642590b
Author: Ryan Blue <[email protected]>
AuthorDate: Wed Sep 2 15:57:18 2020 -0700

    API: Fix schema name conflicts (#1336)
---
 api/src/main/java/org/apache/iceberg/Schema.java   | 27 ++++++---
 .../java/org/apache/iceberg/types/IndexByName.java | 66 +++++++++++++++++++---
 .../java/org/apache/iceberg/types/TypeUtil.java    | 10 +++-
 3 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/Schema.java 
b/api/src/main/java/org/apache/iceberg/Schema.java
index 0ecbfd4..fb1073d 100644
--- a/api/src/main/java/org/apache/iceberg/Schema.java
+++ b/api/src/main/java/org/apache/iceberg/Schema.java
@@ -31,6 +31,7 @@ import 
org.apache.iceberg.relocated.com.google.common.base.Joiner;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.BiMap;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableBiMap;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
 import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.TypeUtil;
@@ -47,21 +48,22 @@ public class Schema implements Serializable {
   private final StructType struct;
   private transient BiMap<String, Integer> aliasToId = null;
   private transient Map<Integer, NestedField> idToField = null;
-  private transient BiMap<String, Integer> nameToId = null;
-  private transient BiMap<String, Integer> lowerCaseNameToId = null;
+  private transient Map<String, Integer> nameToId = null;
+  private transient Map<String, Integer> lowerCaseNameToId = null;
   private transient Map<Integer, Accessor<StructLike>> idToAccessor = null;
+  private transient Map<Integer, String> idToName = null;
 
   public Schema(List<NestedField> columns, Map<String, Integer> aliases) {
     this.struct = StructType.of(columns);
     this.aliasToId = aliases != null ? ImmutableBiMap.copyOf(aliases) : null;
 
     // validate the schema through IndexByName visitor
-    lazyNameToId();
+    lazyIdToName();
   }
 
   public Schema(List<NestedField> columns) {
     this.struct = StructType.of(columns);
-    lazyNameToId();
+    lazyIdToName();
   }
 
   public Schema(NestedField... columns) {
@@ -75,16 +77,23 @@ public class Schema implements Serializable {
     return idToField;
   }
 
-  private BiMap<String, Integer> lazyNameToId() {
+  private Map<String, Integer> lazyNameToId() {
     if (nameToId == null) {
-      this.nameToId = ImmutableBiMap.copyOf(TypeUtil.indexByName(struct));
+      this.nameToId = ImmutableMap.copyOf(TypeUtil.indexByName(struct));
     }
     return nameToId;
   }
 
-  private BiMap<String, Integer> lazyLowerCaseNameToId() {
+  private Map<Integer, String> lazyIdToName() {
+    if (idToName == null) {
+      this.idToName = ImmutableMap.copyOf(TypeUtil.indexNameById(struct));
+    }
+    return idToName;
+  }
+
+  private Map<String, Integer> lazyLowerCaseNameToId() {
     if (lowerCaseNameToId == null) {
-      this.lowerCaseNameToId = 
ImmutableBiMap.copyOf(TypeUtil.indexByLowerCaseName(struct));
+      this.lowerCaseNameToId = 
ImmutableMap.copyOf(TypeUtil.indexByLowerCaseName(struct));
     }
     return lowerCaseNameToId;
   }
@@ -206,7 +215,7 @@ public class Schema implements Serializable {
    * @return the full column name in this schema that resolves to the id
    */
   public String findColumnName(int id) {
-    return lazyNameToId().inverse().get(id);
+    return lazyIdToName().get(id);
   }
 
   /**
diff --git a/api/src/main/java/org/apache/iceberg/types/IndexByName.java 
b/api/src/main/java/org/apache/iceberg/types/IndexByName.java
index 2d61c56..55f0a2d 100644
--- a/api/src/main/java/org/apache/iceberg/types/IndexByName.java
+++ b/api/src/main/java/org/apache/iceberg/types/IndexByName.java
@@ -25,6 +25,7 @@ import java.util.Map;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.relocated.com.google.common.base.Joiner;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 
@@ -32,32 +33,71 @@ public class IndexByName extends 
TypeUtil.SchemaVisitor<Map<String, Integer>> {
   private static final Joiner DOT = Joiner.on(".");
 
   private final Deque<String> fieldNames = Lists.newLinkedList();
+  private final Deque<String> shortFieldNames = Lists.newLinkedList();
   private final Map<String, Integer> nameToId = Maps.newHashMap();
+  private final Map<String, Integer> shortNameToId = Maps.newHashMap();
+
+  /**
+   * Returns a mapping from full field name to ID.
+   * <p>
+   * Short names for maps and lists are included for any name that does not 
conflict with a canonical name. For example,
+   * a list, 'l', of structs with field 'x' will produce short name 'l.x' in 
addition to canonical name 'l.element.x'.
+   *
+   * @return a map from name to field ID
+   */
+  public Map<String, Integer> byName() {
+    ImmutableMap.Builder<String, Integer> builder = ImmutableMap.builder();
+    builder.putAll(nameToId);
+    // add all short names that do not conflict with canonical names
+    shortNameToId.entrySet().stream()
+        .filter(entry -> !nameToId.containsKey(entry.getKey()))
+        .forEach(builder::put);
+    return builder.build();
+  }
+
+  /**
+   * Returns a mapping from field ID to full name.
+   * <p>
+   * Canonical names, not short names are returned, for example 
'list.element.field' instead of 'list.field'.
+   *
+   * @return a map from field ID to name
+   */
+  public Map<Integer, String> byId() {
+    ImmutableMap.Builder<Integer, String> builder = ImmutableMap.builder();
+    nameToId.forEach((key, value) -> builder.put(value, key));
+    return builder.build();
+  }
 
   @Override
   public void beforeField(Types.NestedField field) {
     fieldNames.push(field.name());
+    shortFieldNames.push(field.name());
   }
 
   @Override
   public void afterField(Types.NestedField field) {
     fieldNames.pop();
+    shortFieldNames.pop();
   }
 
   @Override
   public void beforeListElement(Types.NestedField elementField) {
-    // only add "element" to the name if the element is not a struct, so that 
names are more natural
+    fieldNames.push(elementField.name());
+
+    // only add "element" to the short name if the element is not a struct, so 
that names are more natural
     // for example, locations.latitude instead of locations.element.latitude
     if (!elementField.type().isStructType()) {
-      beforeField(elementField);
+      shortFieldNames.push(elementField.name());
     }
   }
 
   @Override
   public void afterListElement(Types.NestedField elementField) {
+    fieldNames.pop();
+
     // only remove "element" if it was added
     if (!elementField.type().isStructType()) {
-      afterField(elementField);
+      shortFieldNames.pop();
     }
   }
 
@@ -73,17 +113,21 @@ public class IndexByName extends 
TypeUtil.SchemaVisitor<Map<String, Integer>> {
 
   @Override
   public void beforeMapValue(Types.NestedField valueField) {
+    fieldNames.push(valueField.name());
+
     // only add "value" to the name if the value is not a struct, so that 
names are more natural
     if (!valueField.type().isStructType()) {
-      beforeField(valueField);
+      shortFieldNames.push(valueField.name());
     }
   }
 
   @Override
   public void afterMapValue(Types.NestedField valueField) {
+    fieldNames.pop();
+
     // only remove "value" if it was added
     if (!valueField.type().isStructType()) {
-      afterField(valueField);
+      shortFieldNames.pop();
     }
   }
 
@@ -128,9 +172,15 @@ public class IndexByName extends 
TypeUtil.SchemaVisitor<Map<String, Integer>> {
     }
 
     Integer existingFieldId = nameToId.put(fullName, fieldId);
-    if (existingFieldId != null && !"element".equals(name) && 
!"value".equals(name)) {
-      throw new ValidationException(
-          "Invalid schema: multiple fields for name %s: %s and %s", fullName, 
existingFieldId, fieldId);
+    ValidationException.check(existingFieldId == null,
+        "Invalid schema: multiple fields for name %s: %s and %s", fullName, 
existingFieldId, fieldId);
+
+    // also track the short name, if this is a nested field
+    if (!shortFieldNames.isEmpty()) {
+      String shortName = 
DOT.join(DOT.join(shortFieldNames.descendingIterator()), name);
+      if (!shortNameToId.containsKey(shortName)) {
+        shortNameToId.put(shortName, fieldId);
+      }
     }
   }
 }
diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java 
b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
index 95673eb..8918f98 100644
--- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
+++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
@@ -106,7 +106,15 @@ public class TypeUtil {
   }
 
   public static Map<String, Integer> indexByName(Types.StructType struct) {
-    return visit(struct, new IndexByName());
+    IndexByName indexer = new IndexByName();
+    visit(struct, indexer);
+    return indexer.byName();
+  }
+
+  public static Map<Integer, String> indexNameById(Types.StructType struct) {
+    IndexByName indexer = new IndexByName();
+    visit(struct, indexer);
+    return indexer.byId();
   }
 
   public static Map<String, Integer> indexByLowerCaseName(Types.StructType 
struct) {

Reply via email to