This is an automated email from the ASF dual-hosted git repository.

etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new d894a0283e Core: Align CharSequenceSet impl with Data/DeleteFileSet 
(#11322)
d894a0283e is described below

commit d894a0283e137c44d910acfc4a77119d9df2a005
Author: Eduard Tudenhoefner <[email protected]>
AuthorDate: Wed Dec 10 20:07:58 2025 +0100

    Core: Align CharSequenceSet impl with Data/DeleteFileSet (#11322)
---
 .../org/apache/iceberg/util/CharSequenceSet.java   | 178 +++------------------
 .../apache/iceberg/util/CharSequenceWrapper.java   |   5 +-
 .../apache/iceberg/util/TestCharSequenceSet.java   | 175 +++++++++++++++++++-
 3 files changed, 197 insertions(+), 161 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java 
b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
index cfdac0104c..c13cbfd0cc 100644
--- a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
+++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
@@ -18,183 +18,53 @@
  */
 package org.apache.iceberg.util;
 
-import java.io.Serializable;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.Set;
-import java.util.stream.Collectors;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
-import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
-import org.apache.iceberg.relocated.com.google.common.collect.Sets;
-import org.apache.iceberg.relocated.com.google.common.collect.Streams;
 
-public class CharSequenceSet implements Set<CharSequence>, Serializable {
+public class CharSequenceSet extends WrapperSet<CharSequence> {
   private static final ThreadLocal<CharSequenceWrapper> WRAPPERS =
       ThreadLocal.withInitial(() -> CharSequenceWrapper.wrap(null));
 
-  public static CharSequenceSet of(Iterable<CharSequence> charSequences) {
-    return new CharSequenceSet(charSequences);
-  }
-
-  public static CharSequenceSet empty() {
-    return new CharSequenceSet(ImmutableList.of());
-  }
-
-  private final Set<CharSequenceWrapper> wrapperSet;
-
-  private CharSequenceSet(Iterable<CharSequence> charSequences) {
-    this.wrapperSet =
-        Sets.newHashSet(Iterables.transform(charSequences, 
CharSequenceWrapper::wrap));
+  private CharSequenceSet() {
+    // needed for serialization
   }
 
-  @Override
-  public int size() {
-    return wrapperSet.size();
+  private CharSequenceSet(Iterable<? extends CharSequence> charSequences) {
+    super(
+        Iterables.transform(
+            charSequences,
+            obj -> {
+              Preconditions.checkNotNull(obj, "Invalid object: null");
+              return CharSequenceWrapper.wrap(obj);
+            }));
   }
 
-  @Override
-  public boolean isEmpty() {
-    return wrapperSet.isEmpty();
+  public static CharSequenceSet of(Iterable<? extends CharSequence> 
charSequences) {
+    return new CharSequenceSet(charSequences);
   }
 
-  @Override
-  public boolean contains(Object obj) {
-    if (obj instanceof CharSequence) {
-      CharSequenceWrapper wrapper = WRAPPERS.get();
-      boolean result = wrapperSet.contains(wrapper.set((CharSequence) obj));
-      wrapper.set(null); // don't hold a reference to the value
-      return result;
-    }
-    return false;
+  public static CharSequenceSet empty() {
+    return new CharSequenceSet();
   }
 
   @Override
-  public Iterator<CharSequence> iterator() {
-    return Iterators.transform(wrapperSet.iterator(), 
CharSequenceWrapper::get);
+  protected Wrapper<CharSequence> wrapper() {
+    return WRAPPERS.get();
   }
 
   @Override
-  public Object[] toArray() {
-    return Iterators.toArray(iterator(), CharSequence.class);
+  protected Wrapper<CharSequence> wrap(CharSequence file) {
+    return CharSequenceWrapper.wrap(file);
   }
 
   @Override
-  @SuppressWarnings("unchecked")
-  public <T> T[] toArray(T[] destArray) {
-    int size = wrapperSet.size();
-    if (destArray.length < size) {
-      return (T[]) toArray();
-    }
-
-    Iterator<CharSequence> iter = iterator();
-    int ind = 0;
-    while (iter.hasNext()) {
-      destArray[ind] = (T) iter.next();
-      ind += 1;
-    }
-
-    if (destArray.length > size) {
-      destArray[size] = null;
-    }
-
-    return destArray;
+  protected Class<CharSequence> elementClass() {
+    return CharSequence.class;
   }
 
   @Override
   public boolean add(CharSequence charSequence) {
-    return wrapperSet.add(CharSequenceWrapper.wrap(charSequence));
-  }
-
-  @Override
-  public boolean remove(Object obj) {
-    if (obj instanceof CharSequence) {
-      CharSequenceWrapper wrapper = WRAPPERS.get();
-      boolean result = wrapperSet.remove(wrapper.set((CharSequence) obj));
-      wrapper.set(null); // don't hold a reference to the value
-      return result;
-    }
-    return false;
-  }
-
-  @Override
-  @SuppressWarnings("CollectionUndefinedEquality")
-  public boolean containsAll(Collection<?> objects) {
-    if (objects != null) {
-      return Iterables.all(objects, this::contains);
-    }
-    return false;
-  }
-
-  @Override
-  public boolean addAll(Collection<? extends CharSequence> charSequences) {
-    if (charSequences != null) {
-      return Iterables.addAll(
-          wrapperSet, Iterables.transform(charSequences, 
CharSequenceWrapper::wrap));
-    }
-    return false;
-  }
-
-  @Override
-  public boolean retainAll(Collection<?> objects) {
-    if (objects != null) {
-      Set<CharSequenceWrapper> toRetain =
-          objects.stream()
-              .filter(CharSequence.class::isInstance)
-              .map(CharSequence.class::cast)
-              .map(CharSequenceWrapper::wrap)
-              .collect(Collectors.toSet());
-
-      return Iterables.retainAll(wrapperSet, toRetain);
-    }
-
-    return false;
-  }
-
-  @Override
-  @SuppressWarnings("CollectionUndefinedEquality")
-  public boolean removeAll(Collection<?> objects) {
-    if (objects != null) {
-      return objects.stream().filter(this::remove).count() != 0;
-    }
-
-    return false;
-  }
-
-  @Override
-  public void clear() {
-    wrapperSet.clear();
-  }
-
-  @SuppressWarnings("CollectionUndefinedEquality")
-  @Override
-  public boolean equals(Object other) {
-    if (this == other) {
-      return true;
-    } else if (!(other instanceof Set)) {
-      return false;
-    }
-
-    Set<?> that = (Set<?>) other;
-
-    if (size() != that.size()) {
-      return false;
-    }
-
-    try {
-      return containsAll(that);
-    } catch (ClassCastException | NullPointerException unused) {
-      return false;
-    }
-  }
-
-  @Override
-  public int hashCode() {
-    return wrapperSet.stream().mapToInt(CharSequenceWrapper::hashCode).sum();
-  }
-
-  @Override
-  public String toString() {
-    return 
Streams.stream(iterator()).collect(Collectors.joining("CharSequenceSet({", ", 
", "})"));
+    // method is needed to not break API compatibility
+    return super.add(charSequence);
   }
 }
diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java 
b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java
index 854264c1ae..59e8eb712d 100644
--- a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java
+++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java
@@ -18,12 +18,11 @@
  */
 package org.apache.iceberg.util;
 
-import java.io.Serializable;
 import org.apache.iceberg.types.Comparators;
 import org.apache.iceberg.types.JavaHashes;
 
 /** Wrapper class to adapt CharSequence for use in maps and sets. */
-public class CharSequenceWrapper implements CharSequence, Serializable {
+public class CharSequenceWrapper implements CharSequence, 
WrapperSet.Wrapper<CharSequence> {
   public static CharSequenceWrapper wrap(CharSequence seq) {
     return new CharSequenceWrapper(seq);
   }
@@ -39,6 +38,7 @@ public class CharSequenceWrapper implements CharSequence, 
Serializable {
     this.wrapped = wrapped;
   }
 
+  @Override
   public CharSequenceWrapper set(CharSequence newWrapped) {
     this.wrapped = newWrapped;
     this.hashCode = 0;
@@ -46,6 +46,7 @@ public class CharSequenceWrapper implements CharSequence, 
Serializable {
     return this;
   }
 
+  @Override
   public CharSequence get() {
     return wrapped;
   }
diff --git a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java 
b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
index 324742c07a..093d2a0c6b 100644
--- a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
+++ b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
@@ -19,10 +19,12 @@
 package org.apache.iceberg.util;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Set;
+import org.apache.iceberg.TestHelpers;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
 import org.junit.jupiter.api.Test;
@@ -42,15 +44,115 @@ public class TestCharSequenceSet {
 
   @Test
   public void nullString() {
-    assertThat(CharSequenceSet.of(Arrays.asList((String) 
null))).contains((String) null);
+    assertThatThrownBy(() -> CharSequenceSet.of(Arrays.asList((String) null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
     assertThat(CharSequenceSet.empty()).doesNotContain((String) null);
   }
 
+  @Test
+  public void emptySet() {
+    assertThat(CharSequenceSet.empty()).isEmpty();
+    assertThat(CharSequenceSet.empty()).doesNotContain("a", "b", "c");
+  }
+
+  @Test
+  public void insertionOrderIsMaintained() {
+    CharSequenceSet set = CharSequenceSet.empty();
+    set.addAll(ImmutableList.of("d", "a", "c"));
+    set.add("b");
+    set.add("d");
+
+    assertThat(set).hasSize(4).containsExactly("d", "a", "c", "b");
+  }
+
+  @Test
+  public void clear() {
+    CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("a", "b"));
+    set.clear();
+    assertThat(set).isEmpty();
+  }
+
+  @Test
+  public void addAll() {
+    CharSequenceSet empty = CharSequenceSet.empty();
+    assertThatThrownBy(() -> empty.add(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.addAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> empty.addAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.addAll(Arrays.asList("a", null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    CharSequenceSet set = CharSequenceSet.empty();
+    set.addAll(ImmutableList.of("b", "a", "c", "a"));
+    assertThat(set).hasSize(3).containsExactly("b", "a", "c");
+  }
+
+  @Test
+  public void contains() {
+    CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("b", "a"));
+    assertThatThrownBy(() -> set.contains(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThat(set).hasSize(2).containsExactly("b", 
"a").doesNotContain("c").doesNotContain("d");
+
+    assertThatThrownBy(() -> CharSequenceSet.of(Arrays.asList("c", "b", null, 
"a")))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+  }
+
+  @Test
+  public void containsAll() {
+    CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("b", "a"));
+    assertThatThrownBy(() -> set.containsAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> set.containsAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> set.containsAll(Arrays.asList("a", null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThat(set.containsAll(ImmutableList.of("a", "b"))).isTrue();
+    assertThat(set.containsAll(ImmutableList.of("b", "a", "c"))).isFalse();
+    assertThat(set.containsAll(ImmutableList.of("b"))).isTrue();
+  }
+
   @Test
   public void testRetainAll() {
+    CharSequenceSet empty = CharSequenceSet.empty();
+    assertThatThrownBy(() -> empty.retainAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> empty.retainAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.retainAll(Arrays.asList("123", null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.retainAll(ImmutableList.of("456", "789", 
123)))
+        .isInstanceOf(ClassCastException.class)
+        .hasMessage("Cannot cast java.lang.Integer to java.lang.CharSequence");
+
     CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("123", "456"));
 
-    assertThat(set.retainAll(ImmutableList.of("456", "789", 123)))
+    assertThat(set.retainAll(ImmutableList.of("456", "789", "555")))
         .overridingErrorMessage("Set should be changed")
         .isTrue();
 
@@ -61,24 +163,74 @@ public class TestCharSequenceSet {
         .overridingErrorMessage("Set should not be changed")
         .isFalse();
 
-    assertThat(set.retainAll(ImmutableList.of(123, 456)))
+    assertThat(set.retainAll(ImmutableList.of("555", "789")))
         .overridingErrorMessage("Set should be changed")
         .isTrue();
 
     assertThat(set).isEmpty();
   }
 
+  @Test
+  public void toArray() {
+    CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("b", "a"));
+    assertThat(set.toArray()).hasSize(2).containsExactly("b", "a");
+
+    CharSequence[] array = new CharSequence[1];
+    assertThat(set.toArray(array)).hasSize(2).containsExactly("b", "a");
+
+    array = new CharSequence[0];
+    assertThat(set.toArray(array)).hasSize(2).containsExactly("b", "a");
+
+    array = new CharSequence[5];
+    assertThat(set.toArray(array)).hasSize(5).containsExactly("b", "a", null, 
null, null);
+
+    array = new CharSequence[2];
+    assertThat(set.toArray(array)).hasSize(2).containsExactly("b", "a");
+  }
+
+  @Test
+  public void remove() {
+    CharSequenceSet set = CharSequenceSet.of(ImmutableSet.of("a", "b", "c"));
+    assertThatThrownBy(() -> set.remove(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    set.remove("a");
+    assertThat(set).containsExactly("b", "c");
+    set.remove("b");
+    assertThat(set).containsExactly("c");
+    set.remove("c");
+    assertThat(set).isEmpty();
+  }
+
   @Test
   public void testRemoveAll() {
+    CharSequenceSet empty = CharSequenceSet.empty();
+    assertThatThrownBy(() -> empty.removeAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> empty.removeAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.removeAll(Arrays.asList("123", null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.removeAll(ImmutableList.of("123", 456)))
+        .isInstanceOf(ClassCastException.class)
+        .hasMessage("Cannot cast java.lang.Integer to java.lang.CharSequence");
+
     CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("123", "456"));
-    assertThat(set.removeAll(ImmutableList.of("456", "789", 123)))
+    assertThat(set.removeAll(ImmutableList.of("456", "789")))
         .overridingErrorMessage("Set should be changed")
         .isTrue();
 
     assertThat(set).hasSize(1).contains("123");
 
     set = CharSequenceSet.of(ImmutableList.of("123", "456"));
-    assertThat(set.removeAll(ImmutableList.of(123, 456)))
+    assertThat(set.removeAll(ImmutableList.of("333", "789")))
         .overridingErrorMessage("Set should not be changed")
         .isFalse();
 
@@ -119,4 +271,17 @@ public class TestCharSequenceSet {
         .isEqualTo(set3.hashCode())
         .isEqualTo(set4.hashCode());
   }
+
+  @Test
+  public void kryoSerialization() throws Exception {
+    CharSequenceSet charSequences = CharSequenceSet.of(ImmutableList.of("c", 
"b", "a"));
+    
assertThat(TestHelpers.KryoHelpers.roundTripSerialize(charSequences)).isEqualTo(charSequences);
+  }
+
+  @Test
+  public void javaSerialization() throws Exception {
+    CharSequenceSet charSequences = CharSequenceSet.of(ImmutableList.of("c", 
"b", "a"));
+    CharSequenceSet deserialize = 
TestHelpers.deserialize(TestHelpers.serialize(charSequences));
+    assertThat(deserialize).isEqualTo(charSequences);
+  }
 }

Reply via email to