This is an automated email from the ASF dual-hosted git repository.
etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new d894a0283e Core: Align CharSequenceSet impl with Data/DeleteFileSet
(#11322)
d894a0283e is described below
commit d894a0283e137c44d910acfc4a77119d9df2a005
Author: Eduard Tudenhoefner <[email protected]>
AuthorDate: Wed Dec 10 20:07:58 2025 +0100
Core: Align CharSequenceSet impl with Data/DeleteFileSet (#11322)
---
.../org/apache/iceberg/util/CharSequenceSet.java | 178 +++------------------
.../apache/iceberg/util/CharSequenceWrapper.java | 5 +-
.../apache/iceberg/util/TestCharSequenceSet.java | 175 +++++++++++++++++++-
3 files changed, 197 insertions(+), 161 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
index cfdac0104c..c13cbfd0cc 100644
--- a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
+++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
@@ -18,183 +18,53 @@
*/
package org.apache.iceberg.util;
-import java.io.Serializable;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.Set;
-import java.util.stream.Collectors;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
-import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
-import org.apache.iceberg.relocated.com.google.common.collect.Sets;
-import org.apache.iceberg.relocated.com.google.common.collect.Streams;
-public class CharSequenceSet implements Set<CharSequence>, Serializable {
+public class CharSequenceSet extends WrapperSet<CharSequence> {
private static final ThreadLocal<CharSequenceWrapper> WRAPPERS =
ThreadLocal.withInitial(() -> CharSequenceWrapper.wrap(null));
- public static CharSequenceSet of(Iterable<CharSequence> charSequences) {
- return new CharSequenceSet(charSequences);
- }
-
- public static CharSequenceSet empty() {
- return new CharSequenceSet(ImmutableList.of());
- }
-
- private final Set<CharSequenceWrapper> wrapperSet;
-
- private CharSequenceSet(Iterable<CharSequence> charSequences) {
- this.wrapperSet =
- Sets.newHashSet(Iterables.transform(charSequences,
CharSequenceWrapper::wrap));
+ private CharSequenceSet() {
+ // needed for serialization
}
- @Override
- public int size() {
- return wrapperSet.size();
+ private CharSequenceSet(Iterable<? extends CharSequence> charSequences) {
+ super(
+ Iterables.transform(
+ charSequences,
+ obj -> {
+ Preconditions.checkNotNull(obj, "Invalid object: null");
+ return CharSequenceWrapper.wrap(obj);
+ }));
}
- @Override
- public boolean isEmpty() {
- return wrapperSet.isEmpty();
+ public static CharSequenceSet of(Iterable<? extends CharSequence>
charSequences) {
+ return new CharSequenceSet(charSequences);
}
- @Override
- public boolean contains(Object obj) {
- if (obj instanceof CharSequence) {
- CharSequenceWrapper wrapper = WRAPPERS.get();
- boolean result = wrapperSet.contains(wrapper.set((CharSequence) obj));
- wrapper.set(null); // don't hold a reference to the value
- return result;
- }
- return false;
+ public static CharSequenceSet empty() {
+ return new CharSequenceSet();
}
@Override
- public Iterator<CharSequence> iterator() {
- return Iterators.transform(wrapperSet.iterator(),
CharSequenceWrapper::get);
+ protected Wrapper<CharSequence> wrapper() {
+ return WRAPPERS.get();
}
@Override
- public Object[] toArray() {
- return Iterators.toArray(iterator(), CharSequence.class);
+ protected Wrapper<CharSequence> wrap(CharSequence file) {
+ return CharSequenceWrapper.wrap(file);
}
@Override
- @SuppressWarnings("unchecked")
- public <T> T[] toArray(T[] destArray) {
- int size = wrapperSet.size();
- if (destArray.length < size) {
- return (T[]) toArray();
- }
-
- Iterator<CharSequence> iter = iterator();
- int ind = 0;
- while (iter.hasNext()) {
- destArray[ind] = (T) iter.next();
- ind += 1;
- }
-
- if (destArray.length > size) {
- destArray[size] = null;
- }
-
- return destArray;
+ protected Class<CharSequence> elementClass() {
+ return CharSequence.class;
}
@Override
public boolean add(CharSequence charSequence) {
- return wrapperSet.add(CharSequenceWrapper.wrap(charSequence));
- }
-
- @Override
- public boolean remove(Object obj) {
- if (obj instanceof CharSequence) {
- CharSequenceWrapper wrapper = WRAPPERS.get();
- boolean result = wrapperSet.remove(wrapper.set((CharSequence) obj));
- wrapper.set(null); // don't hold a reference to the value
- return result;
- }
- return false;
- }
-
- @Override
- @SuppressWarnings("CollectionUndefinedEquality")
- public boolean containsAll(Collection<?> objects) {
- if (objects != null) {
- return Iterables.all(objects, this::contains);
- }
- return false;
- }
-
- @Override
- public boolean addAll(Collection<? extends CharSequence> charSequences) {
- if (charSequences != null) {
- return Iterables.addAll(
- wrapperSet, Iterables.transform(charSequences,
CharSequenceWrapper::wrap));
- }
- return false;
- }
-
- @Override
- public boolean retainAll(Collection<?> objects) {
- if (objects != null) {
- Set<CharSequenceWrapper> toRetain =
- objects.stream()
- .filter(CharSequence.class::isInstance)
- .map(CharSequence.class::cast)
- .map(CharSequenceWrapper::wrap)
- .collect(Collectors.toSet());
-
- return Iterables.retainAll(wrapperSet, toRetain);
- }
-
- return false;
- }
-
- @Override
- @SuppressWarnings("CollectionUndefinedEquality")
- public boolean removeAll(Collection<?> objects) {
- if (objects != null) {
- return objects.stream().filter(this::remove).count() != 0;
- }
-
- return false;
- }
-
- @Override
- public void clear() {
- wrapperSet.clear();
- }
-
- @SuppressWarnings("CollectionUndefinedEquality")
- @Override
- public boolean equals(Object other) {
- if (this == other) {
- return true;
- } else if (!(other instanceof Set)) {
- return false;
- }
-
- Set<?> that = (Set<?>) other;
-
- if (size() != that.size()) {
- return false;
- }
-
- try {
- return containsAll(that);
- } catch (ClassCastException | NullPointerException unused) {
- return false;
- }
- }
-
- @Override
- public int hashCode() {
- return wrapperSet.stream().mapToInt(CharSequenceWrapper::hashCode).sum();
- }
-
- @Override
- public String toString() {
- return
Streams.stream(iterator()).collect(Collectors.joining("CharSequenceSet({", ",
", "})"));
+ // method is needed to not break API compatibility
+ return super.add(charSequence);
}
}
diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java
b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java
index 854264c1ae..59e8eb712d 100644
--- a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java
+++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java
@@ -18,12 +18,11 @@
*/
package org.apache.iceberg.util;
-import java.io.Serializable;
import org.apache.iceberg.types.Comparators;
import org.apache.iceberg.types.JavaHashes;
/** Wrapper class to adapt CharSequence for use in maps and sets. */
-public class CharSequenceWrapper implements CharSequence, Serializable {
+public class CharSequenceWrapper implements CharSequence,
WrapperSet.Wrapper<CharSequence> {
public static CharSequenceWrapper wrap(CharSequence seq) {
return new CharSequenceWrapper(seq);
}
@@ -39,6 +38,7 @@ public class CharSequenceWrapper implements CharSequence,
Serializable {
this.wrapped = wrapped;
}
+ @Override
public CharSequenceWrapper set(CharSequence newWrapped) {
this.wrapped = newWrapped;
this.hashCode = 0;
@@ -46,6 +46,7 @@ public class CharSequenceWrapper implements CharSequence,
Serializable {
return this;
}
+ @Override
public CharSequence get() {
return wrapped;
}
diff --git a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
index 324742c07a..093d2a0c6b 100644
--- a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
+++ b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
@@ -19,10 +19,12 @@
package org.apache.iceberg.util;
import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
+import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.junit.jupiter.api.Test;
@@ -42,15 +44,115 @@ public class TestCharSequenceSet {
@Test
public void nullString() {
- assertThat(CharSequenceSet.of(Arrays.asList((String)
null))).contains((String) null);
+ assertThatThrownBy(() -> CharSequenceSet.of(Arrays.asList((String) null)))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
assertThat(CharSequenceSet.empty()).doesNotContain((String) null);
}
+ @Test
+ public void emptySet() {
+ assertThat(CharSequenceSet.empty()).isEmpty();
+ assertThat(CharSequenceSet.empty()).doesNotContain("a", "b", "c");
+ }
+
+ @Test
+ public void insertionOrderIsMaintained() {
+ CharSequenceSet set = CharSequenceSet.empty();
+ set.addAll(ImmutableList.of("d", "a", "c"));
+ set.add("b");
+ set.add("d");
+
+ assertThat(set).hasSize(4).containsExactly("d", "a", "c", "b");
+ }
+
+ @Test
+ public void clear() {
+ CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("a", "b"));
+ set.clear();
+ assertThat(set).isEmpty();
+ }
+
+ @Test
+ public void addAll() {
+ CharSequenceSet empty = CharSequenceSet.empty();
+ assertThatThrownBy(() -> empty.add(null))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ assertThatThrownBy(() -> empty.addAll(null))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid collection: null");
+
+ assertThatThrownBy(() -> empty.addAll(Collections.singletonList(null)))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ assertThatThrownBy(() -> empty.addAll(Arrays.asList("a", null)))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ CharSequenceSet set = CharSequenceSet.empty();
+ set.addAll(ImmutableList.of("b", "a", "c", "a"));
+ assertThat(set).hasSize(3).containsExactly("b", "a", "c");
+ }
+
+ @Test
+ public void contains() {
+ CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("b", "a"));
+ assertThatThrownBy(() -> set.contains(null))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ assertThat(set).hasSize(2).containsExactly("b",
"a").doesNotContain("c").doesNotContain("d");
+
+ assertThatThrownBy(() -> CharSequenceSet.of(Arrays.asList("c", "b", null,
"a")))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+ }
+
+ @Test
+ public void containsAll() {
+ CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("b", "a"));
+ assertThatThrownBy(() -> set.containsAll(null))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid collection: null");
+
+ assertThatThrownBy(() -> set.containsAll(Collections.singletonList(null)))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ assertThatThrownBy(() -> set.containsAll(Arrays.asList("a", null)))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ assertThat(set.containsAll(ImmutableList.of("a", "b"))).isTrue();
+ assertThat(set.containsAll(ImmutableList.of("b", "a", "c"))).isFalse();
+ assertThat(set.containsAll(ImmutableList.of("b"))).isTrue();
+ }
+
@Test
public void testRetainAll() {
+ CharSequenceSet empty = CharSequenceSet.empty();
+ assertThatThrownBy(() -> empty.retainAll(null))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid collection: null");
+
+ assertThatThrownBy(() -> empty.retainAll(Collections.singletonList(null)))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ assertThatThrownBy(() -> empty.retainAll(Arrays.asList("123", null)))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ assertThatThrownBy(() -> empty.retainAll(ImmutableList.of("456", "789",
123)))
+ .isInstanceOf(ClassCastException.class)
+ .hasMessage("Cannot cast java.lang.Integer to java.lang.CharSequence");
+
CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("123", "456"));
- assertThat(set.retainAll(ImmutableList.of("456", "789", 123)))
+ assertThat(set.retainAll(ImmutableList.of("456", "789", "555")))
.overridingErrorMessage("Set should be changed")
.isTrue();
@@ -61,24 +163,74 @@ public class TestCharSequenceSet {
.overridingErrorMessage("Set should not be changed")
.isFalse();
- assertThat(set.retainAll(ImmutableList.of(123, 456)))
+ assertThat(set.retainAll(ImmutableList.of("555", "789")))
.overridingErrorMessage("Set should be changed")
.isTrue();
assertThat(set).isEmpty();
}
+ @Test
+ public void toArray() {
+ CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("b", "a"));
+ assertThat(set.toArray()).hasSize(2).containsExactly("b", "a");
+
+ CharSequence[] array = new CharSequence[1];
+ assertThat(set.toArray(array)).hasSize(2).containsExactly("b", "a");
+
+ array = new CharSequence[0];
+ assertThat(set.toArray(array)).hasSize(2).containsExactly("b", "a");
+
+ array = new CharSequence[5];
+ assertThat(set.toArray(array)).hasSize(5).containsExactly("b", "a", null,
null, null);
+
+ array = new CharSequence[2];
+ assertThat(set.toArray(array)).hasSize(2).containsExactly("b", "a");
+ }
+
+ @Test
+ public void remove() {
+ CharSequenceSet set = CharSequenceSet.of(ImmutableSet.of("a", "b", "c"));
+ assertThatThrownBy(() -> set.remove(null))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ set.remove("a");
+ assertThat(set).containsExactly("b", "c");
+ set.remove("b");
+ assertThat(set).containsExactly("c");
+ set.remove("c");
+ assertThat(set).isEmpty();
+ }
+
@Test
public void testRemoveAll() {
+ CharSequenceSet empty = CharSequenceSet.empty();
+ assertThatThrownBy(() -> empty.removeAll(null))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid collection: null");
+
+ assertThatThrownBy(() -> empty.removeAll(Collections.singletonList(null)))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ assertThatThrownBy(() -> empty.removeAll(Arrays.asList("123", null)))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Invalid object: null");
+
+ assertThatThrownBy(() -> empty.removeAll(ImmutableList.of("123", 456)))
+ .isInstanceOf(ClassCastException.class)
+ .hasMessage("Cannot cast java.lang.Integer to java.lang.CharSequence");
+
CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("123", "456"));
- assertThat(set.removeAll(ImmutableList.of("456", "789", 123)))
+ assertThat(set.removeAll(ImmutableList.of("456", "789")))
.overridingErrorMessage("Set should be changed")
.isTrue();
assertThat(set).hasSize(1).contains("123");
set = CharSequenceSet.of(ImmutableList.of("123", "456"));
- assertThat(set.removeAll(ImmutableList.of(123, 456)))
+ assertThat(set.removeAll(ImmutableList.of("333", "789")))
.overridingErrorMessage("Set should not be changed")
.isFalse();
@@ -119,4 +271,17 @@ public class TestCharSequenceSet {
.isEqualTo(set3.hashCode())
.isEqualTo(set4.hashCode());
}
+
+ @Test
+ public void kryoSerialization() throws Exception {
+ CharSequenceSet charSequences = CharSequenceSet.of(ImmutableList.of("c",
"b", "a"));
+
assertThat(TestHelpers.KryoHelpers.roundTripSerialize(charSequences)).isEqualTo(charSequences);
+ }
+
+ @Test
+ public void javaSerialization() throws Exception {
+ CharSequenceSet charSequences = CharSequenceSet.of(ImmutableList.of("c",
"b", "a"));
+ CharSequenceSet deserialize =
TestHelpers.deserialize(TestHelpers.serialize(charSequences));
+ assertThat(deserialize).isEqualTo(charSequences);
+ }
}