This is an automated email from the ASF dual-hosted git repository.
russellspitzer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 4c77264296 API, Core: Use short string in Variant when possible
(#13284)
4c77264296 is described below
commit 4c7726429603c15be68bc00aa751fa67f8975dad
Author: Manikandan R <[email protected]>
AuthorDate: Mon Jul 21 23:28:09 2025 +0530
API, Core: Use short string in Variant when possible (#13284)
---
.../org/apache/iceberg/variants/VariantUtil.java | 4 +
.../iceberg/variants/TestSerializedArray.java | 104 +++++++--------------
.../iceberg/variants/TestSerializedObject.java | 72 +++++++-------
.../iceberg/variants/TestSerializedPrimitives.java | 4 +-
.../apache/iceberg/variants/VariantTestUtil.java | 28 ++++++
.../apache/iceberg/variants/PrimitiveWrapper.java | 21 +++--
.../iceberg/variants/TestPrimitiveWrapper.java | 7 +-
.../iceberg/variants/TestShreddedObject.java | 27 ++----
.../apache/iceberg/variants/TestValueArray.java | 12 +--
9 files changed, 138 insertions(+), 141 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java
b/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java
index 812e00a698..d4335df8b5 100644
--- a/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java
+++ b/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java
@@ -181,6 +181,10 @@ class VariantUtil {
return (byte) ((isLarge ? 0b10000 : 0) | (offsetSize - 1) << 2 | 0b11);
}
+ static byte shortStringHeader(int length) {
+ return (byte) ((length << 2) | BASIC_TYPE_SHORT_STRING);
+ }
+
static BasicType basicType(int header) {
int basicType = header & BASIC_TYPE_MASK;
switch (basicType) {
diff --git
a/api/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java
b/api/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java
index 1007c1561a..6ab552482c 100644
--- a/api/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java
+++ b/api/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java
@@ -25,6 +25,8 @@ import java.nio.ByteBuffer;
import java.util.Random;
import org.apache.iceberg.util.RandomUtil;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
public class TestSerializedArray {
private static final VariantMetadata EMPTY_METADATA =
SerializedMetadata.EMPTY_V1_METADATA;
@@ -75,16 +77,11 @@ public class TestSerializedArray {
assertThat(array.type()).isEqualTo(PhysicalType.ARRAY);
assertThat(array.numElements()).isEqualTo(5);
- assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(0).asPrimitive().get()).isEqualTo("a");
- assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(1).asPrimitive().get()).isEqualTo("b");
- assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(2).asPrimitive().get()).isEqualTo("c");
- assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(3).asPrimitive().get()).isEqualTo("d");
- assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(4).asPrimitive().get()).isEqualTo("e");
+ VariantTestUtil.assertVariantString(array.get(0), "a");
+ VariantTestUtil.assertVariantString(array.get(1), "b");
+ VariantTestUtil.assertVariantString(array.get(2), "c");
+ VariantTestUtil.assertVariantString(array.get(3), "d");
+ VariantTestUtil.assertVariantString(array.get(4), "e");
assertThatThrownBy(() -> array.get(5))
.isInstanceOf(ArrayIndexOutOfBoundsException.class)
@@ -98,18 +95,12 @@ public class TestSerializedArray {
assertThat(array.type()).isEqualTo(PhysicalType.ARRAY);
assertThat(array.numElements()).isEqualTo(6);
- assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(0).asPrimitive().get()).isEqualTo("a");
- assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(1).asPrimitive().get()).isEqualTo("b");
- assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(2).asPrimitive().get()).isEqualTo("c");
- assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(3).asPrimitive().get()).isEqualTo("iceberg");
- assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(4).asPrimitive().get()).isEqualTo("d");
- assertThat(array.get(5).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(5).asPrimitive().get()).isEqualTo("e");
+ VariantTestUtil.assertVariantString(array.get(0), "a");
+ VariantTestUtil.assertVariantString(array.get(1), "b");
+ VariantTestUtil.assertVariantString(array.get(2), "c");
+ VariantTestUtil.assertVariantString(array.get(3), "iceberg");
+ VariantTestUtil.assertVariantString(array.get(4), "d");
+ VariantTestUtil.assertVariantString(array.get(5), "e");
assertThatThrownBy(() -> array.get(6))
.isInstanceOf(ArrayIndexOutOfBoundsException.class)
@@ -131,14 +122,11 @@ public class TestSerializedArray {
assertThat(array.get(0).asPrimitive().get()).isEqualTo(17396);
assertThat(array.get(1).type()).isEqualTo(PhysicalType.INT8);
assertThat(array.get(1).asPrimitive().get()).isEqualTo((byte) 34);
- assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(2).asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(array.get(2), "iceberg");
assertThat(array.get(3).type()).isEqualTo(PhysicalType.NULL);
assertThat(array.get(3).asPrimitive().get()).isEqualTo(null);
- assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(4).asPrimitive().get()).isEqualTo("e");
- assertThat(array.get(5).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(5).asPrimitive().get()).isEqualTo("b");
+ VariantTestUtil.assertVariantString(array.get(4), "e");
+ VariantTestUtil.assertVariantString(array.get(5), "b");
assertThat(array.get(6).type()).isEqualTo(PhysicalType.BOOLEAN_FALSE);
assertThat(array.get(6).asPrimitive().get()).isEqualTo(false);
assertThat(array.get(8).type()).isEqualTo(PhysicalType.BOOLEAN_TRUE);
@@ -153,22 +141,25 @@ public class TestSerializedArray {
assertThat(array.get(7).type()).isEqualTo(PhysicalType.ARRAY);
SerializedArray actualNested = (SerializedArray) array.get(7);
assertThat(actualNested.numElements()).isEqualTo(3);
- assertThat(actualNested.get(0).type()).isEqualTo(PhysicalType.STRING);
- assertThat(actualNested.get(0).asPrimitive().get()).isEqualTo("a");
- assertThat(actualNested.get(1).type()).isEqualTo(PhysicalType.STRING);
- assertThat(actualNested.get(1).asPrimitive().get()).isEqualTo("c");
- assertThat(actualNested.get(2).type()).isEqualTo(PhysicalType.STRING);
- assertThat(actualNested.get(2).asPrimitive().get()).isEqualTo("d");
+ VariantTestUtil.assertVariantString(actualNested.get(0), "a");
+ VariantTestUtil.assertVariantString(actualNested.get(1), "c");
+ VariantTestUtil.assertVariantString(actualNested.get(2), "d");
assertThatThrownBy(() -> actualNested.get(3))
.isInstanceOf(ArrayIndexOutOfBoundsException.class)
.hasMessage("Index 3 out of bounds for length 3");
}
- @Test
- public void testTwoByteOffsets() {
- // a string larger than 255 bytes to push the value offset size above 1
byte
- String randomString = RandomUtil.generateString(300, random);
+ @ParameterizedTest
+ @ValueSource(
+ ints = {
+ 300, // a big string larger than 255 bytes to push the value offset
size above 1 byte to
+ // test TwoByteOffsets
+ 70_000 // a really-big string larger than 65535 bytes to push the
value offset size above 1
+ // byte to test ThreeByteOffsets
+ })
+ public void testMultiByteOffsets(int multiByteOffset) {
+ String randomString = RandomUtil.generateString(multiByteOffset, random);
SerializedPrimitive bigString = VariantTestUtil.createString(randomString);
ByteBuffer buffer = VariantTestUtil.createArray(bigString, A, B, C);
@@ -176,39 +167,10 @@ public class TestSerializedArray {
assertThat(array.type()).isEqualTo(PhysicalType.ARRAY);
assertThat(array.numElements()).isEqualTo(4);
- assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(0).asPrimitive().get()).isEqualTo(randomString);
- assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(1).asPrimitive().get()).isEqualTo("a");
- assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(2).asPrimitive().get()).isEqualTo("b");
- assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(3).asPrimitive().get()).isEqualTo("c");
-
- assertThatThrownBy(() -> array.get(4))
- .isInstanceOf(ArrayIndexOutOfBoundsException.class)
- .hasMessage("Index 4 out of bounds for length 4");
- }
-
- @Test
- public void testThreeByteOffsets() {
- // a string larger than 65535 bytes to push the value offset size above 1
byte
- String randomString = RandomUtil.generateString(70_000, random);
- SerializedPrimitive reallyBigString =
VariantTestUtil.createString(randomString);
-
- ByteBuffer buffer = VariantTestUtil.createArray(reallyBigString, A, B, C);
- SerializedArray array = SerializedArray.from(EMPTY_METADATA, buffer,
buffer.get(0));
-
- assertThat(array.type()).isEqualTo(PhysicalType.ARRAY);
- assertThat(array.numElements()).isEqualTo(4);
- assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(0).asPrimitive().get()).isEqualTo(randomString);
- assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(1).asPrimitive().get()).isEqualTo("a");
- assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(2).asPrimitive().get()).isEqualTo("b");
- assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING);
- assertThat(array.get(3).asPrimitive().get()).isEqualTo("c");
+ VariantTestUtil.assertVariantString(array.get(0), randomString);
+ VariantTestUtil.assertVariantString(array.get(1), "a");
+ VariantTestUtil.assertVariantString(array.get(2), "b");
+ VariantTestUtil.assertVariantString(array.get(3), "c");
assertThatThrownBy(() -> array.get(4))
.isInstanceOf(ArrayIndexOutOfBoundsException.class)
diff --git
a/api/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java
b/api/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java
index 2bc0a50bee..9bfaf78f98 100644
--- a/api/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java
+++ b/api/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java
@@ -182,10 +182,16 @@ public class TestSerializedObject {
assertThat(actualInner.get("f").asPrimitive().get()).isEqualTo((byte) 3);
}
- @Test
- public void testTwoByteOffsets() {
- // a string larger than 255 bytes to push the value offset size above 1
byte
- String randomString = RandomUtil.generateString(300, random);
+ @ParameterizedTest
+ @ValueSource(
+ ints = {
+ 300, // a big string larger than 255 bytes to push the value offset
size above 1 byte to
+ // test TwoByteOffsets
+ 70_000 // a really-big string larger than 65535 bytes to push the
value offset size above 1
+ // byte to test ThreeByteOffsets
+ })
+ public void testMultiByteOffsets(int multiByteOffset) {
+ String randomString = RandomUtil.generateString(multiByteOffset, random);
SerializedPrimitive bigString = VariantTestUtil.createString(randomString);
// note that order doesn't matter. fields are sorted by name
@@ -205,36 +211,7 @@ public class TestSerializedObject {
assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2);
assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8);
assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3);
- assertThat(object.get("big").type()).isEqualTo(PhysicalType.STRING);
- assertThat(object.get("big").asPrimitive().get()).isEqualTo(randomString);
- }
-
- @Test
- public void testThreeByteOffsets() {
- // a string larger than 65535 bytes to push the value offset size above 1
byte
- String randomString = RandomUtil.generateString(70_000, random);
- SerializedPrimitive reallyBigString =
VariantTestUtil.createString(randomString);
-
- // note that order doesn't matter. fields are sorted by name
- Map<String, VariantValue> data =
- ImmutableMap.of("really-big", reallyBigString, "a", I1, "b", I2, "c",
I3);
- ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /*
sort names */);
- ByteBuffer value = VariantTestUtil.createObject(meta, data);
-
- VariantMetadata metadata = VariantMetadata.from(meta);
- SerializedObject object = SerializedObject.from(metadata, value,
value.get(0));
-
- assertThat(object.type()).isEqualTo(PhysicalType.OBJECT);
- assertThat(object.numFields()).isEqualTo(4);
-
- assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8);
- assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1);
- assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8);
- assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2);
- assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8);
- assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3);
- assertThat(object.get("really-big").type()).isEqualTo(PhysicalType.STRING);
-
assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString);
+ VariantTestUtil.assertVariantString(object.get("big"), randomString);
}
@ParameterizedTest
@@ -264,6 +241,33 @@ public class TestSerializedObject {
}
}
+ @Test
+ public void testShortStringsInVariantPrimitives() {
+ // note that order doesn't matter. fields are sorted by name
+ Map<String, VariantValue> data =
+ ImmutableMap.of(
+ "5-byte-header",
+ VariantTestUtil.createString("iceberg"),
+ "1-byte-header",
+ VariantTestUtil.createShortString("iceberg"));
+ ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /*
sort names */);
+ ByteBuffer value = VariantTestUtil.createObject(meta, data);
+
+ VariantMetadata metadata = VariantMetadata.from(meta);
+ SerializedObject object = SerializedObject.from(metadata, value,
value.get(0));
+
+ assertThat(object.type()).isEqualTo(PhysicalType.OBJECT);
+ assertThat(object.numFields()).isEqualTo(2);
+
+
assertThat(object.get("5-byte-header").type()).isEqualTo(PhysicalType.STRING);
+
assertThat(object.get("5-byte-header").asPrimitive().get()).isEqualTo("iceberg");
+
assertThat(object.get("5-byte-header").asPrimitive().sizeInBytes()).isEqualTo(5
+ 7);
+
+
assertThat(object.get("1-byte-header").type()).isEqualTo(PhysicalType.STRING);
+
assertThat(object.get("1-byte-header").asPrimitive().get()).isEqualTo("iceberg");
+
assertThat(object.get("1-byte-header").asPrimitive().sizeInBytes()).isEqualTo(1
+ 7);
+ }
+
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testTwoByteFieldIds(boolean sortFieldNames) {
diff --git
a/api/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java
b/api/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java
index 5f6fb2c12c..c9ad948754 100644
---
a/api/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java
+++
b/api/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java
@@ -442,6 +442,7 @@ public class TestSerializedPrimitives {
assertThat(value.type()).isEqualTo(PhysicalType.STRING);
assertThat(value.get()).isEqualTo("iceberg");
+ assertThat(value.sizeInBytes()).isEqualTo(12);
}
@Test
@@ -449,8 +450,7 @@ public class TestSerializedPrimitives {
VariantPrimitive<?> value =
SerializedShortString.from(new byte[] {0b11101, 'i', 'c', 'e', 'b',
'e', 'r', 'g'});
- assertThat(value.type()).isEqualTo(PhysicalType.STRING);
- assertThat(value.get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(value, "iceberg");
}
@Test
diff --git a/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java
b/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java
index 4c77fefe50..46116c4c99 100644
--- a/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java
+++ b/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java
@@ -32,6 +32,9 @@ import
org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
public class VariantTestUtil {
+
+ private static final int MAX_SHORT_STRING_LENGTH = 63;
+
private VariantTestUtil() {}
public static void assertEqual(VariantMetadata expected, VariantMetadata
actual) {
@@ -78,6 +81,19 @@ public class VariantTestUtil {
}
}
+ public static void assertVariantString(VariantValue actual, String expected)
{
+ int expectedLength = expected.length();
+ assertThat(actual.type()).isEqualTo(PhysicalType.STRING);
+ assertThat(actual.asPrimitive().get()).isEqualTo(expected);
+ if (expectedLength <= MAX_SHORT_STRING_LENGTH) {
+ assertThat(actual.getClass()).isEqualTo(SerializedShortString.class);
+ assertThat(actual.asPrimitive().sizeInBytes()).isEqualTo(1 +
expectedLength);
+ } else {
+ assertThat(actual.getClass()).isEqualTo(SerializedPrimitive.class);
+ assertThat(actual.asPrimitive().sizeInBytes()).isEqualTo(5 +
expectedLength);
+ }
+ }
+
private static byte primitiveHeader(int primitiveType) {
return (byte) (primitiveType << 2);
}
@@ -107,6 +123,18 @@ public class VariantTestUtil {
return SerializedPrimitive.from(buffer, buffer.get(0));
}
+ /** Creates a short string primitive of max 63 bytes to use only 1 header */
+ static SerializedShortString createShortString(String string) {
+ Preconditions.checkArgument(
+ string.length() <= MAX_SHORT_STRING_LENGTH,
+ "Short String length is " + string.length() + ", should not be
greater than 63");
+ byte[] utf8 = string.getBytes(StandardCharsets.UTF_8);
+ ByteBuffer buffer = ByteBuffer.allocate(1 +
utf8.length).order(ByteOrder.LITTLE_ENDIAN);
+ buffer.put(0, VariantUtil.shortStringHeader(utf8.length));
+ writeBufferAbsolute(buffer, 1, ByteBuffer.wrap(utf8));
+ return SerializedShortString.from(buffer, buffer.get(0));
+ }
+
public static ByteBuffer variantBuffer(Map<String, VariantValue> data) {
ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /*
sort names */);
ByteBuffer value = VariantTestUtil.createObject(meta, data);
diff --git
a/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java
b/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java
index 118422d2fe..170d3d73ea 100644
--- a/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java
+++ b/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java
@@ -53,6 +53,7 @@ class PrimitiveWrapper<T> implements VariantPrimitive<T> {
private static final byte TIMESTAMPNTZ_NANOS_HEADER =
VariantUtil.primitiveHeader(Primitives.TYPE_TIMESTAMPNTZ_NANOS);
private static final byte UUID_HEADER =
VariantUtil.primitiveHeader(Primitives.TYPE_UUID);
+ private static final int MAX_SHORT_STRING_LENGTH = 63;
private final PhysicalType type;
private final T value;
@@ -114,7 +115,9 @@ class PrimitiveWrapper<T> implements VariantPrimitive<T> {
if (null == buffer) {
this.buffer = ByteBuffer.wrap(((String)
value).getBytes(StandardCharsets.UTF_8));
}
-
+ if (buffer.remaining() <= MAX_SHORT_STRING_LENGTH) {
+ return 1 + buffer.remaining(); // 1 header + value length
+ }
return 5 + buffer.remaining(); // 1 header + 4 length + value length
case UUID:
return 1 + 16; // 1 header + 16 length
@@ -208,15 +211,19 @@ class PrimitiveWrapper<T> implements VariantPrimitive<T> {
VariantUtil.writeBufferAbsolute(outBuffer, offset + 5, binary);
return 5 + binary.remaining();
case STRING:
- // TODO: use short string when possible
if (null == buffer) {
this.buffer = ByteBuffer.wrap(((String)
value).getBytes(StandardCharsets.UTF_8));
}
-
- outBuffer.put(offset, STRING_HEADER);
- outBuffer.putInt(offset + 1, buffer.remaining());
- VariantUtil.writeBufferAbsolute(outBuffer, offset + 5, buffer);
- return 5 + buffer.remaining();
+ if (buffer.remaining() <= MAX_SHORT_STRING_LENGTH) {
+ outBuffer.put(offset,
VariantUtil.shortStringHeader(buffer.remaining()));
+ VariantUtil.writeBufferAbsolute(outBuffer, offset + 1, buffer);
+ return 1 + buffer.remaining();
+ } else {
+ outBuffer.put(offset, STRING_HEADER);
+ outBuffer.putInt(offset + 1, buffer.remaining());
+ VariantUtil.writeBufferAbsolute(outBuffer, offset + 5, buffer);
+ return 5 + buffer.remaining();
+ }
case TIME:
outBuffer.put(offset, TIME_HEADER);
outBuffer.putLong(offset + 1, (Long) value);
diff --git
a/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java
b/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java
index b8c3718ccc..108132087a 100644
--- a/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java
+++ b/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java
@@ -23,6 +23,8 @@ import static org.assertj.core.api.Assertions.assertThat;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
+import java.util.Random;
+import org.apache.iceberg.util.RandomUtil;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.FieldSource;
@@ -57,7 +59,10 @@ public class TestPrimitiveWrapper {
Variants.of(new BigDecimal("9876543210.123456789")), // decimal16
Variants.of(new BigDecimal("-9876543210.123456789")), // decimal16
Variants.of(ByteBuffer.wrap(new byte[] {0x0a, 0x0b, 0x0c, 0x0d})),
- Variants.of("iceberg"),
+ Variants.of(
+
"icebergicebergicebergicebergicebergicebergicebergicebergiceberg"), // short
string of
+ // 63 (9*7) chars
+ Variants.of(RandomUtil.generateString(64, new Random(1))), // string
of 64 chars
};
@ParameterizedTest
diff --git
a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java
b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java
index 66d5c9911a..431f38cb19 100644
--- a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java
+++ b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java
@@ -73,8 +73,7 @@ public class TestShreddedObject {
assertThat(actual.numFields()).isEqualTo(3);
assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34);
- assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class);
- assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(actual.get("b"), "iceberg");
assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new
BigDecimal("12.21"));
}
@@ -92,8 +91,7 @@ public class TestShreddedObject {
assertThat(actual.numFields()).isEqualTo(3);
assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34);
- assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class);
- assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(actual.get("b"), "iceberg");
assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new
BigDecimal("12.21"));
}
@@ -111,8 +109,7 @@ public class TestShreddedObject {
assertThat(actual.numFields()).isEqualTo(3);
assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34);
- assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class);
- assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(actual.get("b"), "iceberg");
assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new
BigDecimal("12.21"));
}
@@ -130,8 +127,7 @@ public class TestShreddedObject {
assertThat(actual.numFields()).isEqualTo(3);
assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34);
- assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class);
- assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(actual.get("b"), "iceberg");
assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new
BigDecimal("12.21"));
}
@@ -237,12 +233,10 @@ public class TestShreddedObject {
assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT32);
assertThat(object.get("a").asPrimitive().get()).isEqualTo(34);
- assertThat(object.get("b").type()).isEqualTo(PhysicalType.STRING);
- assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(object.get("b"), "iceberg");
assertThat(object.get("c").type()).isEqualTo(PhysicalType.DECIMAL4);
assertThat(object.get("c").asPrimitive().get()).isEqualTo(new
BigDecimal("12.21"));
- assertThat(object.get("big").type()).isEqualTo(PhysicalType.STRING);
- assertThat(object.get("big").asPrimitive().get()).isEqualTo(randomString);
+ VariantTestUtil.assertVariantString(object.get("big"), randomString);
}
@ParameterizedTest
@@ -268,8 +262,7 @@ public class TestShreddedObject {
for (Map.Entry<String, VariantPrimitive<String>> entry :
fields.entrySet()) {
VariantValue fieldValue = object.get(entry.getKey());
- assertThat(fieldValue.type()).isEqualTo(PhysicalType.STRING);
-
assertThat(fieldValue.asPrimitive().get()).isEqualTo(entry.getValue().get());
+ VariantTestUtil.assertVariantString(fieldValue, entry.getValue().get());
}
}
@@ -298,8 +291,7 @@ public class TestShreddedObject {
assertThat(object.get("aa").type()).isEqualTo(PhysicalType.INT32);
assertThat(object.get("aa").asPrimitive().get()).isEqualTo(34);
- assertThat(object.get("AA").type()).isEqualTo(PhysicalType.STRING);
- assertThat(object.get("AA").asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(object.get("AA"), "iceberg");
assertThat(object.get("ZZ").type()).isEqualTo(PhysicalType.DECIMAL4);
assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo(new
BigDecimal("12.21"));
}
@@ -329,8 +321,7 @@ public class TestShreddedObject {
assertThat(object.get("aa").type()).isEqualTo(PhysicalType.INT32);
assertThat(object.get("aa").asPrimitive().get()).isEqualTo(34);
- assertThat(object.get("AA").type()).isEqualTo(PhysicalType.STRING);
- assertThat(object.get("AA").asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(object.get("AA"), "iceberg");
assertThat(object.get("ZZ").type()).isEqualTo(PhysicalType.DECIMAL4);
assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo(new
BigDecimal("12.21"));
}
diff --git a/core/src/test/java/org/apache/iceberg/variants/TestValueArray.java
b/core/src/test/java/org/apache/iceberg/variants/TestValueArray.java
index f500f61065..d281d8a381 100644
--- a/core/src/test/java/org/apache/iceberg/variants/TestValueArray.java
+++ b/core/src/test/java/org/apache/iceberg/variants/TestValueArray.java
@@ -65,8 +65,7 @@ public class TestValueArray {
assertThat(actual.numElements()).isEqualTo(3);
assertThat(actual.get(0)).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get(0).asPrimitive().get()).isEqualTo(34);
- assertThat(actual.get(1)).isInstanceOf(VariantPrimitive.class);
- assertThat(actual.get(1).asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(actual.get(1), "iceberg");
assertThat(actual.get(2)).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get(2).asPrimitive().get()).isEqualTo(new
BigDecimal("12.21"));
}
@@ -83,8 +82,7 @@ public class TestValueArray {
assertThat(actual.numElements()).isEqualTo(3);
assertThat(actual.get(0)).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get(0).asPrimitive().get()).isEqualTo(34);
- assertThat(actual.get(1)).isInstanceOf(VariantPrimitive.class);
- assertThat(actual.get(1).asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(actual.get(1), "iceberg");
assertThat(actual.get(2)).isInstanceOf(VariantPrimitive.class);
assertThat(actual.get(2).asPrimitive().get()).isEqualTo(new
BigDecimal("12.21"));
}
@@ -109,12 +107,10 @@ public class TestValueArray {
assertThat(actualArray.get(0).type()).isEqualTo(PhysicalType.INT32);
assertThat(actualArray.get(0).asPrimitive().get()).isEqualTo(34);
- assertThat(actualArray.get(1).type()).isEqualTo(PhysicalType.STRING);
- assertThat(actualArray.get(1).asPrimitive().get()).isEqualTo("iceberg");
+ VariantTestUtil.assertVariantString(actualArray.get(1), "iceberg");
assertThat(actualArray.get(2).type()).isEqualTo(PhysicalType.DECIMAL4);
assertThat(actualArray.get(2).asPrimitive().get()).isEqualTo(new
BigDecimal("12.21"));
- assertThat(actualArray.get(3).type()).isEqualTo(PhysicalType.STRING);
- assertThat(actualArray.get(3).asPrimitive().get()).isEqualTo(randomString);
+ VariantTestUtil.assertVariantString(actualArray.get(3), randomString);
}
@Test