aihuaxu commented on code in PR #13219:
URL: https://github.com/apache/iceberg/pull/13219#discussion_r2232605954


##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java:
##########
@@ -296,6 +320,134 @@ private static void assertEqualsUnsafe(Types.MapType map, Map<?, ?> expected, Ma
     }
   }
 
+  static void assertEquals(Variant expected, VariantVal actual) {
+    VariantMetadata actualMetadata =
+        VariantMetadata.from(ByteBuffer.wrap(actual.getMetadata()).order(ByteOrder.LITTLE_ENDIAN));
+    VariantTestUtil.assertEqual(expected.metadata(), actualMetadata);
+
+    org.apache.spark.types.variant.Variant sparkVariant =
+        new org.apache.spark.types.variant.Variant(actual.getValue(), actual.getMetadata());
+    assertEquals(expected.value(), sparkVariant);
+  }
+
+  static void assertEquals(VariantValue expected, org.apache.spark.types.variant.Variant actual) {
+    assertThat(actual).isNotNull();
+    assertThat(expected).isNotNull();
+
+    if (expected.type() == PhysicalType.OBJECT) {
+      assertThat(actual.getType()).isEqualTo(VariantUtil.Type.OBJECT);
+      VariantObject expectedObject = expected.asObject();
+      assertThat(actual.objectSize())
+          .as("Variant object num fields should match")
+          .isEqualTo(expectedObject.numFields());
+
+      for (String fieldName : expectedObject.fieldNames()) {
+        assertEquals(expectedObject.get(fieldName), actual.getFieldByKey(fieldName));
+      }
+
+    } else if (expected.type() == PhysicalType.ARRAY) {
+      assertThat(actual.getType()).isEqualTo(VariantUtil.Type.ARRAY);
+      VariantArray expectedArray = expected.asArray();
+      assertThat(actual.arraySize())
+          .as("Variant array num element should match")
+          .isEqualTo(expectedArray.numElements());
+
+      for (int i = 0; i < expectedArray.numElements(); i += 1) {
+        assertEquals(expectedArray.get(i), actual.getElementAtIndex(i));
+      }
+
+    } else {
+      // Primitive type and value should match
+      VariantUtil.Type expectedType = null;
+      Object actualValue = null;
+      switch (expected.type()) {
+        case NULL:
+          expectedType = VariantUtil.Type.NULL;
+          actualValue = null;
+          break;
+        case BOOLEAN_TRUE:
+        case BOOLEAN_FALSE:
+          expectedType = VariantUtil.Type.BOOLEAN;
+          actualValue = actual.getBoolean();
+          break;
+        case INT8:
+          expectedType = VariantUtil.Type.LONG;
+          actualValue = (byte) actual.getLong();

Review Comment:
   I'm not casting to byte anymore. Instead of casting down, I will widen the expected byte/short/int value to long so we don't lose any value.
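   
   For reference, a minimal sketch of what the widened comparison could look like (the `asPrimitive().get()` accessor reflects my reading of the Iceberg variants API; treat this as an assumption, not the final code):
   
   ```java
   // Hedged sketch: compare integer-typed variants by widening the expected
   // byte/short/int to long, instead of narrowing Spark's long value down.
   private static void assertIntegersEqual(
       VariantValue expected, org.apache.spark.types.variant.Variant actual) {
     assertThat(actual.getType()).isEqualTo(VariantUtil.Type.LONG);
     long expectedLong = ((Number) expected.asPrimitive().get()).longValue();
     assertThat(actual.getLong()).isEqualTo(expectedLong);
   }
   ```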



##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkVariants.java:
##########
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.data;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import org.apache.iceberg.spark.SparkSchemaUtil;
+import org.apache.iceberg.spark.TestBase;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.variants.ShreddedObject;
+import org.apache.iceberg.variants.ValueArray;
+import org.apache.iceberg.variants.Variant;
+import org.apache.iceberg.variants.VariantMetadata;
+import org.apache.iceberg.variants.VariantPrimitive;
+import org.apache.iceberg.variants.VariantTestUtil;
+import org.apache.iceberg.variants.VariantValue;
+import org.apache.iceberg.variants.Variants;
+import org.apache.spark.SparkRuntimeException;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.VariantType$;
+import org.apache.spark.unsafe.types.VariantVal;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.FieldSource;
+
+public class TestSparkVariants extends TestBase {
+  private static final VariantPrimitive<?>[] PRIMITIVES =
+      new VariantPrimitive[] {
+        Variants.ofNull(),
+        Variants.of(true),
+        Variants.of(false),
+        Variants.of((byte) 34),
+        Variants.of((byte) -34),
+        Variants.of((short) 1234),
+        Variants.of((short) -1234),
+        Variants.of(12345),
+        Variants.of(-12345),
+        Variants.of(9876543210L),
+        Variants.of(-9876543210L),
+        Variants.of(10.11F),
+        Variants.of(-10.11F),
+        Variants.of(14.3D),
+        Variants.of(-14.3D),
+        Variants.ofIsoDate("2024-11-07"),
+        Variants.ofIsoDate("1957-11-07"),
+        Variants.ofIsoTimestamptz("2024-11-07T12:33:54.123456+00:00"),
+        Variants.ofIsoTimestamptz("1957-11-07T12:33:54.123456+00:00"),
+        Variants.ofIsoTimestampntz("2024-11-07T12:33:54.123456"),
+        Variants.ofIsoTimestampntz("1957-11-07T12:33:54.123456"),
+        Variants.of(new BigDecimal("12345.6789")), // decimal4
+        Variants.of(new BigDecimal("-12345.6789")), // decimal4
+        Variants.of(new BigDecimal("123456789.987654321")), // decimal8
+        Variants.of(new BigDecimal("-123456789.987654321")), // decimal8
+        Variants.of(new BigDecimal("9876543210.123456789")), // decimal16
+        Variants.of(new BigDecimal("-9876543210.123456789")), // decimal16
+        Variants.of(ByteBuffer.wrap(new byte[] {0x0a, 0x0b, 0x0c, 0x0d})),
+        Variants.of("iceberg"),
+        Variants.ofUUID("f24f9b64-81fa-49d1-b74e-8c09a6e31c56"),
+      };
+
+  private static final VariantPrimitive<?>[] UNSUPPORTED_PRIMITIVES =
+      new VariantPrimitive[] {
+        Variants.ofIsoTime("12:33:54.123456"),
+        Variants.ofIsoTimestamptzNanos("2024-11-07T12:33:54.123456789+00:00"),
+        Variants.ofIsoTimestampntzNanos("2024-11-07T12:33:54.123456789"),
+      };
+
+  @Test
+  public void testIcebergVariantTypeToSparkVariantType() {
+    // Test that Iceberg's VariantType converts to Spark's VariantType
+    Types.VariantType icebergVariantType = Types.VariantType.get();
+    DataType sparkVariantType = SparkSchemaUtil.convert(icebergVariantType);
+
+    assertThat(sparkVariantType).isEqualTo(VariantType$.MODULE$);

Review Comment:
   I think this goes even further by making sure it's returning the singleton of `VariantType`. Would this be better?
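   
   For example, AssertJ's identity assertion would make the singleton intent explicit (just a suggestion):
   
   ```java
   // Assert reference identity rather than mere equality, so we know the
   // conversion returns the Scala singleton instance itself.
   assertThat(sparkVariantType).isSameAs(VariantType$.MODULE$);
   ```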



##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java:
##########
@@ -296,6 +320,134 @@ private static void assertEqualsUnsafe(Types.MapType map, Map<?, ?> expected, Ma
     }
   }
 
+  static void assertEquals(Variant expected, VariantVal actual) {
+    VariantMetadata actualMetadata =
+        VariantMetadata.from(ByteBuffer.wrap(actual.getMetadata()).order(ByteOrder.LITTLE_ENDIAN));
+    VariantTestUtil.assertEqual(expected.metadata(), actualMetadata);
+
+    org.apache.spark.types.variant.Variant sparkVariant =
+        new org.apache.spark.types.variant.Variant(actual.getValue(), actual.getMetadata());
+    assertEquals(expected.value(), sparkVariant);
+  }
+
+  static void assertEquals(VariantValue expected, org.apache.spark.types.variant.Variant actual) {

Review Comment:
   That makes sense. I will limit this assertion to TestSparkVariants to check for incompatibilities.



##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java:
##########
@@ -296,6 +320,134 @@ private static void assertEqualsUnsafe(Types.MapType map, Map<?, ?> expected, Ma
     }
   }
 
+  static void assertEquals(Variant expected, VariantVal actual) {
+    VariantMetadata actualMetadata =
+        VariantMetadata.from(ByteBuffer.wrap(actual.getMetadata()).order(ByteOrder.LITTLE_ENDIAN));
+    VariantTestUtil.assertEqual(expected.metadata(), actualMetadata);
+
+    org.apache.spark.types.variant.Variant sparkVariant =
+        new org.apache.spark.types.variant.Variant(actual.getValue(), actual.getMetadata());
+    assertEquals(expected.value(), sparkVariant);
+  }
+
+  static void assertEquals(VariantValue expected, org.apache.spark.types.variant.Variant actual) {
+    assertThat(actual).isNotNull();
+    assertThat(expected).isNotNull();
+
+    if (expected.type() == PhysicalType.OBJECT) {

Review Comment:
   I changed this part to validate each type. With that, I won't cast down; instead I will widen byte/short/int to long so we don't lose any value.



##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/data/RandomData.java:
##########
@@ -332,6 +340,20 @@ public Object map(Types.MapType map, Supplier<Object> keyResult, Supplier<Object
       return result;
     }
 
+    public VariantVal variant(Types.VariantType type) {

Review Comment:
   It's in SparkRandomDataGenerator. As I can see from the other methods, they return Spark objects as well, e.g.:
   
   >     public GenericArrayData list(Types.ListType list, Supplier<Object> elementResult);
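   
   For context, a rough sketch of the shape I have in mind (the `randomVariant()` helper is hypothetical, and the `sizeInBytes()`/`writeTo()` serialization calls reflect my reading of the Iceberg variants API, so treat them as assumptions):
   
   ```java
   // Hedged sketch: like list() above, this returns a Spark-side object. The
   // randomly generated Iceberg Variant is serialized into the raw metadata
   // and value byte arrays that Spark's VariantVal wraps.
   public VariantVal variant(Types.VariantType type) {
     Variant generated = randomVariant(); // hypothetical helper producing an Iceberg Variant
   
     ByteBuffer metadataBuffer =
         ByteBuffer.allocate(generated.metadata().sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN);
     generated.metadata().writeTo(metadataBuffer, 0);
   
     ByteBuffer valueBuffer =
         ByteBuffer.allocate(generated.value().sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN);
     generated.value().writeTo(valueBuffer, 0);
   
     return new VariantVal(valueBuffer.array(), metadataBuffer.array());
   }
   ```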
    



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

