nssalian commented on code in PR #16087:
URL: https://github.com/apache/iceberg/pull/16087#discussion_r3154692444
##########
spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/sql/TestSparkVariantRead.java:
##########
@@ -302,6 +302,56 @@ public void testNestedMapVariant(boolean vectorized) {
sql("DROP TABLE IF EXISTS %s", mapTable);
}
+ @ParameterizedTest
+ @ValueSource(booleans = {false, true})
+ public void testMergeIntoWithVariant(boolean vectorized) {
+ // Variant columns are not vectorized yet, but MERGE INTO should not crash regardless of the
+ // vectorization setting. The reader falls back to non-vectorized for variant columns.
+ String mergeTable = CATALOG + ".default.var_merge";
+ sql("DROP TABLE IF EXISTS %s", mergeTable);
+ sql(
+ "CREATE TABLE %s (id BIGINT, data VARIANT) USING iceberg "
+ + "TBLPROPERTIES ('format-version'='3')",
+ mergeTable);
+ setVectorization(mergeTable, vectorized);
+
+ sql(
+ "INSERT INTO %s VALUES "
+ + "(1, parse_json('{\"name\":\"alice\",\"age\":30}')), "
+ + "(2, parse_json('{\"name\":\"bob\",\"age\":25}'))",
+ mergeTable);
+
+ sql(
+ "MERGE INTO %s AS target "
+ + "USING (SELECT 1 AS id,
parse_json('{\"name\":\"alice\",\"age\":31}') AS data) AS source "
+ + "ON target.id = source.id "
+ + " AND variant_get(target.data, '$.name', 'string') =
variant_get(source.data, '$.name', 'string') "
+ + "WHEN MATCHED THEN UPDATE SET target.data = source.data "
+ + "WHEN NOT MATCHED THEN INSERT *",
+ mergeTable);
+
+ List<Row> rows = spark.table(mergeTable).select("id",
"data").orderBy("id").collectAsList();
+
+ assertThat(rows).hasSize(2);
+ assertThat(rows.get(0).getLong(0)).isEqualTo(1L);
+ Variant v1 =
+ new Variant(
+ ((VariantVal) rows.get(0).get(1)).getValue(),
+ ((VariantVal) rows.get(0).get(1)).getMetadata());
+ assertThat(v1.getFieldByKey("name").getString()).isEqualTo("alice");
Review Comment:
Will take another look :)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]