nastra commented on code in PR #11419:
URL: https://github.com/apache/iceberg/pull/11419#discussion_r1839833296
##########
spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWriterV2.java:
##########
@@ -226,4 +230,132 @@ public void testWriteWithCaseSensitiveOption() throws NoSuchTableException, Pars
fields = Spark3Util.loadIcebergTable(sparkSession, tableName).schema().asStruct().fields();
Assert.assertEquals(4, fields.size());
}
+
+ @Test
+ public void testMergeSchemaIgnoreCastingLongToInt() throws Exception {
+ sql(
+ "ALTER TABLE %s SET TBLPROPERTIES ('%s'='true')",
+ tableName, TableProperties.SPARK_WRITE_ACCEPT_ANY_SCHEMA);
+
+ Dataset<Row> bigintDF =
+ jsonToDF(
+ "id bigint, data string",
+ "{ \"id\": 1, \"data\": \"a\" }",
+ "{ \"id\": 2, \"data\": \"b\" }");
+
+ bigintDF.writeTo(tableName).append();
+
+ assertEquals(
+ "Should have initial rows with long column",
+ ImmutableList.of(row(1L, "a"), row(2L, "b")),
+ sql("select * from %s order by id", tableName));
+
+ Dataset<Row> intDF =
+ jsonToDF(
+ "id int, data string",
+ "{ \"id\": 3, \"data\": \"c\" }",
+ "{ \"id\": 4, \"data\": \"d\" }");
+
+    assertThatCode(() -> intDF.writeTo(tableName).option("merge-schema", "true").append())
+ .doesNotThrowAnyException();
+
+ assertEquals(
+ "Should include new rows with unchanged long column type",
+        ImmutableList.of(row(1L, "a"), row(2L, "b"), row(3L, "c"), row(4L, "d")),
+ sql("select * from %s order by id", tableName));
+
+ // verify the column type did not change
+ Types.NestedField idField =
+ Spark3Util.loadIcebergTable(spark, tableName).schema().findField("id");
+ assertThat(idField.type().typeId().equals(Type.TypeID.LONG));
+ }
+
+ @Test
+ public void testMergeSchemaIgnoreCastingDoubleToFloat() throws Exception {
+ removeTables();
+ sql("CREATE TABLE %s (id double, data string) USING iceberg", tableName);
+ sql(
+ "ALTER TABLE %s SET TBLPROPERTIES ('%s'='true')",
+ tableName, TableProperties.SPARK_WRITE_ACCEPT_ANY_SCHEMA);
+
+ Dataset<Row> doubleDF =
+ jsonToDF(
+ "id double, data string",
+ "{ \"id\": 1.0, \"data\": \"a\" }",
+ "{ \"id\": 2.0, \"data\": \"b\" }");
+
+ doubleDF.writeTo(tableName).append();
+
+ assertEquals(
+ "Should have initial rows with double column",
+ ImmutableList.of(row(1.0, "a"), row(2.0, "b")),
+ sql("select * from %s order by id", tableName));
+
+ Dataset<Row> floatDF =
+ jsonToDF(
+ "id float, data string",
+ "{ \"id\": 3.0, \"data\": \"c\" }",
+ "{ \"id\": 4.0, \"data\": \"d\" }");
+
+    assertThatCode(() -> floatDF.writeTo(tableName).option("merge-schema", "true").append())
+ .doesNotThrowAnyException();
+
+ assertEquals(
+ "Should include new rows with unchanged double column type",
+        ImmutableList.of(row(1.0, "a"), row(2.0, "b"), row(3.0, "c"), row(4.0, "d")),
+ sql("select * from %s order by id", tableName));
+
+ // verify the column type did not change
+ Types.NestedField idField =
+ Spark3Util.loadIcebergTable(spark, tableName).schema().findField("id");
+ assertThat(idField.type().typeId().equals(Type.TypeID.DOUBLE));
+ }
+
+ @Test
+  public void testMergeSchemaIgnoreCastingDecimalToDecimalWithNarrowerPrecision() throws Exception {
+ removeTables();
+ sql("CREATE TABLE %s (id decimal(6,2), data string) USING iceberg",
tableName);
+ sql(
+ "ALTER TABLE %s SET TBLPROPERTIES ('%s'='true')",
+ tableName, TableProperties.SPARK_WRITE_ACCEPT_ANY_SCHEMA);
+
+ Dataset<Row> decimalPrecision6DF =
+ jsonToDF(
+ "id decimal(6,2), data string",
+ "{ \"id\": 1.0, \"data\": \"a\" }",
+ "{ \"id\": 2.0, \"data\": \"b\" }");
+
+ decimalPrecision6DF.writeTo(tableName).append();
+
+ assertEquals(
+ "Should have initial rows with decimal column with precision 6",
+        ImmutableList.of(row(new BigDecimal("1.00"), "a"), row(new BigDecimal("2.00"), "b")),
+ sql("select * from %s order by id", tableName));
+
+ Dataset<Row> decimalPrecision4DF =
+ jsonToDF(
+ "id decimal(4,2), data string",
+ "{ \"id\": 3.0, \"data\": \"c\" }",
+ "{ \"id\": 4.0, \"data\": \"d\" }");
+
+    assertThatCode(
+            () -> decimalPrecision4DF.writeTo(tableName).option("merge-schema", "true").append())
+ .doesNotThrowAnyException();
+
+ assertEquals(
+ "Should include new rows with unchanged decimal precision",
+ ImmutableList.of(
+ row(new BigDecimal("1.00"), "a"),
+ row(new BigDecimal("2.00"), "b"),
+ row(new BigDecimal("3.00"), "c"),
+ row(new BigDecimal("4.00"), "d")),
+ sql("select * from %s order by id", tableName));
+
+ // verify the decimal column precision did not change
+    Type idFieldType =
+        Spark3Util.loadIcebergTable(spark, tableName).schema().findField("id").type();
+ assertThat(idFieldType.typeId().equals(Type.TypeID.DECIMAL));
+ Types.DecimalType decimalType = (Types.DecimalType) idFieldType;
+ assertThat(decimalType.precision() == 6);
Review Comment:
```suggestion
assertThat(decimalType.precision()).isEqualTo(6);
```
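Worth spelling out why this matters: `assertThat(decimalType.precision() == 6)` does not verify anything, because AssertJ's `assertThat(boolean)` only builds a `BooleanAssert` and performs no check until a terminal call such as `isTrue()` is chained. A minimal sketch of the difference (assumes AssertJ is on the classpath; the class name is just illustrative, not part of the PR):

```java
import static org.assertj.core.api.Assertions.assertThat;

// Illustrative only: shows why a bare boolean assertThat is a no-op.
public class AssertJBooleanPitfall {
  public static void main(String[] args) {
    int precision = 4; // pretend merge-schema had unexpectedly changed the precision

    // Silently passes: this only creates a BooleanAssert; without a terminal
    // call like .isTrue(), no verification is performed.
    assertThat(precision == 6);

    // Actually asserts and fails here, reporting the expected and actual values.
    assertThat(precision).isEqualTo(6);
  }
}
```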