ebyhr commented on code in PR #12893:
URL: https://github.com/apache/iceberg/pull/12893#discussion_r2259913801
##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestSparkExecutorCache.java:
##########
@@ -181,6 +181,30 @@ public void testMaxTotalSizeConfig() {
         });
   }
 
+  @TestTemplate
+  public void testDeleteFilesCacheDisabledConfig() throws Exception {
+    createAndInitTable(TableProperties.DELETE_MODE, COPY_ON_WRITE);
+    Table table = validationCatalog.loadTable(targetTableIdent);
+
+    withSQLConf(
+        ImmutableMap.of(
+            SparkSQLProperties.EXECUTOR_CACHE_ENABLED, "true",
+            SparkSQLProperties.EXECUTOR_CACHE_DELETE_FILES_ENABLED, "false"),
+        () -> {
+          SparkReadConf readConf = new SparkReadConf(spark, table, Collections.emptyMap());
+          assertThat(readConf.cacheDeleteFilesOnExecutors()).isFalse();
+        });
+
+    withSQLConf(
+        ImmutableMap.of(
+            SparkSQLProperties.EXECUTOR_CACHE_ENABLED, "true",
+            SparkSQLProperties.EXECUTOR_CACHE_DELETE_FILES_ENABLED, "true"),
+        () -> {
+          SparkReadConf readConf = new SparkReadConf(spark, table, Collections.emptyMap());
+          assertThat(readConf.cacheDeleteFilesOnExecutors()).isTrue();
+        });

Review Comment:
   Do we have test coverage that `cacheDeleteFilesOnExecutors()` returns false when `EXECUTOR_CACHE_ENABLED` is false but `EXECUTOR_CACHE_DELETE_FILES_ENABLED` is true?
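   A minimal sketch of that missing case, reusing the helpers already visible in this diff (`createAndInitTable`, `targetTableIdent`, `withSQLConf`); the test name is hypothetical, and it assumes the intended semantics are that the umbrella executor-cache flag takes precedence over the per-feature flag:
   ```java
   @TestTemplate
   public void testDeleteFilesCacheIgnoredWhenExecutorCacheDisabled() throws Exception {
     createAndInitTable(TableProperties.DELETE_MODE, COPY_ON_WRITE);
     Table table = validationCatalog.loadTable(targetTableIdent);

     withSQLConf(
         ImmutableMap.of(
             SparkSQLProperties.EXECUTOR_CACHE_ENABLED, "false",
             SparkSQLProperties.EXECUTOR_CACHE_DELETE_FILES_ENABLED, "true"),
         () -> {
           SparkReadConf readConf = new SparkReadConf(spark, table, Collections.emptyMap());
           // disabling the executor cache entirely should win over the delete-file flag
           assertThat(readConf.cacheDeleteFilesOnExecutors()).isFalse();
         });
   }
   ```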
##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestSparkExecutorCache.java:
##########
@@ -349,6 +373,141 @@ private void checkMerge(RowLevelOperationMode mode) throws Exception {
         sql("SELECT * FROM %s ORDER BY id ASC", targetTableName));
   }
 
+  @TestTemplate
+  public void testCopyOnWriteDeleteWithDeleteFileCacheCacheDisabled() throws Exception {

Review Comment:
   "Cache" is repeated:
   ```suggestion
     public void testCopyOnWriteDeleteWithDeleteFileCacheDisabled() throws Exception {
   ```

##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestSparkExecutorCache.java:
##########
@@ -349,6 +373,141 @@ private void checkMerge(RowLevelOperationMode mode) throws Exception {
         sql("SELECT * FROM %s ORDER BY id ASC", targetTableName));
   }
 
+  @TestTemplate
+  public void testCopyOnWriteDeleteWithDeleteFileCacheCacheDisabled() throws Exception {
+    checkDeleteWithDeleteFilesCacheDisabled(COPY_ON_WRITE);
+  }
+
+  @TestTemplate
+  public void testMergeOnReadDeleteWithDeleteFileCacheDisabled() throws Exception {
+    checkDeleteWithDeleteFilesCacheDisabled(MERGE_ON_READ);
+  }
+
+  private void checkDeleteWithDeleteFilesCacheDisabled(RowLevelOperationMode mode)
+      throws Exception {
+    withSQLConf(
+        ImmutableMap.of(
+            SparkSQLProperties.EXECUTOR_CACHE_ENABLED, "true",
+            SparkSQLProperties.EXECUTOR_CACHE_DELETE_FILES_ENABLED, "false"),
+        () -> {
+          try {
+            List<DeleteFile> deleteFiles = createAndInitTable(TableProperties.DELETE_MODE, mode);
+
+            sql("DELETE FROM %s WHERE id = 1 OR id = 4", targetTableName);
+
+            // When cache is disabled, delete files should be opened multiple times.
+            // The cached CoW test has a maximum of 3 scans, so we expect more than that when
+            // disabled.
+            // The cached MoR test has a maximum of 1 scan, so we expect more than that when
+            // disabled.
+            int expectedMinStreamCount = mode == COPY_ON_WRITE ? 4 : 2;
+            assertThat(deleteFiles)
+                .allMatch(deleteFile -> streamCount(deleteFile) >= expectedMinStreamCount);
+
+            assertEquals(
+                "Should have expected rows",
+                ImmutableList.of(),
+                sql("SELECT * FROM %s ORDER BY id ASC", targetTableName));
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+        });
+  }
+
+  @TestTemplate
+  public void testCopyOnWriteUpdateWithDeleteFilesCacheDisabled() throws Exception {
+    checkUpdateWithDeleteFilesCacheDisabled(COPY_ON_WRITE);
+  }
+
+  @TestTemplate
+  public void testMergeOnReadUpdateWithDeleteFilesCacheDisabled() throws Exception {
+    checkUpdateWithDeleteFilesCacheDisabled(MERGE_ON_READ);
+  }
+
+  private void checkUpdateWithDeleteFilesCacheDisabled(RowLevelOperationMode mode)
+      throws Exception {
+    withSQLConf(
+        ImmutableMap.of(
+            SparkSQLProperties.EXECUTOR_CACHE_ENABLED, "true",
+            SparkSQLProperties.EXECUTOR_CACHE_DELETE_FILES_ENABLED, "false"),
+        () -> {
+          try {
+            List<DeleteFile> deleteFiles = createAndInitTable(TableProperties.UPDATE_MODE, mode);
+
+            Dataset<Integer> updateDS = spark.createDataset(ImmutableList.of(1, 4), Encoders.INT());
+            updateDS.createOrReplaceTempView(UPDATES_VIEW_NAME);
+
+            sql(
+                "UPDATE %s SET id = -1 WHERE id IN (SELECT * FROM %s)",
+                targetTableName, UPDATES_VIEW_NAME);
+
+            // When cache is disabled, delete files should be opened multiple times.
+            // Both CoW and MoR should open delete files at least 2 times without caching.
+            int expectedMinStreamCount = 2;
+            assertThat(deleteFiles)
+                .allMatch(deleteFile -> streamCount(deleteFile) >= expectedMinStreamCount);
+
+            assertEquals(
+                "Should have expected rows",
+                ImmutableList.of(row(-1, "hr"), row(-1, "hr")),
+                sql("SELECT * FROM %s ORDER BY id ASC", targetTableName));

Review Comment:
   nit: `ORDER BY` looks redundant when the expected result is empty.

##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestSparkExecutorCache.java:
##########
@@ -349,6 +373,141 @@ private void checkMerge(RowLevelOperationMode mode) throws Exception {
         sql("SELECT * FROM %s ORDER BY id ASC", targetTableName));
   }
 
+  @TestTemplate
+  public void testCopyOnWriteDeleteWithDeleteFileCacheCacheDisabled() throws Exception {
+    checkDeleteWithDeleteFilesCacheDisabled(COPY_ON_WRITE);
+  }
+
+  @TestTemplate
+  public void testMergeOnReadDeleteWithDeleteFileCacheDisabled() throws Exception {
+    checkDeleteWithDeleteFilesCacheDisabled(MERGE_ON_READ);
+  }
+
+  private void checkDeleteWithDeleteFilesCacheDisabled(RowLevelOperationMode mode)
+      throws Exception {

Review Comment:
   nit: `throws Exception` is redundant.
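   A possible suggestion block for that nit (assuming the comment is anchored on both lines of the signature; the lambda body already wraps checked exceptions in `RuntimeException`, so nothing else needs to change):
   ```suggestion
     private void checkDeleteWithDeleteFilesCacheDisabled(RowLevelOperationMode mode) {
   ```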