This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new b82dac4858 Spark 4.0: Fix source location in stats file copy plan in RewriteTablePathSparkAction (#13881)
b82dac4858 is described below
commit b82dac4858fb2d15929a797660252c50f8e00ed8
Author: Anurag Mantripragada <[email protected]>
AuthorDate: Fri Aug 22 11:33:02 2025 -0700
Spark 4.0: Fix source location in stats file copy plan in RewriteTablePathSparkAction (#13881)
* Spark 4.0: Fix source location in stats file copy plan in RewriteTablePathSparkAction
* Address comments
* Address comments on test
* Spotless Apply
---
.../spark/actions/RewriteTablePathSparkAction.java | 5 +--
.../spark/actions/TestRewriteTablePathsAction.java | 50 ++++++++++++++++++++++
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteTablePathSparkAction.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteTablePathSparkAction.java
index 771b7a45b1..14ad724c1c 100644
--- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteTablePathSparkAction.java
+++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteTablePathSparkAction.java
@@ -404,10 +404,7 @@ public class RewriteTablePathSparkAction extends BaseSparkAction<RewriteTablePat
Preconditions.checkArgument(
before.fileSizeInBytes() == after.fileSizeInBytes(),
"Before and after path rewrite, statistic file size should be same");
- result.add(
- Pair.of(
- RewriteTablePathUtil.stagingPath(before.path(), sourcePrefix, stagingDir),
- after.path()));
+ result.add(Pair.of(before.path(), after.path()));
}
return result;
}
diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java
index 5735d2b335..770f6c5c7a 100644
--- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java
+++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java
@@ -963,6 +963,56 @@ public class TestRewriteTablePathsAction extends TestBase {
iterations * 2 + 1, iterations, iterations, iterations, iterations * 6 + 1, result);
}
+ @Test
+ public void testStatisticsFileSourcePath() throws IOException {
+ String sourceTableLocation = newTableLocation();
+ Map<String, String> properties = Maps.newHashMap();
+ properties.put("format-version", "2");
+ String tableName = "v2tblwithstats";
+ Table sourceTable =
+ createMetastoreTable(sourceTableLocation, properties, "default", tableName, 1);
+
+ // Compute table statistics to generate a .stats file
+ actions().computeTableStats(sourceTable).execute();
+
+ assertThat(sourceTable.statisticsFiles())
+ .as("Should include 1 statistics file after compute stats")
+ .hasSize(1);
+
+ String targetTableLocation = targetTableLocation();
+ RewriteTablePath.Result result =
+ actions()
+ .rewriteTablePath(sourceTable)
+ .rewriteLocationPrefix(sourceTableLocation, targetTableLocation)
+ .execute();
+
+ checkFileNum(3, 1, 1, 1, 7, result);
+
+ // Read the file list to verify statistics file paths
+ List<Tuple2<String, String>> filesToMove = readPathPairList(result.fileListLocation());
+
+ // Find the statistics file entry in the file list using stream
+ Tuple2<String, String> statsFilePathPair =
+ filesToMove.stream()
+ .filter(pathPair -> pathPair._1().endsWith(".stats"))
+ .findFirst()
+ .orElse(null);
+
+ assertThat(statsFilePathPair).as("Should find statistics file in file list").isNotNull();
+
+ // Verify the source path points to the actual source location, not staging
+ assertThat(statsFilePathPair._1())
+ .as("Statistics file source should point to source table location and NOT staging")
+ .startsWith(sourceTableLocation)
+ .contains("/metadata/")
+ .doesNotContain("staging");
+
+ // Verify the target path is correctly rewritten
+ assertThat(statsFilePathPair._2())
+ .as("Statistics file target should point to target table location")
+ .startsWith(targetTableLocation);
+ }
+
@Test
public void testMetadataCompressionWithMetastoreTable() throws Exception {
Map<String, String> properties = Maps.newHashMap();