This is an automated email from the ASF dual-hosted git repository.

corgy pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git


The following commit(s) were added to refs/heads/dev by this push:
     new c43d57de31 [Feature][Connectors-v2] Clean up temporary files for paimon sink (#9819)
c43d57de31 is described below

commit c43d57de31e13f1006cc2fcf533870863994231d
Author: zhangdonghao <[email protected]>
AuthorDate: Fri Sep 5 12:58:17 2025 +0800

    [Feature][Connectors-v2] Clean up temporary files for paimon sink (#9819)
---
 .../seatunnel/paimon/sink/PaimonSinkWriter.java    | 18 ++++--
 .../seatunnel/e2e/connector/paimon/PaimonIT.java   | 28 +++++++--
 .../fake_to_paimon_with_change_log_tmp.conf        | 72 ++++++++++++++++++++++
 3 files changed, 108 insertions(+), 10 deletions(-)

diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkWriter.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkWriter.java
index 64ced58c5d..4a20d5d04c 100644
--- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkWriter.java
+++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkWriter.java
@@ -49,6 +49,7 @@ import org.apache.seatunnel.connectors.seatunnel.paimon.utils.RowConverter;
 import org.apache.paimon.CoreOptions;
 import org.apache.paimon.data.InternalRow;
 import org.apache.paimon.disk.IOManager;
+import org.apache.paimon.disk.IOManagerImpl;
 import org.apache.paimon.schema.TableSchema;
 import org.apache.paimon.table.BucketMode;
 import org.apache.paimon.table.FileStoreTable;
@@ -83,6 +84,8 @@ public class PaimonSinkWriter
 
     private FileStoreTable paimonTable;
 
+    private final IOManagerImpl ioManager;
+
     private TableWrite tableWrite;
 
     private final List<CommitMessage> committables = new ArrayList<>();
@@ -150,6 +153,9 @@ public class PaimonSinkWriter
         this.taskIndex = context.getIndexOfSubtask();
         this.paimonSinkConfig = paimonSinkConfig;
         this.sinkPaimonTableSchema = this.paimonTable.schema();
+        this.ioManager =
+                (IOManagerImpl)
+                        IOManager.create(splitPaths(paimonSinkConfig.getChangelogTmpPath()));
         this.newTableWrite();
         BucketMode bucketMode = this.paimonTable.bucketMode();
         // https://paimon.apache.org/docs/master/primary-key-table/data-distribution/#dynamic-bucket
@@ -275,12 +281,7 @@ public class PaimonSinkWriter
     private void newTableWrite() {
         TableWrite oldTableWrite = this.tableWrite;
         tableWriteClose(oldTableWrite);
-        this.tableWrite =
-                this.paimonTable
-                        .newWrite(commitUser)
-                        .withIOManager(
-                                IOManager.create(
-                                        splitPaths(paimonSinkConfig.getChangelogTmpPath())));
+        this.tableWrite = this.paimonTable.newWrite(commitUser).withIOManager(ioManager);
     }
 
     @Override
@@ -329,6 +330,11 @@ public class PaimonSinkWriter
             if (Objects.nonNull(paimonCatalog)) {
                 paimonCatalog.close();
             }
+            try {
+                ioManager.close();
+            } catch (Exception e) {
+                log.warn("Failed to close io manager in paimon sink writer.", e);
+            }
         }
     }
 
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonIT.java
index b197d53273..d5ff43c10b 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonIT.java
@@ -17,6 +17,7 @@
 
 package org.apache.seatunnel.e2e.connector.paimon;
 
+import org.apache.seatunnel.common.utils.FileUtils;
 import org.apache.seatunnel.connectors.seatunnel.paimon.config.PaimonBaseOptions;
 import org.apache.seatunnel.e2e.common.TestResource;
 import org.apache.seatunnel.e2e.common.TestSuiteBase;
@@ -43,6 +44,7 @@ import org.junit.jupiter.api.TestTemplate;
 import org.testcontainers.containers.Container;
 import org.testcontainers.utility.MountableFile;
 
+import java.io.File;
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.ArrayList;
@@ -53,10 +55,10 @@ import java.util.List;
         disabledReason =
                 "Paimon does not support flink 1.13, Spark 2.4.6 has a jar package(zstd-jni-version.jar) version compatibility issue.")
 public class PaimonIT extends TestSuiteBase implements TestResource {
-    private String rootUser = "root";
-    private String rootPassword = "123456";
-    private String paimonUser = "paimon";
-    private String paimonUserPassword = "123456";
+    private final String rootUser = "root";
+    private final String rootPassword = "123456";
+    private final String paimonUser = "paimon";
+    private final String paimonUserPassword = "123456";
 
     private PrivilegedCatalog privilegedCatalog;
     private final String DATABASE_NAME = "default";
@@ -184,4 +186,22 @@ public class PaimonIT extends TestSuiteBase implements TestResource {
                 container.executeJob("/paimon_to_paimon_privilege1.conf");
         Assertions.assertEquals(1, execResult1.getExitCode());
     }
+
+    @TestTemplate
+    public void jobFinishedCleanTmpFiles(TestContainer container) throws Exception {
+        // fake to paimon
+        Container.ExecResult execResult =
+                container.executeJob("/fake_to_paimon_with_change_log_tmp.conf");
+        Assertions.assertEquals(0, execResult.getExitCode());
+        // check job finished clean up tmp files
+        String hostName = System.getProperty("user.name");
+        boolean isWindows =
+                System.getProperties().getProperty("os.name").toUpperCase().contains("WINDOWS");
+        String tmpDir =
+                isWindows
+                        ? String.format("C:/Users/%s/tmp/seatunnel_mnt/paimon_tmp", hostName)
+                        : "/tmp/seatunnel_mnt/paimon_tmp";
+        List<File> files = FileUtils.listFile(tmpDir);
+        Assertions.assertTrue(CollectionUtils.isEmpty(files));
+    }
 }
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/fake_to_paimon_with_change_log_tmp.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/fake_to_paimon_with_change_log_tmp.conf
new file mode 100644
index 0000000000..b376875134
--- /dev/null
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/fake_to_paimon_with_change_log_tmp.conf
@@ -0,0 +1,72 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+
+  # You can set spark configuration here
+  spark.app.name = "SeaTunnel"
+  spark.executor.instances = 2
+  spark.executor.cores = 1
+  spark.executor.memory = "1g"
+  spark.master = local
+}
+
+source {
+  FakeSource {
+    auto.increment.enabled = true
+    auto.increment.start = 1
+    row.num = 100000
+    schema = {
+      fields {
+        pk_id = bigint
+        c_map = "map<string, string>"
+        c_array = "array<int>"
+        c_string = string
+        c_boolean = boolean
+        c_tinyint = tinyint
+        c_smallint = smallint
+        c_int = int
+        c_bigint = bigint
+        c_float = float
+        c_double = double
+        c_decimal = "decimal(30, 8)"
+        c_bytes = bytes
+        c_date = date
+        c_timestamp = timestamp
+        c_time = time
+      }
+      primaryKey {
+        name = "pk_id"
+        columnNames = [pk_id]
+      }
+    }
+    plugin_output = "fake"
+  }
+}
+
+sink {
+  Paimon {
+    warehouse = "/tmp/seatunnel_mnt/paimon"
+    database = "default"
+    table = "st_test"
+    paimon.table.write-props = {
+      changelog-tmp-path = "/tmp/seatunnel_mnt/paimon_tmp"
+    }
+  }
+}

Reply via email to