This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 93e8cb407c [core] Fix listFilesIterative bug and avoid using Files.walk in SimpleTableTestBase (#5389)
93e8cb407c is described below

commit 93e8cb407cc53b1e634da2362c5847c86661dd9a
Author: Jingsong Lee <[email protected]>
AuthorDate: Wed Apr 2 14:56:38 2025 +0800

    [core] Fix listFilesIterative bug and avoid using Files.walk in SimpleTableTestBase (#5389)
---
 .../src/main/java/org/apache/paimon/fs/FileIO.java | 26 ++++-----
 .../org/apache/paimon/fs/local/LocalFileIO.java    |  5 ++
 .../apache/paimon/table/SimpleTableTestBase.java   | 64 ++++++++--------------
 3 files changed, 40 insertions(+), 55 deletions(-)

diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java b/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java
index 0ed13de06c..201f3418e2 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java
@@ -154,22 +154,18 @@ public interface FileIO extends Serializable, Closeable {
             }
 
             private void maybeUnpackDirectory() throws IOException {
-                if (!files.isEmpty()) {
-                    return;
-                }
-                if (directories.isEmpty()) {
-                    return;
-                }
-                FileStatus[] statuses = listStatus(directories.remove());
-                for (FileStatus f : statuses) {
-                    if (!f.isDir()) {
-                        files.add(f);
-                        continue;
-                    }
-                    if (!recursive) {
-                        continue;
+                while (files.isEmpty() && !directories.isEmpty()) {
+                    FileStatus[] statuses = listStatus(directories.remove());
+                    for (FileStatus f : statuses) {
+                        if (!f.isDir()) {
+                            files.add(f);
+                            continue;
+                        }
+                        if (!recursive) {
+                            continue;
+                        }
+                        directories.add(f.getPath());
                     }
-                    directories.add(f.getPath());
                 }
             }
 
diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java b/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java
index 0b1c4ad260..1e355df609 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java
@@ -381,5 +381,10 @@ public class LocalFileIO implements FileIO {
         public long getModificationTime() {
             return file.lastModified();
         }
+
+        @Override
+        public String toString() {
+            return "{" + "file=" + file + ", length=" + length + ", scheme='" 
+ scheme + '\'' + '}';
+        }
     }
 }
diff --git a/paimon-core/src/test/java/org/apache/paimon/table/SimpleTableTestBase.java b/paimon-core/src/test/java/org/apache/paimon/table/SimpleTableTestBase.java
index 86e1b6ee72..d925b6df90 100644
--- a/paimon-core/src/test/java/org/apache/paimon/table/SimpleTableTestBase.java
+++ b/paimon-core/src/test/java/org/apache/paimon/table/SimpleTableTestBase.java
@@ -79,9 +79,6 @@ import org.junit.jupiter.api.io.TempDir;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
 
-import java.io.File;
-import java.nio.file.Files;
-import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -793,10 +790,8 @@ public abstract class SimpleTableTestBase {
         assertThat(result)
                 
.containsExactlyInAnyOrder("0|0|0|binary|varbinary|mapKey:mapVal|multiset");
 
-        List<java.nio.file.Path> files =
-                Files.walk(new 
File(table.location().toUri().getPath()).toPath())
-                        .collect(Collectors.toList());
-        assertThat(files.size()).isEqualTo(14);
+        FileStatus[] files = table.fileIO().listFiles(table.location(), true);
+        assertThat(files).hasSize(8);
     }
 
     // All tags are after the rollback snapshot
@@ -819,25 +814,16 @@ public abstract class SimpleTableTestBase {
         assertThat(result)
                 
.containsExactlyInAnyOrder("0|0|0|binary|varbinary|mapKey:mapVal|multiset");
 
-        List<java.nio.file.Path> files =
-                Files.walk(new 
File(table.location().toUri().getPath()).toPath())
-                        .collect(Collectors.toList());
-        assertThat(files.size()).isEqualTo(15);
-        // table-path
-        // table-path/snapshot
+        FileStatus[] files = table.fileIO().listFiles(table.location(), true);
+        assertThat(files).hasSize(8);
         // table-path/snapshot/LATEST
         // table-path/snapshot/EARLIEST
         // table-path/snapshot/snapshot-1
-        // table-path/pt=0
-        // table-path/pt=0/bucket-0
         // table-path/pt=0/bucket-0/data-0.orc
-        // table-path/manifest
         // table-path/manifest/manifest-list-1
         // table-path/manifest/manifest-0
         // table-path/manifest/manifest-list-0
-        // table-path/schema
         // table-path/schema/schema-0
-        // table-path/tag
     }
 
     // One tag is at the rollback snapshot and others are after it
@@ -871,10 +857,8 @@ public abstract class SimpleTableTestBase {
         assertThat(result)
                 
.containsExactlyInAnyOrder("0|0|0|binary|varbinary|mapKey:mapVal|multiset");
 
-        List<java.nio.file.Path> files =
-                Files.walk(new 
File(table.location().toUri().getPath()).toPath())
-                        .collect(Collectors.toList());
-        assertThat(files.size()).isEqualTo(16);
+        FileStatus[] files = table.fileIO().listFiles(table.location(), true);
+        assertThat(files).hasSize(9);
         // case 0 plus 1:
         // table-path/tag/tag-test3
     }
@@ -912,17 +896,14 @@ public abstract class SimpleTableTestBase {
         assertThat(result)
                 
.containsExactlyInAnyOrder("0|0|0|binary|varbinary|mapKey:mapVal|multiset");
 
-        List<java.nio.file.Path> files =
-                Files.walk(new 
File(table.location().toUri().getPath()).toPath())
-                        .collect(Collectors.toList());
-        assertThat(files.size()).isEqualTo(23);
-        // case 0 plus 7:
+        FileStatus[] files = table.fileIO().listFiles(table.location(), true);
+        assertThat(files).hasSize(14);
+        // case 0 plus 6:
         // table-path/manifest/manifest-list-2
         // table-path/manifest/manifest-list-3
         // table-path/manifest/manifest-1
         // table-path/snapshot/snapshot-2
-        // table-path/pt=1
-        // table-path/pt=1/bucket-0
+        // table-path/tag/tag-test3
         // table-path/pt=1/bucket-0/data-0.orc
     }
 
@@ -968,10 +949,8 @@ public abstract class SimpleTableTestBase {
         assertThat(result)
                 
.containsExactlyInAnyOrder("0|0|0|binary|varbinary|mapKey:mapVal|multiset");
 
-        List<java.nio.file.Path> files =
-                Files.walk(new 
File(table.location().toUri().getPath()).toPath())
-                        .collect(Collectors.toList());
-        assertThat(files.size()).isEqualTo(16);
+        FileStatus[] files = table.fileIO().listFiles(table.location(), true);
+        assertThat(files).hasSize(9);
         // rollback snapshot case 0 plus 1:
         // table-path/tag/tag-test1
     }
@@ -1432,13 +1411,18 @@ public abstract class SimpleTableTestBase {
                         store.manifestFileFactory().create());
 
         List<Path> unusedFileList =
-                Files.walk(Paths.get(tempDir.toString()))
-                        .filter(Files::isRegularFile)
-                        .filter(p -> 
!p.getFileName().toString().startsWith("snapshot"))
-                        .filter(p -> 
!p.getFileName().toString().startsWith("schema"))
-                        .filter(p -> 
!p.getFileName().toString().equals(LATEST))
-                        .filter(p -> 
!p.getFileName().toString().equals(EARLIEST))
-                        .map(p -> new Path(TraceableFileIO.SCHEME + "://" + 
p.toString()))
+                Arrays.stream(table.fileIO().listFiles(table.location(), true))
+                        .map(FileStatus::getPath)
+                        .filter(p -> !p.getName().startsWith("snapshot"))
+                        .filter(p -> !p.getName().startsWith("schema"))
+                        .filter(p -> !p.getName().equals(LATEST))
+                        .filter(p -> !p.getName().equals(EARLIEST))
+                        .map(
+                                p ->
+                                        new Path(
+                                                TraceableFileIO.SCHEME
+                                                        + ":"
+                                                        + 
p.toString().replace("file:", "")))
                         .filter(p -> !filesInUse.contains(p))
                         .collect(Collectors.toList());
 

Reply via email to