This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 93e8cb407c [core] Fix listFilesIterative bug and avoid using
Files.walk in SimpleTableTestBase (#5389)
93e8cb407c is described below
commit 93e8cb407cc53b1e634da2362c5847c86661dd9a
Author: Jingsong Lee <[email protected]>
AuthorDate: Wed Apr 2 14:56:38 2025 +0800
[core] Fix listFilesIterative bug and avoid using Files.walk in
SimpleTableTestBase (#5389)
---
.../src/main/java/org/apache/paimon/fs/FileIO.java | 26 ++++-----
.../org/apache/paimon/fs/local/LocalFileIO.java | 5 ++
.../apache/paimon/table/SimpleTableTestBase.java | 64 ++++++++--------------
3 files changed, 40 insertions(+), 55 deletions(-)
diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java b/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java
index 0ed13de06c..201f3418e2 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fs/FileIO.java
@@ -154,22 +154,18 @@ public interface FileIO extends Serializable, Closeable {
}
private void maybeUnpackDirectory() throws IOException {
- if (!files.isEmpty()) {
- return;
- }
- if (directories.isEmpty()) {
- return;
- }
- FileStatus[] statuses = listStatus(directories.remove());
- for (FileStatus f : statuses) {
- if (!f.isDir()) {
- files.add(f);
- continue;
- }
- if (!recursive) {
- continue;
+ while (files.isEmpty() && !directories.isEmpty()) {
+ FileStatus[] statuses = listStatus(directories.remove());
+ for (FileStatus f : statuses) {
+ if (!f.isDir()) {
+ files.add(f);
+ continue;
+ }
+ if (!recursive) {
+ continue;
+ }
+ directories.add(f.getPath());
}
- directories.add(f.getPath());
}
}
diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java b/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java
index 0b1c4ad260..1e355df609 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java
@@ -381,5 +381,10 @@ public class LocalFileIO implements FileIO {
public long getModificationTime() {
return file.lastModified();
}
+
+ @Override
+ public String toString() {
+ return "{" + "file=" + file + ", length=" + length + ", scheme='" + scheme + '\'' + '}';
+ }
}
}
diff --git a/paimon-core/src/test/java/org/apache/paimon/table/SimpleTableTestBase.java b/paimon-core/src/test/java/org/apache/paimon/table/SimpleTableTestBase.java
index 86e1b6ee72..d925b6df90 100644
--- a/paimon-core/src/test/java/org/apache/paimon/table/SimpleTableTestBase.java
+++ b/paimon-core/src/test/java/org/apache/paimon/table/SimpleTableTestBase.java
@@ -79,9 +79,6 @@ import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
-import java.io.File;
-import java.nio.file.Files;
-import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -793,10 +790,8 @@ public abstract class SimpleTableTestBase {
assertThat(result)
.containsExactlyInAnyOrder("0|0|0|binary|varbinary|mapKey:mapVal|multiset");
- List<java.nio.file.Path> files =
- Files.walk(new File(table.location().toUri().getPath()).toPath())
- .collect(Collectors.toList());
- assertThat(files.size()).isEqualTo(14);
+ FileStatus[] files = table.fileIO().listFiles(table.location(), true);
+ assertThat(files).hasSize(8);
}
// All tags are after the rollback snapshot
@@ -819,25 +814,16 @@ public abstract class SimpleTableTestBase {
assertThat(result)
.containsExactlyInAnyOrder("0|0|0|binary|varbinary|mapKey:mapVal|multiset");
- List<java.nio.file.Path> files =
- Files.walk(new File(table.location().toUri().getPath()).toPath())
- .collect(Collectors.toList());
- assertThat(files.size()).isEqualTo(15);
- // table-path
- // table-path/snapshot
+ FileStatus[] files = table.fileIO().listFiles(table.location(), true);
+ assertThat(files).hasSize(8);
// table-path/snapshot/LATEST
// table-path/snapshot/EARLIEST
// table-path/snapshot/snapshot-1
- // table-path/pt=0
- // table-path/pt=0/bucket-0
// table-path/pt=0/bucket-0/data-0.orc
- // table-path/manifest
// table-path/manifest/manifest-list-1
// table-path/manifest/manifest-0
// table-path/manifest/manifest-list-0
- // table-path/schema
// table-path/schema/schema-0
- // table-path/tag
}
// One tag is at the rollback snapshot and others are after it
@@ -871,10 +857,8 @@ public abstract class SimpleTableTestBase {
assertThat(result)
.containsExactlyInAnyOrder("0|0|0|binary|varbinary|mapKey:mapVal|multiset");
- List<java.nio.file.Path> files =
- Files.walk(new File(table.location().toUri().getPath()).toPath())
- .collect(Collectors.toList());
- assertThat(files.size()).isEqualTo(16);
+ FileStatus[] files = table.fileIO().listFiles(table.location(), true);
+ assertThat(files).hasSize(9);
// case 0 plus 1:
// table-path/tag/tag-test3
}
@@ -912,17 +896,14 @@ public abstract class SimpleTableTestBase {
assertThat(result)
.containsExactlyInAnyOrder("0|0|0|binary|varbinary|mapKey:mapVal|multiset");
- List<java.nio.file.Path> files =
- Files.walk(new File(table.location().toUri().getPath()).toPath())
- .collect(Collectors.toList());
- assertThat(files.size()).isEqualTo(23);
- // case 0 plus 7:
+ FileStatus[] files = table.fileIO().listFiles(table.location(), true);
+ assertThat(files).hasSize(14);
+ // case 0 plus 6:
// table-path/manifest/manifest-list-2
// table-path/manifest/manifest-list-3
// table-path/manifest/manifest-1
// table-path/snapshot/snapshot-2
- // table-path/pt=1
- // table-path/pt=1/bucket-0
+ // table-path/tag/tag-test3
// table-path/pt=1/bucket-0/data-0.orc
}
@@ -968,10 +949,8 @@ public abstract class SimpleTableTestBase {
assertThat(result)
.containsExactlyInAnyOrder("0|0|0|binary|varbinary|mapKey:mapVal|multiset");
- List<java.nio.file.Path> files =
- Files.walk(new File(table.location().toUri().getPath()).toPath())
- .collect(Collectors.toList());
- assertThat(files.size()).isEqualTo(16);
+ FileStatus[] files = table.fileIO().listFiles(table.location(), true);
+ assertThat(files).hasSize(9);
// rollback snapshot case 0 plus 1:
// table-path/tag/tag-test1
}
@@ -1432,13 +1411,18 @@ public abstract class SimpleTableTestBase {
store.manifestFileFactory().create());
List<Path> unusedFileList =
- Files.walk(Paths.get(tempDir.toString()))
- .filter(Files::isRegularFile)
- .filter(p -> !p.getFileName().toString().startsWith("snapshot"))
- .filter(p -> !p.getFileName().toString().startsWith("schema"))
- .filter(p -> !p.getFileName().toString().equals(LATEST))
- .filter(p -> !p.getFileName().toString().equals(EARLIEST))
- .map(p -> new Path(TraceableFileIO.SCHEME + "://" + p.toString()))
+ Arrays.stream(table.fileIO().listFiles(table.location(), true))
+ .map(FileStatus::getPath)
+ .filter(p -> !p.getName().startsWith("snapshot"))
+ .filter(p -> !p.getName().startsWith("schema"))
+ .filter(p -> !p.getName().equals(LATEST))
+ .filter(p -> !p.getName().equals(EARLIEST))
+ .map(
+ p ->
+ new Path(
+ TraceableFileIO.SCHEME
+ + ":"
+ + p.toString().replace("file:", "")))
.filter(p -> !filesInUse.contains(p))
.collect(Collectors.toList());