This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 86b172c304 [filesystems] Listing OSS objects with metadata (#5046)
86b172c304 is described below
commit 86b172c304de1ce7019d0d7fea2ca74530cac392
Author: Xiaoguang Zhu <[email protected]>
AuthorDate: Mon Mar 31 11:33:34 2025 +0800
[filesystems] Listing OSS objects with metadata (#5046)
---
.../generated/catalog_configuration.html | 6 +
.../org/apache/paimon/options/CatalogOptions.java | 7 +
.../apache/paimon/oss/HadoopCompliantFileIO.java | 4 +-
.../main/java/org/apache/paimon/oss/OSSFileIO.java | 163 ++++++++++++++++++++-
4 files changed, 177 insertions(+), 3 deletions(-)
diff --git a/docs/layouts/shortcodes/generated/catalog_configuration.html
b/docs/layouts/shortcodes/generated/catalog_configuration.html
index d2c989bd0d..d997831643 100644
--- a/docs/layouts/shortcodes/generated/catalog_configuration.html
+++ b/docs/layouts/shortcodes/generated/catalog_configuration.html
@@ -86,6 +86,12 @@ under the License.
<td>Boolean</td>
<td>Whether to allow static cache in file io implementation. If
not allowed, this means that there may be a large number of FileIO instances
generated, enabling caching can lead to resource leakage.</td>
</tr>
+ <tr>
+ <td><h5>file-io.populate-meta</h5></td>
+ <td style="word-wrap: break-word;">false</td>
+ <td>Boolean</td>
+ <td>Whether to populate file metadata while listing or getting
file status.</td>
+ </tr>
<tr>
<td><h5>format-table.enabled</h5></td>
<td style="word-wrap: break-word;">true</td>
diff --git
a/paimon-common/src/main/java/org/apache/paimon/options/CatalogOptions.java
b/paimon-common/src/main/java/org/apache/paimon/options/CatalogOptions.java
index 29b04cc9d0..071e8011eb 100644
--- a/paimon-common/src/main/java/org/apache/paimon/options/CatalogOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/options/CatalogOptions.java
@@ -167,4 +167,11 @@ public class CatalogOptions {
"Whether to allow static cache in file io
implementation. If not allowed, this means that "
+ "there may be a large number of FileIO
instances generated, enabling caching can "
+ "lead to resource leakage.");
+
+ public static final ConfigOption<Boolean> FILE_IO_POPULATE_META =
+ ConfigOptions.key("file-io.populate-meta")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription(
+ "Whether to populate file metadata while listing
or getting file status.");
}
diff --git
a/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/HadoopCompliantFileIO.java
b/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/HadoopCompliantFileIO.java
index 29ec82c9e7..bc0d4ffb3a 100644
---
a/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/HadoopCompliantFileIO.java
+++
b/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/HadoopCompliantFileIO.java
@@ -127,11 +127,11 @@ public abstract class HadoopCompliantFileIO implements
FileIO {
return getFileSystem(hadoopSrc).rename(hadoopSrc, hadoopDst);
}
- private org.apache.hadoop.fs.Path path(Path path) {
+ protected final org.apache.hadoop.fs.Path path(Path path) {
return new org.apache.hadoop.fs.Path(path.toUri());
}
- private FileSystem getFileSystem(org.apache.hadoop.fs.Path path) throws
IOException {
+ protected final FileSystem getFileSystem(org.apache.hadoop.fs.Path path)
throws IOException {
if (fsMap == null) {
synchronized (this) {
if (fsMap == null) {
diff --git
a/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/OSSFileIO.java
b/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/OSSFileIO.java
index 3b3936c976..bbaf2d672c 100644
---
a/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/OSSFileIO.java
+++
b/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/OSSFileIO.java
@@ -20,15 +20,21 @@ package org.apache.paimon.oss;
import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.FileStatus;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.fs.RemoteIterator;
import org.apache.paimon.options.Options;
import org.apache.paimon.utils.IOUtils;
+import com.aliyun.oss.model.ObjectMetadata;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import javax.annotation.Nullable;
+
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
@@ -39,6 +45,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;
import static org.apache.paimon.options.CatalogOptions.FILE_IO_ALLOW_CACHE;
+import static org.apache.paimon.options.CatalogOptions.FILE_IO_POPULATE_META;
/** OSS {@link FileIO}. */
public class OSSFileIO extends HadoopCompliantFileIO {
@@ -77,6 +84,7 @@ public class OSSFileIO extends HadoopCompliantFileIO {
private Options hadoopOptions;
private boolean allowCache = true;
+ private boolean populateMeta = false;
@Override
public boolean isObjectStore() {
@@ -86,6 +94,7 @@ public class OSSFileIO extends HadoopCompliantFileIO {
@Override
public void configure(CatalogContext context) {
allowCache = context.options().get(FILE_IO_ALLOW_CACHE);
+ populateMeta = context.options().get(FILE_IO_POPULATE_META);
hadoopOptions = new Options();
// read all configuration with prefix 'CONFIG_PREFIXES'
for (String key : context.options().keySet()) {
@@ -107,7 +116,7 @@ public class OSSFileIO extends HadoopCompliantFileIO {
}
@Override
- protected FileSystem createFileSystem(org.apache.hadoop.fs.Path path) {
+ protected AliyunOSSFileSystem createFileSystem(org.apache.hadoop.fs.Path
path) {
final String scheme = path.toUri().getScheme();
final String authority = path.toUri().getAuthority();
Supplier<AliyunOSSFileSystem> supplier =
@@ -183,4 +192,156 @@ public class OSSFileIO extends HadoopCompliantFileIO {
return Objects.hash(options, scheme, authority);
}
}
+
+    /**
+     * Returns the status of {@code path} as reported by the parent implementation.
+     *
+     * <p>When {@code populateMeta} is enabled, the status is additionally wrapped together with
+     * the object metadata looked up through the OSS file system that serves the path.
+     */
+    @Override
+    public FileStatus getFileStatus(Path path) throws IOException {
+        final FileStatus plainStatus = super.getFileStatus(path);
+        if (populateMeta) {
+            final org.apache.hadoop.fs.Path hadoopPath = path(path);
+            final AliyunOSSFileSystem ossFs = (AliyunOSSFileSystem) getFileSystem(hadoopPath);
+            return getExtendedFileStatus(ossFs, plainStatus);
+        }
+        return plainStatus;
+    }
+
+    /**
+     * Lists the statuses under {@code path} using the parent implementation.
+     *
+     * <p>When {@code populateMeta} is enabled, every returned entry is replaced by an extended
+     * status built from a separate object-metadata lookup for that entry.
+     */
+    @Override
+    public FileStatus[] listStatus(Path path) throws IOException {
+        final FileStatus[] plainStatuses = super.listStatus(path);
+        if (populateMeta) {
+            final AliyunOSSFileSystem ossFs = (AliyunOSSFileSystem) getFileSystem(path(path));
+            final FileStatus[] enriched = new FileStatus[plainStatuses.length];
+            int slot = 0;
+            for (FileStatus plain : plainStatuses) {
+                enriched[slot++] = getExtendedFileStatus(ossFs, plain);
+            }
+            return enriched;
+        }
+        return plainStatuses;
+    }
+
+    /**
+     * Iterates over the files under {@code path} using the parent implementation.
+     *
+     * <p>When {@code populateMeta} is enabled, the parent iterator is wrapped so that each element
+     * is converted to an extended status at the moment it is fetched with {@code next()}.
+     */
+    @Override
+    public RemoteIterator<FileStatus> listFilesIterative(Path path, boolean recursive)
+            throws IOException {
+        final RemoteIterator<FileStatus> delegate = super.listFilesIterative(path, recursive);
+        if (populateMeta) {
+            final AliyunOSSFileSystem ossFs = (AliyunOSSFileSystem) getFileSystem(path(path));
+            return new RemoteIterator<FileStatus>() {
+                @Override
+                public boolean hasNext() throws IOException {
+                    return delegate.hasNext();
+                }
+
+                @Override
+                public FileStatus next() throws IOException {
+                    // Metadata is resolved per element, only when the element is consumed.
+                    return getExtendedFileStatus(ossFs, delegate.next());
+                }
+
+                @Override
+                public void close() throws IOException {
+                    delegate.close();
+                }
+            };
+        }
+        return delegate;
+    }
+
+ private ExtendedFileStatus getExtendedFileStatus(AliyunOSSFileSystem fs,
FileStatus status) {
+ org.apache.hadoop.fs.Path path = path(status.getPath());
+ if (!path.isAbsolute()) {
+ path = new org.apache.hadoop.fs.Path(fs.getWorkingDirectory(),
path);
+ }
+ String objKey = path.toUri().getPath().substring(1);
+ ObjectMetadata meta = fs.getStore().getObjectMetadata(objKey);
+ return new ExtendedFileStatus(status, meta);
+ }
+
+    /**
+     * {@link FileStatus} view that layers OSS object metadata on top of a basic status.
+     *
+     * <p>Length, directory flag, path, modification and access time, owner, generation and storage
+     * class always come from the wrapped status. Content type, MD5 hash, metadata modification
+     * time and user metadata are read from {@code meta} when it is present, and from the wrapped
+     * status otherwise.
+     */
+    private static class ExtendedFileStatus implements FileStatus {
+
+        /** Status that every non-metadata accessor delegates to. */
+        private final FileStatus basic;
+
+        /** Object metadata to serve the extended attributes from; may be absent. */
+        @Nullable private final ObjectMetadata meta;
+
+        private ExtendedFileStatus(FileStatus basic, @Nullable ObjectMetadata meta) {
+            this.basic = basic;
+            this.meta = meta;
+        }
+
+        @Override
+        public long getLen() {
+            return basic.getLen();
+        }
+
+        @Override
+        public boolean isDir() {
+            return basic.isDir();
+        }
+
+        @Override
+        public Path getPath() {
+            return basic.getPath();
+        }
+
+        @Override
+        public long getModificationTime() {
+            return basic.getModificationTime();
+        }
+
+        @Override
+        public long getAccessTime() {
+            return basic.getAccessTime();
+        }
+
+        @Nullable
+        @Override
+        public String getOwner() {
+            return basic.getOwner();
+        }
+
+        @Nullable
+        @Override
+        public Integer getGeneration() {
+            return basic.getGeneration();
+        }
+
+        @Nullable
+        @Override
+        public String getContentType() {
+            if (meta == null) {
+                return basic.getContentType();
+            }
+            return meta.getContentType();
+        }
+
+        @Nullable
+        @Override
+        public String getStorageClass() {
+            return basic.getStorageClass();
+        }
+
+        @Nullable
+        @Override
+        public String getMd5Hash() {
+            if (meta == null) {
+                return basic.getMd5Hash();
+            }
+            return meta.getContentMD5();
+        }
+
+        /**
+         * Returns the last-modified time of the object metadata in epoch milliseconds, or the value
+         * of the wrapped status when the metadata or its last-modified value is missing.
+         */
+        @Nullable
+        @Override
+        public Long getMetadataModificationTime() {
+            // Guard against a missing last-modified value instead of dereferencing null.
+            java.util.Date lastModified = meta == null ? null : meta.getLastModified();
+            if (lastModified == null) {
+                return basic.getMetadataModificationTime();
+            }
+            return lastModified.getTime();
+        }
+
+        @Nullable
+        @Override
+        public Map<String, String> getMetadata() {
+            if (meta == null) {
+                return basic.getMetadata();
+            }
+            return meta.getUserMetadata();
+        }
+    }
}