This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 86b172c304 [filesystems] Listing OSS objects with metadata (#5046)
86b172c304 is described below

commit 86b172c304de1ce7019d0d7fea2ca74530cac392
Author: Xiaoguang Zhu <[email protected]>
AuthorDate: Mon Mar 31 11:33:34 2025 +0800

    [filesystems] Listing OSS objects with metadata (#5046)
---
 .../generated/catalog_configuration.html           |   6 +
 .../org/apache/paimon/options/CatalogOptions.java  |   7 +
 .../apache/paimon/oss/HadoopCompliantFileIO.java   |   4 +-
 .../main/java/org/apache/paimon/oss/OSSFileIO.java | 163 ++++++++++++++++++++-
 4 files changed, 177 insertions(+), 3 deletions(-)

diff --git a/docs/layouts/shortcodes/generated/catalog_configuration.html b/docs/layouts/shortcodes/generated/catalog_configuration.html
index d2c989bd0d..d997831643 100644
--- a/docs/layouts/shortcodes/generated/catalog_configuration.html
+++ b/docs/layouts/shortcodes/generated/catalog_configuration.html
@@ -86,6 +86,12 @@ under the License.
             <td>Boolean</td>
             <td>Whether to allow static cache in file io implementation. If not allowed, this means that there may be a large number of FileIO instances generated, enabling caching can lead to resource leakage.</td>
         </tr>
+        <tr>
+            <td><h5>file-io.populate-meta</h5></td>
+            <td style="word-wrap: break-word;">false</td>
+            <td>Boolean</td>
+            <td>Whether to populate file metadata while listing or getting file status.</td>
+        </tr>
         <tr>
             <td><h5>format-table.enabled</h5></td>
             <td style="word-wrap: break-word;">true</td>
diff --git a/paimon-common/src/main/java/org/apache/paimon/options/CatalogOptions.java b/paimon-common/src/main/java/org/apache/paimon/options/CatalogOptions.java
index 29b04cc9d0..071e8011eb 100644
--- a/paimon-common/src/main/java/org/apache/paimon/options/CatalogOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/options/CatalogOptions.java
@@ -167,4 +167,11 @@ public class CatalogOptions {
                             "Whether to allow static cache in file io implementation. If not allowed, this means that "
                                     + "there may be a large number of FileIO instances generated, enabling caching can "
                                     + "lead to resource leakage.");
+
+    public static final ConfigOption<Boolean> FILE_IO_POPULATE_META =
+            ConfigOptions.key("file-io.populate-meta")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription(
+                            "Whether to populate file metadata while listing or getting file status.");
 }
diff --git a/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/HadoopCompliantFileIO.java b/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/HadoopCompliantFileIO.java
index 29ec82c9e7..bc0d4ffb3a 100644
--- a/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/HadoopCompliantFileIO.java
+++ b/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/HadoopCompliantFileIO.java
@@ -127,11 +127,11 @@ public abstract class HadoopCompliantFileIO implements FileIO {
         return getFileSystem(hadoopSrc).rename(hadoopSrc, hadoopDst);
     }
 
-    private org.apache.hadoop.fs.Path path(Path path) {
+    protected final org.apache.hadoop.fs.Path path(Path path) {
         return new org.apache.hadoop.fs.Path(path.toUri());
     }
 
-    private FileSystem getFileSystem(org.apache.hadoop.fs.Path path) throws IOException {
+    protected final FileSystem getFileSystem(org.apache.hadoop.fs.Path path) throws IOException {
         if (fsMap == null) {
             synchronized (this) {
                 if (fsMap == null) {
diff --git a/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/OSSFileIO.java b/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/OSSFileIO.java
index 3b3936c976..bbaf2d672c 100644
--- a/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/OSSFileIO.java
+++ b/paimon-filesystems/paimon-oss-impl/src/main/java/org/apache/paimon/oss/OSSFileIO.java
@@ -20,15 +20,21 @@ package org.apache.paimon.oss;
 
 import org.apache.paimon.catalog.CatalogContext;
 import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.FileStatus;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.fs.RemoteIterator;
 import org.apache.paimon.options.Options;
 import org.apache.paimon.utils.IOUtils;
 
+import com.aliyun.oss.model.ObjectMetadata;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.annotation.Nullable;
+
 import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.net.URI;
@@ -39,6 +45,7 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.function.Supplier;
 
 import static org.apache.paimon.options.CatalogOptions.FILE_IO_ALLOW_CACHE;
+import static org.apache.paimon.options.CatalogOptions.FILE_IO_POPULATE_META;
 
 /** OSS {@link FileIO}. */
 public class OSSFileIO extends HadoopCompliantFileIO {
@@ -77,6 +84,7 @@ public class OSSFileIO extends HadoopCompliantFileIO {
 
     private Options hadoopOptions;
     private boolean allowCache = true;
+    private boolean populateMeta = false;
 
     @Override
     public boolean isObjectStore() {
@@ -86,6 +94,7 @@ public class OSSFileIO extends HadoopCompliantFileIO {
     @Override
     public void configure(CatalogContext context) {
         allowCache = context.options().get(FILE_IO_ALLOW_CACHE);
+        populateMeta = context.options().get(FILE_IO_POPULATE_META);
         hadoopOptions = new Options();
         // read all configuration with prefix 'CONFIG_PREFIXES'
         for (String key : context.options().keySet()) {
@@ -107,7 +116,7 @@ public class OSSFileIO extends HadoopCompliantFileIO {
     }
 
     @Override
-    protected FileSystem createFileSystem(org.apache.hadoop.fs.Path path) {
+    protected AliyunOSSFileSystem createFileSystem(org.apache.hadoop.fs.Path path) {
         final String scheme = path.toUri().getScheme();
         final String authority = path.toUri().getAuthority();
         Supplier<AliyunOSSFileSystem> supplier =
@@ -183,4 +192,156 @@ public class OSSFileIO extends HadoopCompliantFileIO {
             return Objects.hash(options, scheme, authority);
         }
     }
+
+    @Override
+    public FileStatus getFileStatus(Path path) throws IOException {
+        FileStatus basic = super.getFileStatus(path);
+        if (!populateMeta) {
+            return basic;
+        }
+        AliyunOSSFileSystem fs = (AliyunOSSFileSystem) getFileSystem(path(path));
+        return getExtendedFileStatus(fs, basic);
+    }
+
+    @Override
+    public FileStatus[] listStatus(Path path) throws IOException {
+        FileStatus[] basic = super.listStatus(path);
+        if (!populateMeta) {
+            return basic;
+        }
+        AliyunOSSFileSystem fs = (AliyunOSSFileSystem) getFileSystem(path(path));
+        FileStatus[] extended = new FileStatus[basic.length];
+        for (int i = 0; i < basic.length; i++) {
+            extended[i] = getExtendedFileStatus(fs, basic[i]);
+        }
+        return extended;
+    }
+
+    @Override
+    public RemoteIterator<FileStatus> listFilesIterative(Path path, boolean recursive)
+            throws IOException {
+        RemoteIterator<FileStatus> basicIter = super.listFilesIterative(path, recursive);
+        if (!populateMeta) {
+            return basicIter;
+        }
+        AliyunOSSFileSystem fs = (AliyunOSSFileSystem) getFileSystem(path(path));
+        return new RemoteIterator<FileStatus>() {
+            @Override
+            public boolean hasNext() throws IOException {
+                return basicIter.hasNext();
+            }
+
+            @Override
+            public FileStatus next() throws IOException {
+                FileStatus basic = basicIter.next();
+                return getExtendedFileStatus(fs, basic);
+            }
+
+            @Override
+            public void close() throws IOException {
+                basicIter.close();
+            }
+        };
+    }
+
+    private ExtendedFileStatus getExtendedFileStatus(AliyunOSSFileSystem fs, FileStatus status) {
+        org.apache.hadoop.fs.Path path = path(status.getPath());
+        if (!path.isAbsolute()) {
+            path = new org.apache.hadoop.fs.Path(fs.getWorkingDirectory(), path);
+        }
+        String objKey = path.toUri().getPath().substring(1);
+        ObjectMetadata meta = fs.getStore().getObjectMetadata(objKey);
+        return new ExtendedFileStatus(status, meta);
+    }
+
+    private static class ExtendedFileStatus implements FileStatus {
+
+        private final FileStatus basic;
+        @Nullable private final com.aliyun.oss.model.ObjectMetadata meta;
+
+        private ExtendedFileStatus(
+                FileStatus basic, @Nullable com.aliyun.oss.model.ObjectMetadata meta) {
+            this.basic = basic;
+            this.meta = meta;
+        }
+
+        @Override
+        public long getLen() {
+            return basic.getLen();
+        }
+
+        @Override
+        public boolean isDir() {
+            return basic.isDir();
+        }
+
+        @Override
+        public Path getPath() {
+            return basic.getPath();
+        }
+
+        @Override
+        public long getModificationTime() {
+            return basic.getModificationTime();
+        }
+
+        @Override
+        public long getAccessTime() {
+            return basic.getAccessTime();
+        }
+
+        @Nullable
+        @Override
+        public String getOwner() {
+            return basic.getOwner();
+        }
+
+        @Nullable
+        @Override
+        public Integer getGeneration() {
+            return basic.getGeneration();
+        }
+
+        @Nullable
+        @Override
+        public String getContentType() {
+            if (meta == null) {
+                return basic.getContentType();
+            }
+            return meta.getContentType();
+        }
+
+        @Nullable
+        @Override
+        public String getStorageClass() {
+            return basic.getStorageClass();
+        }
+
+        @Nullable
+        @Override
+        public String getMd5Hash() {
+            if (meta == null) {
+                return basic.getMd5Hash();
+            }
+            return meta.getContentMD5();
+        }
+
+        @Nullable
+        @Override
+        public Long getMetadataModificationTime() {
+            if (meta == null) {
+                return basic.getMetadataModificationTime();
+            }
+            return meta.getLastModified().getTime();
+        }
+
+        @Nullable
+        @Override
+        public Map<String, String> getMetadata() {
+            if (meta == null) {
+                return basic.getMetadata();
+            }
+            return meta.getUserMetadata();
+        }
+    }
 }

Reply via email to