This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new aabcd06be9 [fs] Add filesystem support for Microsoft Azure Storage
(#5379)
aabcd06be9 is described below
commit aabcd06be92d4c5f3a0af7a2eabb1a0d1ea26a6e
Author: qining <[email protected]>
AuthorDate: Tue Apr 1 13:10:36 2025 +0800
[fs] Add filesystem support for Microsoft Azure Storage (#5379)
---
docs/content/maintenance/filesystems.md | 94 +++++++
paimon-filesystems/paimon-azure-impl/pom.xml | 200 +++++++++++++++
.../java/org/apache/paimon/azure/AzureFileIO.java | 157 ++++++++++++
.../apache/paimon/azure/HadoopCompliantFileIO.java | 285 +++++++++++++++++++++
.../src/main/resources/META-INF/NOTICE | 91 +++++++
.../META-INF/licenses/LICENSE.animal-sniffer | 21 ++
.../licenses/LICENSE.checker-framework-qualifiers | 22 ++
.../resources/META-INF/licenses/LICENSE.dnsjava | 30 +++
.../resources/META-INF/licenses/LICENSE.jakarta | 28 ++
.../main/resources/META-INF/licenses/LICENSE.jaxb | 135 ++++++++++
.../main/resources/META-INF/licenses/LICENSE.re2j | 32 +++
.../resources/META-INF/licenses/LICENSE.stax2api | 22 ++
paimon-filesystems/paimon-azure/pom.xml | 143 +++++++++++
.../java/org/apache/paimon/azure/AzureLoader.java | 88 +++++++
.../org.apache.paimon.fs.FileIOLoader | 16 ++
paimon-filesystems/pom.xml | 2 +
16 files changed, 1366 insertions(+)
diff --git a/docs/content/maintenance/filesystems.md
b/docs/content/maintenance/filesystems.md
index a56c45126d..55efb94851 100644
--- a/docs/content/maintenance/filesystems.md
+++ b/docs/content/maintenance/filesystems.md
@@ -242,6 +242,7 @@ SELECT * FROM test_table;
SELECT COUNT(1) FROM test_table;
```
+
{{< /tab >}}
{{< tab "Trino" >}}
@@ -420,3 +421,96 @@ CREATE CATALOG my_catalog WITH (
{{< /tab >}}
{{< /tabs >}}
+
+## Google Cloud Storage
+
+{{< stable >}}
+
+Download [paimon-gs-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-gs/{{< version >}}/paimon-gs-{{< version >}}.jar).
+
+{{< /stable >}}
+
+{{< unstable >}}
+
+Download [paimon-gs-{{< version
>}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-gs/{{<
version >}}/).
+
+{{< /unstable >}}
+
+{{< tabs "gs" >}}
+
+{{< tab "Flink" >}}
+
+{{< hint info >}}
+If you have already configured [GCS access through Flink](https://nightlies.apache.org/flink/flink-docs-release-2.0/docs/deployment/filesystems/gcs/) (Via Flink FileSystem),
+here you can skip the following configuration.
+{{< /hint >}}
+
+Put `paimon-gs-{{< version >}}.jar` into `lib` directory of your Flink home,
and create catalog:
+
+```sql
+CREATE CATALOG my_catalog WITH (
+ 'type' = 'paimon',
+ 'warehouse' = 'gs://<bucket>/<path>',
+ 'fs.gs.auth.type' = 'SERVICE_ACCOUNT_JSON_KEYFILE',
+ 'fs.gs.auth.service.account.json.keyfile' =
'/path/to/service-account-.json'
+);
+```
+
+{{< /tab >}}
+
+{{< /tabs >}}
+
+
+## Microsoft Azure Storage
+
+{{< stable >}}
+
+Download [paimon-azure-{{< version
>}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-azure/{{<
version >}}/paimon-azure-{{< version >}}.jar).
+
+{{< /stable >}}
+
+{{< unstable >}}
+
+Download [paimon-azure-{{< version
>}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-azure/{{<
version >}}/).
+
+{{< /unstable >}}
+
+{{< tabs "azure" >}}
+
+{{< tab "Flink" >}}
+
+{{< hint info >}}
+If you have already configured [azure access through
Flink](https://nightlies.apache.org/flink/flink-docs-release-2.0/docs/deployment/filesystems/azure/)
(Via Flink FileSystem),
+here you can skip the following configuration.
+{{< /hint >}}
+
+Put `paimon-azure-{{< version >}}.jar` into `lib` directory of your Flink
home, and create catalog:
+
+```sql
+CREATE CATALOG my_catalog WITH (
+ 'type' = 'paimon',
+ 'warehouse' = 'wasb://<container>@<account>.blob.core.windows.net/<path>',
+ 'fs.azure.account.key.<account>.blob.core.windows.net' = 'yyy'
+);
+```
+
+{{< /tab >}}
+
+{{< tab "Spark" >}}
+
+{{< hint info >}}
+If you have already configured azure access through Spark (Via Hadoop
FileSystem), here you can skip the following configuration.
+{{< /hint >}}
+
+Place `paimon-azure-{{< version >}}.jar` together with `paimon-spark-{{<
version >}}.jar` under Spark's jars directory, and start like
+
+```shell
+spark-sql \
+ --conf spark.sql.catalog.paimon=org.apache.paimon.spark.SparkCatalog \
+ --conf spark.sql.catalog.paimon.warehouse=wasb://<container>@<account>.blob.core.windows.net/<path> \
+ --conf fs.azure.account.key.<account>.blob.core.windows.net=yyy
+```
+
+{{< /tab >}}
+
+{{< /tabs >}}
diff --git a/paimon-filesystems/paimon-azure-impl/pom.xml
b/paimon-filesystems/paimon-azure-impl/pom.xml
new file mode 100644
index 0000000000..43d3da0628
--- /dev/null
+++ b/paimon-filesystems/paimon-azure-impl/pom.xml
@@ -0,0 +1,200 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>paimon-filesystems</artifactId>
+ <groupId>org.apache.paimon</groupId>
+ <version>1.1-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>paimon-azure-impl</artifactId>
+ <name>Paimon : FileSystems : AZURE : Impl</name>
+ <packaging>jar</packaging>
+
+ <properties>
+ <fs.azure.sdk.version>3.3.4</fs.azure.sdk.version>
+ <fs.azure.api.version>1.16.0</fs.azure.api.version>
+ </properties>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.paimon</groupId>
+ <artifactId>paimon-hadoop-shaded</artifactId>
+ <version>${project.version}</version>
+
+ </dependency>
+ <dependency>
+ <groupId>org.apache.paimon</groupId>
+ <artifactId>paimon-common</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-azure</artifactId>
+ <version>${fs.hadoopshaded.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.microsoft.azure</groupId>
+ <artifactId>azure</artifactId>
+ </exclusion>
+ <exclusion>
+ <!-- provided by paimon-hadoop-shaded -->
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>ch.qos.reload4j</groupId>
+ <artifactId>reload4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-reload4j</artifactId>
+ </exclusion>
+
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.microsoft.azure</groupId>
+ <artifactId>azure</artifactId>
+ <version>${fs.azure.api.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <!-- Hadoop requires jaxb-api for javax.xml.bind.JAXBException -->
+ <groupId>javax.xml.bind</groupId>
+ <artifactId>jaxb-api</artifactId>
+ <version>${jaxb.api.version}</version>
+ <!-- packaged as an optional dependency that is only accessible on
Java 11+ -->
+ <scope>provided</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <configuration>
+ <archive>
+ <manifestEntries>
+ <!-- jaxb-api is packaged as an optional
dependency that is only accessible on Java 11 -->
+ <Multi-Release>true</Multi-Release>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-javax-jars</id>
+ <phase>process-resources</phase>
+ <goals>
+ <goal>copy</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <artifactItems>
+ <artifactItem>
+ <groupId>javax.xml.bind</groupId>
+ <artifactId>jaxb-api</artifactId>
+ <version>${jaxb.api.version}</version>
+ <type>jar</type>
+ <overWrite>true</overWrite>
+ </artifactItem>
+ </artifactItems>
+
<outputDirectory>${project.build.directory}/temporary</outputDirectory>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>unpack-javax-libraries</id>
+ <phase>process-resources</phase>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ <configuration>
+ <target>
+ <echo message="unpacking javax jars"/>
+ <unzip
dest="${project.build.directory}/classes/META-INF/versions/11">
+ <fileset
dir="${project.build.directory}/temporary">
+ <include name="*"/>
+ </fileset>
+ </unzip>
+ </target>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>shade-paimon</id>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>*</artifact>
+ <excludes>
+ <exclude>.gitkeep</exclude>
+ <exclude>mime.types</exclude>
+ <exclude>mozilla/**</exclude>
+ <exclude>META-INF/maven/**</exclude>
+
<exclude>META-INF/versions/11/META-INF/maven/**</exclude>
+ <exclude>META-INF/LICENSE.txt</exclude>
+ </excludes>
+ </filter>
+ <filter>
+ <artifact>*</artifact>
+ <excludes>
+ <exclude>properties.dtd</exclude>
+ <exclude>PropertyList-1.0.dtd</exclude>
+
<exclude>META-INF/services/javax.xml.stream.*</exclude>
+ <exclude>META-INF/LICENSE.txt</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
\ No newline at end of file
diff --git
a/paimon-filesystems/paimon-azure-impl/src/main/java/org/apache/paimon/azure/AzureFileIO.java
b/paimon-filesystems/paimon-azure-impl/src/main/java/org/apache/paimon/azure/AzureFileIO.java
new file mode 100644
index 0000000000..36f7d5f37c
--- /dev/null
+++
b/paimon-filesystems/paimon-azure-impl/src/main/java/org/apache/paimon/azure/AzureFileIO.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.azure;
+
+import org.apache.paimon.catalog.CatalogContext;
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.options.Options;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.azure.NativeAzureFileSystem;
+import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.net.URI;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Azure Blob Storage {@link FileIO}.
+ *
+ * <p>Catalog options whose keys start with {@code azure.}, {@code fs.azure.} or {@code fs.wasb.}
+ * are copied into the Hadoop configuration under the {@code fs.azure.} prefix. Paths with the
+ * {@code abfs} or {@code abfss} scheme are served by the ABFS driver; every other scheme is
+ * served by {@link NativeAzureFileSystem}. Initialized file systems are cached per (options,
+ * scheme, authority).
+ */
+public class AzureFileIO extends HadoopCompliantFileIO {
+    private static final long serialVersionUID = 1L;
+    private static final Logger LOG = LoggerFactory.getLogger(AzureFileIO.class);
+
+    /** Option key prefixes that are forwarded to the Hadoop configuration. */
+    private static final String[] CONFIG_PREFIXES = {"azure.", "fs.azure.", "fs.wasb."};
+
+    /** Prefix that every forwarded key is rewritten to. */
+    private static final String HADOOP_CONFIG_PREFIX = "fs.azure.";
+
+    private static final String ABFS_SCHEME = "abfs";
+
+    /** Secure variant of the ABFS scheme; it must not fall through to the WASB driver. */
+    private static final String ABFS_SECURE_SCHEME = "abfss";
+
+    /**
+     * Pairs of {source key, target key}: the value of the source key is copied to the target key.
+     *
+     * <p>NOTE(review): the targets look like placeholders (a literal {@code ACCOUNT} segment, and
+     * an auth type mirrored into an endpoint key) - confirm the intended mapping.
+     */
+    private static final String[][] MIRRORED_CONFIG_KEYS = {
+        {"fs.azure.account.key", "fs.azure.account.key.ACCOUNT.blob.core.windows.net"},
+        {"fs.azure.account.auth.type", "fs.azure.account.oauth2.client.endpoint"}
+    };
+
+    /** Static cache of initialized file systems, shared by all instances of this class. */
+    private static final Map<CacheKey, FileSystem> CACHE = new ConcurrentHashMap<>();
+
+    /** Hadoop options derived from the catalog context; assigned in {@link #configure}. */
+    private Options hadoopOptions;
+
+    @Override
+    public boolean isObjectStore() {
+        return true;
+    }
+
+    /**
+     * Extracts the Azure related options from the catalog context and stores them for later
+     * file system creation.
+     *
+     * @param context catalog context whose options are scanned for Azure keys
+     */
+    @Override
+    public void configure(CatalogContext context) {
+        this.hadoopOptions = mirrorCertainHadoopConfig(loadHadoopConfigFromContext(context));
+    }
+
+    /**
+     * Copies every option whose key starts with one of {@link #CONFIG_PREFIXES} into a new
+     * {@link Options}, replacing the matched prefix with {@link #HADOOP_CONFIG_PREFIX}.
+     *
+     * @param context catalog context to read options from
+     * @return the options to apply to the Hadoop configuration
+     */
+    private Options loadHadoopConfigFromContext(CatalogContext context) {
+        Options hadoopConfig = new Options();
+        for (String key : context.options().keySet()) {
+            for (String prefix : CONFIG_PREFIXES) {
+                if (key.startsWith(prefix)) {
+                    String newKey = HADOOP_CONFIG_PREFIX + key.substring(prefix.length());
+                    String value = context.options().get(key);
+                    hadoopConfig.set(newKey, value);
+
+                    LOG.debug("Adding config entry for {} as {} to Hadoop config", key, newKey);
+                }
+            }
+        }
+        return hadoopConfig;
+    }
+
+    /**
+     * Mirrors certain keys to make use more uniform across implementations with different keys.
+     *
+     * <p>A target key is only filled in when it has no value yet, so a value that was configured
+     * explicitly is never overwritten by the mirrored one.
+     *
+     * @param hadoopConfig options to update in place
+     * @return the same {@code hadoopConfig} instance
+     */
+    private Options mirrorCertainHadoopConfig(Options hadoopConfig) {
+        for (String[] mirrored : MIRRORED_CONFIG_KEYS) {
+            String value = hadoopConfig.get(mirrored[0]);
+            if (value != null && hadoopConfig.get(mirrored[1]) == null) {
+                hadoopConfig.set(mirrored[1], value);
+            }
+        }
+        return hadoopConfig;
+    }
+
+    /**
+     * Returns the cached file system for the scheme and authority of {@code path}, creating and
+     * initializing it on first use.
+     *
+     * @param path path whose URI selects the driver and is used to initialize the file system
+     * @return an initialized Hadoop file system
+     * @throws UncheckedIOException if initializing the file system fails
+     */
+    @Override
+    protected FileSystem createFileSystem(org.apache.hadoop.fs.Path path) {
+        final URI fsUri = path.toUri();
+        final String scheme = fsUri.getScheme();
+        final String authority = fsUri.getAuthority();
+        return CACHE.computeIfAbsent(
+                new CacheKey(hadoopOptions, scheme, authority),
+                key -> {
+                    Configuration hadoopConf = new Configuration();
+                    key.options.toMap().forEach(hadoopConf::set);
+                    FileSystem fs;
+
+                    if (ABFS_SECURE_SCHEME.equalsIgnoreCase(scheme)) {
+                        // abfss:// is an ABFS URI as well; it must not be handed to WASB.
+                        fs = new org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem();
+                        LOG.debug("Creating secure ABFS filesystem for {}", fsUri);
+                    } else if (ABFS_SCHEME.equalsIgnoreCase(scheme)) {
+                        fs = new AzureBlobFileSystem();
+                        LOG.debug("Creating ABFS filesystem for {}", fsUri);
+                    } else {
+                        fs = new NativeAzureFileSystem();
+                        LOG.debug("Creating WASB filesystem for {}", fsUri);
+                    }
+                    try {
+                        fs.initialize(fsUri, hadoopConf);
+                    } catch (IOException e) {
+                        // The mapping function cannot throw checked exceptions.
+                        throw new UncheckedIOException(e);
+                    }
+                    return fs;
+                });
+    }
+
+    /** Cache key combining the Hadoop options with the scheme and authority of the path. */
+    private static class CacheKey {
+
+        private final Options options;
+        private final String scheme;
+        private final String authority;
+
+        private CacheKey(Options options, String scheme, String authority) {
+            this.options = options;
+            this.scheme = scheme;
+            this.authority = authority;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) {
+                return true;
+            }
+            if (o == null || getClass() != o.getClass()) {
+                return false;
+            }
+            CacheKey cacheKey = (CacheKey) o;
+            return Objects.equals(options, cacheKey.options)
+                    && Objects.equals(scheme, cacheKey.scheme)
+                    && Objects.equals(authority, cacheKey.authority);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(options, scheme, authority);
+        }
+    }
+}
diff --git
a/paimon-filesystems/paimon-azure-impl/src/main/java/org/apache/paimon/azure/HadoopCompliantFileIO.java
b/paimon-filesystems/paimon-azure-impl/src/main/java/org/apache/paimon/azure/HadoopCompliantFileIO.java
new file mode 100644
index 0000000000..0758bb4437
--- /dev/null
+++
b/paimon-filesystems/paimon-azure-impl/src/main/java/org/apache/paimon/azure/HadoopCompliantFileIO.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.azure;
+
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.FileStatus;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.fs.PositionOutputStream;
+import org.apache.paimon.fs.SeekableInputStream;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+
+import javax.annotation.Nullable;
+
+import java.io.IOException;
+
+/**
+ * Hadoop {@link FileIO}.
+ *
+ * <p>Every operation converts the Paimon {@link Path} into a Hadoop path and delegates to the
+ * Hadoop {@link FileSystem} supplied by {@link #createFileSystem}.
+ *
+ * <p>Important: copy this class from HadoopFileIO here to avoid class loader conflicts.
+ */
+public abstract class HadoopCompliantFileIO implements FileIO {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * File system created on first use by {@link #getFileSystem}. Once set it is reused for every
+     * later call, whatever path is passed in.
+     */
+    protected transient volatile FileSystem fs;
+
+    @Override
+    public SeekableInputStream newInputStream(Path path) throws IOException {
+        org.apache.hadoop.fs.Path hadoopPath = path(path);
+        return new HadoopSeekableInputStream(getFileSystem(hadoopPath).open(hadoopPath));
+    }
+
+    @Override
+    public PositionOutputStream newOutputStream(Path path, boolean overwrite) throws IOException {
+        org.apache.hadoop.fs.Path hadoopPath = path(path);
+        return new HadoopPositionOutputStream(
+                getFileSystem(hadoopPath).create(hadoopPath, overwrite));
+    }
+
+    @Override
+    public FileStatus getFileStatus(Path path) throws IOException {
+        org.apache.hadoop.fs.Path hadoopPath = path(path);
+        return new HadoopFileStatus(getFileSystem(hadoopPath).getFileStatus(hadoopPath));
+    }
+
+    /**
+     * Lists the entries under {@code path}.
+     *
+     * @return the wrapped statuses, or an empty array when Hadoop returns {@code null}
+     */
+    @Override
+    public FileStatus[] listStatus(Path path) throws IOException {
+        org.apache.hadoop.fs.Path hadoopPath = path(path);
+        FileStatus[] statuses = new FileStatus[0];
+        org.apache.hadoop.fs.FileStatus[] hadoopStatuses =
+                getFileSystem(hadoopPath).listStatus(hadoopPath);
+        if (hadoopStatuses != null) {
+            statuses = new FileStatus[hadoopStatuses.length];
+            for (int i = 0; i < hadoopStatuses.length; i++) {
+                statuses[i] = new HadoopFileStatus(hadoopStatuses[i]);
+            }
+        }
+        return statuses;
+    }
+
+    @Override
+    public boolean exists(Path path) throws IOException {
+        org.apache.hadoop.fs.Path hadoopPath = path(path);
+        return getFileSystem(hadoopPath).exists(hadoopPath);
+    }
+
+    @Override
+    public boolean delete(Path path, boolean recursive) throws IOException {
+        org.apache.hadoop.fs.Path hadoopPath = path(path);
+        return getFileSystem(hadoopPath).delete(hadoopPath, recursive);
+    }
+
+    @Override
+    public boolean mkdirs(Path path) throws IOException {
+        org.apache.hadoop.fs.Path hadoopPath = path(path);
+        return getFileSystem(hadoopPath).mkdirs(hadoopPath);
+    }
+
+    @Override
+    public boolean rename(Path src, Path dst) throws IOException {
+        org.apache.hadoop.fs.Path hadoopSrc = path(src);
+        org.apache.hadoop.fs.Path hadoopDst = path(dst);
+        return getFileSystem(hadoopSrc).rename(hadoopSrc, hadoopDst);
+    }
+
+    /** Converts a Paimon path into a Hadoop path through its URI. */
+    private org.apache.hadoop.fs.Path path(Path path) {
+        return new org.apache.hadoop.fs.Path(path.toUri());
+    }
+
+    /**
+     * Returns the file system, creating it from {@code path} on the first call. The field is
+     * re-checked under the lock so that concurrent first calls create it only once.
+     */
+    private FileSystem getFileSystem(org.apache.hadoop.fs.Path path) throws IOException {
+        if (fs == null) {
+            synchronized (this) {
+                if (fs == null) {
+                    fs = createFileSystem(path);
+                }
+            }
+        }
+        return fs;
+    }
+
+    /**
+     * Creates the Hadoop file system that serves {@code path}.
+     *
+     * @param path the first path this instance is asked to access
+     * @return the file system to delegate all operations to
+     * @throws IOException if the file system cannot be created
+     */
+    protected abstract FileSystem createFileSystem(org.apache.hadoop.fs.Path path)
+            throws IOException;
+
+    /** {@link SeekableInputStream} backed by a Hadoop {@link FSDataInputStream}. */
+    private static class HadoopSeekableInputStream extends SeekableInputStream {
+        /**
+         * Minimum amount of bytes to skip forward before we issue a seek instead of discarding
+         * read.
+         *
+         * <p>The current value is just a magic number. In the long run, this value could become
+         * configurable, but for now it is a conservative, relatively small value that should bring
+         * safe improvements for small skips (e.g. in reading meta data), that would hurt the most
+         * with frequent seeks.
+         *
+         * <p>The optimal value depends on the DFS implementation and configuration plus the
+         * underlying filesystem. For now, this number is chosen "big enough" to provide
+         * improvements for smaller seeks, and "small enough" to avoid disadvantages over real
+         * seeks. While the minimum should be the page size, a true optimum per system would be the
+         * amount of bytes that can be consumed sequentially within the seektime. Unfortunately,
+         * seektime is not constant and devices, OS, and DFS potentially also use read buffers and
+         * read-ahead.
+         */
+        private static final int MIN_SKIP_BYTES = 1024 * 1024;
+
+        private final FSDataInputStream in;
+
+        private HadoopSeekableInputStream(FSDataInputStream in) {
+            this.in = in;
+        }
+
+        @Override
+        public void seek(long seekPos) throws IOException {
+            // We do some optimizations to avoid that some implementations of distributed FS
+            // perform expensive seeks when they are actually not needed.
+            long delta = seekPos - getPos();
+            if (delta > 0L && delta <= MIN_SKIP_BYTES) {
+                // Instead of a small forward seek, we skip over the gap
+                skipFully(delta);
+            } else if (delta != 0L) {
+                // For larger gaps and backward seeks, we do a real seek
+                forceSeek(seekPos);
+            } // Do nothing if delta is zero.
+        }
+
+        @Override
+        public long getPos() throws IOException {
+            return in.getPos();
+        }
+
+        @Override
+        public int read() throws IOException {
+            return in.read();
+        }
+
+        @Override
+        public int read(byte[] b, int off, int len) throws IOException {
+            return in.read(b, off, len);
+        }
+
+        @Override
+        public void close() throws IOException {
+            in.close();
+        }
+
+        /**
+         * Positions the stream to the given location. In contrast to {@link #seek(long)}, this
+         * method will always issue a "seek" command to the dfs and may not replace it by {@link
+         * #skip(long)} for small seeks.
+         *
+         * <p>Notice that the underlying DFS implementation can still decide to do skip instead of
+         * seek.
+         *
+         * @param seekPos the position to seek to.
+         */
+        public void forceSeek(long seekPos) throws IOException {
+            in.seek(seekPos);
+        }
+
+        /**
+         * Skips over a given amount of bytes in the stream.
+         *
+         * <p>{@code skip} is allowed to skip fewer bytes than requested, including none at all.
+         * When it makes no progress a single byte is read instead, which either advances the
+         * stream or proves that its end was reached, so the loop always terminates.
+         *
+         * @param bytes the number of bytes to skip.
+         * @throws java.io.EOFException if the stream ends before all bytes were skipped.
+         */
+        public void skipFully(long bytes) throws IOException {
+            while (bytes > 0) {
+                long skipped = in.skip(bytes);
+                if (skipped > 0) {
+                    bytes -= skipped;
+                } else if (in.read() >= 0) {
+                    bytes--;
+                } else {
+                    throw new java.io.EOFException(
+                            "Reached end of stream with " + bytes + " bytes left to skip");
+                }
+            }
+        }
+    }
+
+    /** {@link PositionOutputStream} backed by a Hadoop {@link FSDataOutputStream}. */
+    private static class HadoopPositionOutputStream extends PositionOutputStream {
+        private final FSDataOutputStream out;
+
+        private HadoopPositionOutputStream(FSDataOutputStream out) {
+            this.out = out;
+        }
+
+        @Override
+        public long getPos() {
+            return out.getPos();
+        }
+
+        @Override
+        public void write(int b) throws IOException {
+            out.write(b);
+        }
+
+        @Override
+        public void write(byte[] b) throws IOException {
+            out.write(b);
+        }
+
+        @Override
+        public void write(byte[] b, int off, int len) throws IOException {
+            out.write(b, off, len);
+        }
+
+        /** Flushes by calling {@code hflush()} on the underlying Hadoop stream. */
+        @Override
+        public void flush() throws IOException {
+            out.hflush();
+        }
+
+        @Override
+        public void close() throws IOException {
+            out.close();
+        }
+    }
+
+    /** {@link FileStatus} view over a Hadoop file status. */
+    private static class HadoopFileStatus implements FileStatus {
+        private final org.apache.hadoop.fs.FileStatus status;
+
+        private HadoopFileStatus(org.apache.hadoop.fs.FileStatus status) {
+            this.status = status;
+        }
+
+        @Override
+        public long getLen() {
+            return status.getLen();
+        }
+
+        @Override
+        public boolean isDir() {
+            return status.isDirectory();
+        }
+
+        @Override
+        public Path getPath() {
+            return new Path(status.getPath().toUri());
+        }
+
+        @Override
+        public long getModificationTime() {
+            return status.getModificationTime();
+        }
+
+        @Override
+        public long getAccessTime() {
+            return status.getAccessTime();
+        }
+
+        @Nullable
+        @Override
+        public String getOwner() {
+            return status.getOwner();
+        }
+    }
+}
diff --git
a/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/NOTICE
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/NOTICE
new file mode 100644
index 0000000000..0430ffc035
--- /dev/null
+++ b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/NOTICE
@@ -0,0 +1,91 @@
+paimon-azure-impl
+Copyright 2023-2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+This project bundles the following dependencies under the Apache Software
License 2.0 (http://www.apache.org/licenses/LICENSE-2.0.txt)
+
+- com.fasterxml.jackson.core:jackson-annotations:2.14.2
+- com.fasterxml.jackson.core:jackson-core:2.14.2
+- com.fasterxml.jackson.core:jackson-databind:2.14.2
+- com.fasterxml.jackson.dataformat:jackson-dataformat-csv:2.14.2
+- com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.14.2
+- com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.14.2
+- com.fasterxml.jackson.datatype:jackson-datatype-joda:2.9.4
+- com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.14.2
+- com.fasterxml.woodstox:woodstox-core:5.3.0
+- com.github.ben-manes.caffeine:caffeine:2.9.3
+- com.github.stephenc.jcip:jcip-annotations:1.0-1
+- com.google.code.findbugs:jsr305:1.3.9
+- com.google.errorprone:error_prone_annotations:2.5.1
+- com.google.guava:failureaccess:1.0.1
+- com.google.j2objc:j2objc-annotations:1.3
+- commons-beanutils:commons-beanutils:1.9.4
+- commons-codec:commons-codec:1.11
+- commons-collections:commons-collections:3.2.2
+- commons-io:commons-io:2.8.0
+- commons-logging:commons-logging:1.1.3
+- org.apache.commons:commons-compress:1.21
+- org.apache.commons:commons-configuration2:2.1.1
+- org.apache.commons:commons-lang3:3.12.0
+- org.apache.commons:commons-text:1.4
+- org.apache.hadoop:hadoop-annotations:3.3.4
+- org.apache.hadoop:hadoop-auth:3.3.4
+- org.apache.hadoop:hadoop-azure:3.3.4
+- org.apache.hadoop:hadoop-common:3.3.4
+- org.apache.hadoop.thirdparty:hadoop-shaded-guava:1.1.1
+- org.apache.hadoop.thirdparty:hadoop-shaded-protobuf_3_7:1.1.1
+- org.apache.httpcomponents:httpclient:4.5.13
+- org.apache.httpcomponents:httpcore:4.4.13
+- org.apache.kerby:kerb-core:1.0.1
+- org.apache.kerby:kerby-pkix:1.0.1
+- org.apache.kerby:kerby-asn1:1.0.1
+- org.apache.kerby:kerby-util:1.0.1
+- org.apache.logging.log4j:log4j-1.2-api:2.17.1
+- org.apache.logging.log4j:log4j-api:2.17.1
+- org.apache.logging.log4j:log4j-core:2.17.1
+- org.apache.logging.log4j:log4j-slf4j-impl:2.17.1
+- org.xerial.snappy:snappy-java:1.1.8.2
+
+This project bundles the following dependencies under BSD-2 License
(https://opensource.org/licenses/BSD-2-Clause):
+- com.github.luben:zstd-jni:1.5.5-11
+- dnsjava:dnsjava:2.1.7
+
+This project bundles the following dependencies under the MIT License
(https://opensource.org/licenses/MIT):
+- com.microsoft.azure:adal4j:1.6.2
+- com.microsoft.azure:azure:1.16.0
+- com.microsoft.azure:azure-annotations:1.2.0
+- com.microsoft.azure:azure-client-authentication:1.6.2
+- com.microsoft.azure:azure-client-runtime:1.6.2
+- com.microsoft.azure:azure-keyvault:1.0.0
+- com.microsoft.azure:azure-keyvault-core:1.0.0
+- com.microsoft.azure:azure-keyvault-webkey:1.0.0
+- com.microsoft.rest:client-runtime:1.6.2
+- org.checkerframework:checker-qual:3.8.0
+- org.slf4j:slf4j-api:1.7.32
+
+This project bundles the following dependencies under CDDL/GPLv2+CE License:
+- com.sun.mail:javax.mail:1.6.1
+- javax.activation:javax.activation-api:1.2.0
+
+This project bundles the following dependencies under the Go License
(https://golang.org/LICENSE):
+- com.google.re2j:re2j:1.1
+
+This project bundles the following dependencies under BSD License
(https://opensource.org/licenses/bsd-license.php):
+- org.codehaus.woodstox:stax2-api:4.2.1
+
+The bundled Apache Hadoop Relocated (Shaded) Third-party Miscellaneous Libs
+org.apache.hadoop.thirdparty:hadoop-shaded-guava dependency bundles the
following dependencies under
+the Apache Software License 2.0
(http://www.apache.org/licenses/LICENSE-2.0.txt)
+
+- com.google.guava:guava:30.1.1-jre
+
+The bundled Apache Hadoop Relocated (Shaded) Third-party Miscellaneous Libs
+org.apache.hadoop.thirdparty:hadoop-shaded-protobuf_3_7 dependency bundles the
following dependencies under
+the Apache Software License 2.0
(http://www.apache.org/licenses/LICENSE-2.0.txt)
+
+This project bundles the following dependencies under the Eclipse Distribution
License - v 1.0
+You find it under licenses/LICENSE.jakarta
+
+- jakarta.activation:jakarta.activation-api:1.2.1
\ No newline at end of file
diff --git
a/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.animal-sniffer
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.animal-sniffer
new file mode 100644
index 0000000000..2062eb88b4
--- /dev/null
+++
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.animal-sniffer
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) 2009 codehaus.org.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git
a/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.checker-framework-qualifiers
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.checker-framework-qualifiers
new file mode 100644
index 0000000000..7b59b5c982
--- /dev/null
+++
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.checker-framework-qualifiers
@@ -0,0 +1,22 @@
+Checker Framework qualifiers
+Copyright 2004-present by the Checker Framework developers
+
+MIT License:
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git
a/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.dnsjava
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.dnsjava
new file mode 100644
index 0000000000..8daf3fc254
--- /dev/null
+++
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.dnsjava
@@ -0,0 +1,30 @@
+Copyright (c) 1998-2019, Brian Wellington
+Copyright (c) 2005 VeriSign. All rights reserved.
+Copyright (c) 2019-2021, dnsjava authors
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git
a/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.jakarta
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.jakarta
new file mode 100644
index 0000000000..0dea72127c
--- /dev/null
+++
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.jakarta
@@ -0,0 +1,28 @@
+Copyright (c) 2018 Oracle and/or its affiliates. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Eclipse Foundation, Inc. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git
a/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.jaxb
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.jaxb
new file mode 100644
index 0000000000..fd16ea9546
--- /dev/null
+++
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.jaxb
@@ -0,0 +1,135 @@
+COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1
+
+1. Definitions.
+
+ 1.1. "Contributor" means each individual or entity that creates or
contributes to the creation of Modifications.
+
+ 1.2. "Contributor Version" means the combination of the Original
Software, prior Modifications used by a Contributor (if any), and the
Modifications made by that particular Contributor.
+
+ 1.3. "Covered Software" means (a) the Original Software, or (b)
Modifications, or (c) the combination of files containing Original Software
with files containing Modifications, in each case including portions thereof.
+
+ 1.4. "Executable" means the Covered Software in any form other than
Source Code.
+
+ 1.5. "Initial Developer" means the individual or entity that first makes
Original Software available under this License.
+
+ 1.6. "Larger Work" means a work which combines Covered Software or
portions thereof with code not governed by the terms of this License.
+
+ 1.7. "License" means this document.
+
+ 1.8. "Licensable" means having the right to grant, to the maximum extent
possible, whether at the time of the initial grant or subsequently acquired,
any and all of the rights conveyed herein.
+
+ 1.9. "Modifications" means the Source Code and Executable form of any of
the following:
+
+ A. Any file that results from an addition to, deletion from or
modification of the contents of a file containing Original Software or previous
Modifications;
+
+ B. Any new file that contains any part of the Original Software or
previous Modification; or
+
+ C. Any new file that is contributed or otherwise made available under the
terms of this License.
+
+ 1.10. "Original Software" means the Source Code and Executable form of
computer software code that is originally released under this License.
+
+ 1.11. "Patent Claims" means any patent claim(s), now owned or hereafter
acquired, including without limitation, method, process, and apparatus claims,
in any patent Licensable by grantor.
+
+ 1.12. "Source Code" means (a) the common form of computer software code
in which modifications are made and (b) associated documentation included in or
with such code.
+
+ 1.13. "You" (or "Your") means an individual or a legal entity exercising
rights under, and complying with all of the terms of, this License. For legal
entities, "You" includes any entity which controls, is controlled by, or is
under common control with You. For purposes of this definition, "control" means
(a) the power, direct or indirect, to cause the direction or management of such
entity, whether by contract or otherwise, or (b) ownership of more than fifty
percent (50%) of the o [...]
+
+2. License Grants.
+
+ 2.1. The Initial Developer Grant.
+
+ Conditioned upon Your compliance with Section 3.1 below and subject to
third party intellectual property claims, the Initial Developer hereby grants
You a world-wide, royalty-free, non-exclusive license:
+
+ (a) under intellectual property rights (other than patent or trademark)
Licensable by Initial Developer, to use, reproduce, modify, display, perform,
sublicense and distribute the Original Software (or portions thereof), with or
without Modifications, and/or as part of a Larger Work; and
+
+ (b) under Patent Claims infringed by the making, using or selling of
Original Software, to make, have made, use, practice, sell, and offer for sale,
and/or otherwise dispose of the Original Software (or portions thereof).
+
+ (c) The licenses granted in Sections 2.1(a) and (b) are effective on the
date Initial Developer first distributes or otherwise makes the Original
Software available to a third party under the terms of this License.
+
+ (d) Notwithstanding Section 2.1(b) above, no patent license is granted:
(1) for code that You delete from the Original Software, or (2) for
infringements caused by: (i) the modification of the Original Software, or (ii)
the combination of the Original Software with other software or devices.
+
+ 2.2. Contributor Grant.
+
+ Conditioned upon Your compliance with Section 3.1 below and subject to
third party intellectual property claims, each Contributor hereby grants You a
world-wide, royalty-free, non-exclusive license:
+
+ (a) under intellectual property rights (other than patent or trademark)
Licensable by Contributor to use, reproduce, modify, display, perform,
sublicense and distribute the Modifications created by such Contributor (or
portions thereof), either on an unmodified basis, with other Modifications, as
Covered Software and/or as part of a Larger Work; and
+
+ (b) under Patent Claims infringed by the making, using, or selling of
Modifications made by that Contributor either alone and/or in combination with
its Contributor Version (or portions of such combination), to make, use, sell,
offer for sale, have made, and/or otherwise dispose of: (1) Modifications made
by that Contributor (or portions thereof); and (2) the combination of
Modifications made by that Contributor with its Contributor Version (or
portions of such combination).
+
+ (c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on
the date Contributor first distributes or otherwise makes the Modifications
available to a third party.
+
+ (d) Notwithstanding Section 2.2(b) above, no patent license is granted:
(1) for any code that Contributor has deleted from the Contributor Version; (2)
for infringements caused by: (i) third party modifications of Contributor
Version, or (ii) the combination of Modifications made by that Contributor with
other software (except as part of the Contributor Version) or other devices; or
(3) under Patent Claims infringed by Covered Software in the absence of
Modifications made by that Co [...]
+
+3. Distribution Obligations.
+
+ 3.1. Availability of Source Code.
+
+ Any Covered Software that You distribute or otherwise make available in
Executable form must also be made available in Source Code form and that Source
Code form must be distributed only under the terms of this License. You must
include a copy of this License with every copy of the Source Code form of the
Covered Software You distribute or otherwise make available. You must inform
recipients of any such Covered Software in Executable form as to how they can
obtain such Covered Softw [...]
+
+ 3.2. Modifications.
+
+ The Modifications that You create or to which You contribute are governed
by the terms of this License. You represent that You believe Your Modifications
are Your original creation(s) and/or You have sufficient rights to grant the
rights conveyed by this License.
+
+ 3.3. Required Notices.
+
+ You must include a notice in each of Your Modifications that identifies
You as the Contributor of the Modification. You may not remove or alter any
copyright, patent or trademark notices contained within the Covered Software,
or any notices of licensing or any descriptive text giving attribution to any
Contributor or the Initial Developer.
+
+ 3.4. Application of Additional Terms.
+
+ You may not offer or impose any terms on any Covered Software in Source
Code form that alters or restricts the applicable version of this License or
the recipients' rights hereunder. You may choose to offer, and to charge a fee
for, warranty, support, indemnity or liability obligations to one or more
recipients of Covered Software. However, you may do so only on Your own behalf,
and not on behalf of the Initial Developer or any Contributor. You must make it
absolutely clear that any [...]
+
+ 3.5. Distribution of Executable Versions.
+
+ You may distribute the Executable form of the Covered Software under the
terms of this License or under the terms of a license of Your choice, which may
contain terms different from this License, provided that You are in compliance
with the terms of this License and that the license for the Executable form
does not attempt to limit or alter the recipient's rights in the Source Code
form from the rights set forth in this License. If You distribute the Covered
Software in Executable f [...]
+
+ 3.6. Larger Works.
+
+ You may create a Larger Work by combining Covered Software with other
code not governed by the terms of this License and distribute the Larger Work
as a single product. In such a case, You must make sure the requirements of
this License are fulfilled for the Covered Software.
+
+4. Versions of the License.
+
+ 4.1. New Versions.
+
+ Oracle is the initial license steward and may publish revised and/or new
versions of this License from time to time. Each version will be given a
distinguishing version number. Except as provided in Section 4.3, no one other
than the license steward has the right to modify this License.
+
+ 4.2. Effect of New Versions.
+
+ You may always continue to use, distribute or otherwise make the Covered
Software available under the terms of the version of the License under which
You originally received the Covered Software. If the Initial Developer includes
a notice in the Original Software prohibiting it from being distributed or
otherwise made available under any subsequent version of the License, You must
distribute and make the Covered Software available under the terms of the
version of the License under [...]
+
+ 4.3. Modified Versions.
+
+ When You are an Initial Developer and You want to create a new license
for Your Original Software, You may create and use a modified version of this
License if You: (a) rename the license and remove any references to the name of
the license steward (except to note that the license differs from this
License); and (b) otherwise make it clear that the license contains terms which
differ from this License.
+
+5. DISCLAIMER OF WARRANTY.
+
+ COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS,
WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT
LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS,
MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK
AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD
ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL
DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUM [...]
+
+6. TERMINATION.
+
+ 6.1. This License and the rights granted hereunder will terminate
automatically if You fail to comply with terms herein and fail to cure such
breach within 30 days of becoming aware of the breach. Provisions which, by
their nature, must remain in effect beyond the termination of this License
shall survive.
+
+ 6.2. If You assert a patent infringement claim (excluding declaratory
judgment actions) against Initial Developer or a Contributor (the Initial
Developer or Contributor against whom You assert such claim is referred to as
"Participant") alleging that the Participant Software (meaning the Contributor
Version where the Participant is a Contributor or the Original Software where
the Participant is the Initial Developer) directly or indirectly infringes any
patent, then any and all righ [...]
+
+ 6.3. If You assert a patent infringement claim against Participant
alleging that the Participant Software directly or indirectly infringes any
patent where such claim is resolved (such as by license or settlement) prior to
the initiation of patent infringement litigation, then the reasonable value of
the licenses granted by such Participant under Sections 2.1 or 2.2 shall be
taken into account in determining the amount or value of any payment or license.
+
+ 6.4. In the event of termination under Sections 6.1 or 6.2 above, all end
user licenses that have been validly granted by You or any distributor
hereunder prior to termination (excluding licenses granted to You by any
distributor) shall survive termination.
+
+7. LIMITATION OF LIABILITY.
+
+ UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING
NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY
OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF
ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT
LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR
MALFUNCTION, OR ANY AND ALL OTHER COMM [...]
+
+8. U.S. GOVERNMENT END USERS.
+
+ The Covered Software is a "commercial item," as that term is defined in
48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer software" (as
that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and "commercial
computer software documentation" as such terms are used in 48 C.F.R. 12.212
(Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through
227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software
with only those rights set for [...]
+
+9. MISCELLANEOUS.
+
+ This License represents the complete agreement concerning subject matter
hereof. If any provision of this License is held to be unenforceable, such
provision shall be reformed only to the extent necessary to make it
enforceable. This License shall be governed by the law of the jurisdiction
specified in a notice contained within the Original Software (except to the
extent applicable law, if any, provides otherwise), excluding such
jurisdiction's conflict-of-law provisions. Any litiga [...]
+
+10. RESPONSIBILITY FOR CLAIMS.
+
+ As between Initial Developer and the Contributors, each party is
responsible for claims and damages arising, directly or indirectly, out of its
utilization of rights under this License and You agree to work with Initial
Developer and Contributors to distribute such responsibility on an equitable
basis. Nothing herein is intended or shall be deemed to constitute any
admission of liability.
+
+----------
+NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION
LICENSE (CDDL)
+The code released under the CDDL shall be governed by the laws of the State of
California (excluding conflict-of-law provisions). Any litigation relating to
this License shall be subject to the jurisdiction of the Federal Courts of the
Northern District of California and the state courts of the State of
California, with venue lying in Santa Clara County, California.
diff --git
a/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.re2j
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.re2j
new file mode 100644
index 0000000000..b620ae68fe
--- /dev/null
+++
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.re2j
@@ -0,0 +1,32 @@
+This is a work derived from Russ Cox's RE2 in Go, whose license
+http://golang.org/LICENSE is as follows:
+
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+ * Neither the name of Google Inc. nor the names of its contributors
+ may be used to endorse or promote products derived from this
+ software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git
a/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.stax2api
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.stax2api
new file mode 100644
index 0000000000..0ed6361699
--- /dev/null
+++
b/paimon-filesystems/paimon-azure-impl/src/main/resources/META-INF/licenses/LICENSE.stax2api
@@ -0,0 +1,22 @@
+Copyright woodstox stax2api contributors.
+
+Redistribution and use in source and binary forms, with or without
modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA,
+OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/paimon-filesystems/paimon-azure/pom.xml
b/paimon-filesystems/paimon-azure/pom.xml
new file mode 100644
index 0000000000..acb60a20e9
--- /dev/null
+++ b/paimon-filesystems/paimon-azure/pom.xml
@@ -0,0 +1,143 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>paimon-filesystems</artifactId>
+ <groupId>org.apache.paimon</groupId>
+ <version>1.1-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>paimon-azure</artifactId>
+ <name>Paimon : FileSystems : AZURE</name>
+ <packaging>jar</packaging>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.paimon</groupId>
+ <artifactId>paimon-common</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope> <!-- provided scope: compiled against, but expected to be supplied by the environment at run time -->
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.paimon</groupId>
+ <artifactId>paimon-azure-impl</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope> <!-- not on the compile classpath of this module -->
+ <optional>true</optional> <!-- optional: not passed on transitively to consumers of paimon-azure -->
+ <exclusions>
+ <exclusion> <!-- wildcard exclusion: drops every transitive dependency of paimon-azure-impl -->
+ <groupId>*</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin> <!-- jar packaging: also builds a test jar and marks the main jar as multi-release -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal> <!-- additionally packages the test classes into their own jar -->
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <archive>
+ <manifestEntries>
+ <!-- jaxb-api is packaged as an optional
dependency that is only accessible on Java 11 -->
+ <Multi-Release>true</Multi-Release> <!-- written to the jar manifest as "Multi-Release: true" -->
+ </manifestEntries>
+ </archive>
+ </configuration>
+ </plugin>
+
+ <plugin> <!-- unpacks the paimon-azure-impl jar into a sub-directory of this module's classes output -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-azure-classes</id>
+ <phase>prepare-package</phase> <!-- bound to prepare-package, i.e. before the jar is assembled -->
+ <goals>
+ <goal>unpack</goal>
+ </goals>
+ <configuration>
+ <artifactItems>
+ <artifactItem> <!-- first pass: everything from the impl jar except its META-INF directory -->
+ <groupId>org.apache.paimon</groupId>
+ <artifactId>paimon-azure-impl</artifactId>
+ <version>${project.version}</version>
+ <type>jar</type>
+ <overWrite>true</overWrite>
+
<outputDirectory>${project.build.directory}/classes/paimon-plugin-azure</outputDirectory>
+ <excludes>META-INF/**</excludes>
+ </artifactItem>
+ <artifactItem> <!-- second pass, same jar and same target directory: only META-INF/services and META-INF/versions -->
+ <groupId>org.apache.paimon</groupId>
+ <artifactId>paimon-azure-impl</artifactId>
+ <version>${project.version}</version>
+ <type>jar</type>
+ <overWrite>true</overWrite>
+
<outputDirectory>${project.build.directory}/classes/paimon-plugin-azure</outputDirectory>
+
<includes>META-INF/services/**,META-INF/versions/**</includes>
+ </artifactItem>
+ </artifactItems>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin> <!-- adds paimon-azure-impl to the shade-paimon execution, restricted by the filter below -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>shade-paimon</id> <!-- NOTE(review): presumably extends an execution of the same id declared in a parent pom; confirm -->
+ <configuration>
+ <artifactSet>
+ <includes combine.children="append"> <!-- append to, rather than replace, any inherited include list -->
+
<include>org.apache.paimon:paimon-azure-impl</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter> <!-- from the impl jar keep only META-INF content, minus META-INF/services -->
+
<artifact>org.apache.paimon:paimon-azure-impl</artifact>
+ <includes>
+ <include>META-INF/**</include>
+ </includes>
+ <excludes>
+ <exclude>META-INF/services/**</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
\ No newline at end of file
diff --git
a/paimon-filesystems/paimon-azure/src/main/java/org/apache/paimon/azure/AzureLoader.java
b/paimon-filesystems/paimon-azure/src/main/java/org/apache/paimon/azure/AzureLoader.java
new file mode 100644
index 0000000000..a111594319
--- /dev/null
+++
b/paimon-filesystems/paimon-azure/src/main/java/org/apache/paimon/azure/AzureLoader.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.azure;
+
+import org.apache.paimon.catalog.CatalogContext;
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.FileIOLoader;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.fs.PluginFileIO;
+import org.apache.paimon.plugin.PluginLoader;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Azure Blob Storage {@link FileIOLoader}: reports the "abfs" scheme and hands out a {@link PluginFileIO} whose underlying FileIO is created through a shared {@link PluginLoader}. */
+public class AzureLoader implements FileIOLoader {
+
+ private static final long serialVersionUID = 1L;
+
+ private static final String AZURE_CLASSES_DIR = "paimon-plugin-azure"; // directory name passed to the PluginLoader constructor
+
+ private static final String AZURE_CLASS =
"org.apache.paimon.azure.AzureFileIO"; // class name instantiated via the plugin loader in createFileIO
+
+ private static PluginLoader loader; // created on first use by getLoader(), then reused
+
+ private static synchronized PluginLoader getLoader() { // synchronized: the null check and the assignment happen under the class lock
+ if (loader == null) {
+ loader = new PluginLoader(AZURE_CLASSES_DIR);
+ }
+ return loader;
+ }
+
+ @Override
+ public String getScheme() {
+ return "abfs"; // the only scheme this loader reports
+ }
+
+ @Override
+ public List<String[]> requiredOptions() { // NOTE(review): presumably each array lists alternative spellings of one required key; confirm against FileIOLoader
+ List<String[]> options = new ArrayList<>();
+ options.add(new String[] {"azure.account-name",
"fs.azure.account.name"});
+ options.add(new String[] {"azure.account-key",
"fs.azure.account.key"});
+ return options;
+ }
+
+ @Override
+ public FileIO load(Path path) { // the path argument is not used here; a new wrapper is returned on every call
+ return new AzurePluginFileIO();
+ }
+
+ private static class AzurePluginFileIO extends PluginFileIO { // PluginFileIO specialization wired to the Azure plugin loader
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public boolean isObjectStore() {
+ return true;
+ }
+
+ @Override
+ protected FileIO createFileIO(Path path) { // instantiates AZURE_CLASS through the shared loader, then configures it
+ FileIO fileIO = getLoader().newInstance(AZURE_CLASS);
+ fileIO.configure(CatalogContext.create(options)); // options is not declared in this class; presumably inherited from PluginFileIO (confirm)
+ return fileIO;
+ }
+
+ @Override
+ protected ClassLoader pluginClassLoader() {
+ return getLoader().submoduleClassLoader();
+ }
+ }
+}
diff --git
a/paimon-filesystems/paimon-azure/src/main/resources/META-INF/services/org.apache.paimon.fs.FileIOLoader
b/paimon-filesystems/paimon-azure/src/main/resources/META-INF/services/org.apache.paimon.fs.FileIOLoader
new file mode 100644
index 0000000000..513f982daf
--- /dev/null
+++
b/paimon-filesystems/paimon-azure/src/main/resources/META-INF/services/org.apache.paimon.fs.FileIOLoader
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.paimon.azure.AzureLoader
\ No newline at end of file
diff --git a/paimon-filesystems/pom.xml b/paimon-filesystems/pom.xml
index 2a878f4cf1..75fa93546d 100644
--- a/paimon-filesystems/pom.xml
+++ b/paimon-filesystems/pom.xml
@@ -42,6 +42,8 @@
<module>paimon-jindo</module>
<module>paimon-gs</module>
<module>paimon-gs-impl</module>
+ <module>paimon-azure</module>
+ <module>paimon-azure-impl</module>
</modules>
<properties>