This is an automated email from the ASF dual-hosted git repository.
yuzelin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-python.git
The following commit(s) were added to refs/heads/main by this push:
new 0be8175 #40 Provide Optional Built-in Hadoop Dependencies (#41)
0be8175 is described below
commit 0be8175389f4f80f97ed7f964cfd002c55e7592b
Author: ChengHui Chen <[email protected]>
AuthorDate: Fri Mar 21 10:10:35 2025 +0800
#40 Provide Optional Built-in Hadoop Dependencies (#41)
---
MANIFEST.in | 1 +
dev/build-source-distribution-package.sh | 28 +++++++++---
dev/lint-python.sh | 2 +
hadoop-deps/pom.xml | 73 ++++++++++++++++++++++++++++++++
pypaimon/py4j/gateway_server.py | 16 +++++--
setup.py | 9 ++--
tools/releasing/create_source_release.sh | 27 +++++++++---
7 files changed, 136 insertions(+), 20 deletions(-)
diff --git a/MANIFEST.in b/MANIFEST.in
index 09e3734..e28dca0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -18,6 +18,7 @@
global-exclude *.py[cod] __pycache__ .DS_Store
recursive-include deps/jars *.jar
+recursive-include deps/hadoop *.jar
include README.md
include LICENSE
include NOTICE
diff --git a/dev/build-source-distribution-package.sh b/dev/build-source-distribution-package.sh
index 26a8807..128700b 100755
--- a/dev/build-source-distribution-package.sh
+++ b/dev/build-source-distribution-package.sh
@@ -19,20 +19,34 @@ CURR_DIR=`pwd`
BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
PROJECT_ROOT="${BASE_DIR}/../"
-# prepare bridge jar
-
-DEPS_DIR=${PROJECT_ROOT}/deps/jars
+DEPS_DIR=${PROJECT_ROOT}/deps
rm -rf ${DEPS_DIR}
-mkdir -p ${DEPS_DIR}
-touch ${DEPS_DIR}/__init__.py
+
+# prepare bridge jar
+BRIDGE_DEPS_DIR=${DEPS_DIR}/jars
+mkdir -p ${BRIDGE_DEPS_DIR}
+touch ${BRIDGE_DEPS_DIR}/__init__.py
cd ${PROJECT_ROOT}/paimon-python-java-bridge
# get bridge jar version
-JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+BRIDGE_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+
+mvn clean install -DskipTests
+cp "target/paimon-python-java-bridge-${BRIDGE_JAR_VERSION}.jar" ${BRIDGE_DEPS_DIR}
+
+# prepare hadoop-deps jar
+HADOOP_DEPS_DIR=${DEPS_DIR}/hadoop
+mkdir -p ${HADOOP_DEPS_DIR}
+touch ${HADOOP_DEPS_DIR}/__init__.py
+
+cd ${PROJECT_ROOT}/hadoop-deps
+
+# get hadoop-deps jar version
+HADOOP_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
mvn clean install -DskipTests
-cp "target/paimon-python-java-bridge-${JAR_VERSION}.jar" ${DEPS_DIR}
+cp "target/hadoop-deps-${HADOOP_JAR_VERSION}.jar" ${HADOOP_DEPS_DIR}
cd ${CURR_DIR}
diff --git a/dev/lint-python.sh b/dev/lint-python.sh
index 687998c..7e220a3 100755
--- a/dev/lint-python.sh
+++ b/dev/lint-python.sh
@@ -580,6 +580,8 @@ function tox_check() {
# dummy jar needed by setup.py
mkdir -p $PAIMON_PYTHON_DIR/deps/jars
touch $PAIMON_PYTHON_DIR/deps/jars/dummy.jar
+ mkdir -p $PAIMON_PYTHON_DIR/deps/hadoop
+ touch $PAIMON_PYTHON_DIR/deps/hadoop/dummy.jar
if [[ -n "$GITHUB_ACTION" ]]; then
# Run tests in all versions triggered by a Git push (tests aren't so many currently)
diff --git a/hadoop-deps/pom.xml b/hadoop-deps/pom.xml
new file mode 100644
index 0000000..fb1decd
--- /dev/null
+++ b/hadoop-deps/pom.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <groupId>org.apache.pypaimon</groupId>
+ <artifactId>hadoop-deps</artifactId>
+ <version>3.3.4</version>
+
+ <properties>
+ <hadoop.version>3.3.4</hadoop.version>
+ <log4j.version>2.17.1</log4j.version>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs-client</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-api</artifactId>
+ <version>${log4j.version}</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+                        <createDependencyReducedPom>false</createDependencyReducedPom>
+ <transformers>
+                            <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/pypaimon/py4j/gateway_server.py b/pypaimon/py4j/gateway_server.py
index f3a0fda..2588217 100644
--- a/pypaimon/py4j/gateway_server.py
+++ b/pypaimon/py4j/gateway_server.py
@@ -102,12 +102,20 @@ def _get_classpath(env):
return os.pathsep.join(classpath)
+_HADOOP_DEPS_PACKAGE = 'pypaimon.hadoop-deps'
+
+
def _get_hadoop_classpath(env):
if constants.PYPAIMON_HADOOP_CLASSPATH in env:
return env[constants.PYPAIMON_HADOOP_CLASSPATH]
-
- if 'HADOOP_CLASSPATH' in env:
+ elif 'HADOOP_CLASSPATH' in env:
return env['HADOOP_CLASSPATH']
else:
-        raise EnvironmentError(f"You haven't set '{constants.PYPAIMON_HADOOP_CLASSPATH}', \
- and 'HADOOP_CLASSPATH' is also not set. Ensure one of them is set.")
+ # use built-in hadoop
+ jars = importlib.resources.files(_HADOOP_DEPS_PACKAGE)
+ one_jar = next(iter(jars.iterdir()), None)
+ if not one_jar:
+            raise EnvironmentError(f"The built-in Hadoop environment has been broken, this \
+                is unexpected. You can set one of '{constants.PYPAIMON_HADOOP_CLASSPATH}' or \
+ 'HADOOP_CLASSPATH' to continue.")
+ return os.path.join(os.path.dirname(str(one_jar)), '*')
diff --git a/setup.py b/setup.py
index 4fc12a6..98515e0 100644
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,8 @@ PACKAGES = [
'pypaimon.api',
'pypaimon.py4j',
'pypaimon.py4j.util',
- 'pypaimon.jars'
+ 'pypaimon.jars',
+ 'pypaimon.hadoop-deps'
]
install_requires = [
@@ -57,10 +58,12 @@ setup(
include_package_data=True,
# releasing tool will generate deps
package_dir={
- "pypaimon.jars": "deps/jars"
+ "pypaimon.jars": "deps/jars",
+ "pypaimon.hadoop-deps": "deps/hadoop"
},
package_data={
- "pypaimon.jars": ["*.jar"]
+ "pypaimon.jars": ["*.jar"],
+ "pypaimon.hadoop-deps": ["*.jar"]
},
install_requires=install_requires,
description='Apache Paimon Python API',
diff --git a/tools/releasing/create_source_release.sh b/tools/releasing/create_source_release.sh
index ad12a42..5e79be1 100755
--- a/tools/releasing/create_source_release.sh
+++ b/tools/releasing/create_source_release.sh
@@ -55,11 +55,13 @@ fi
###########################
-# prepare bridge jar
-
-DEPS_DIR=${PROJECT_ROOT}/deps/jars
+DEPS_DIR=${PROJECT_ROOT}/deps
rm -rf ${DEPS_DIR}
-mkdir -p ${DEPS_DIR}
+
+# prepare bridge jar
+BRIDGE_DEPS_DIR=${DEPS_DIR}/jars
+mkdir -p ${BRIDGE_DEPS_DIR}
+touch ${BRIDGE_DEPS_DIR}/__init__.py
cd ${PROJECT_ROOT}/paimon-python-java-bridge
@@ -70,10 +72,23 @@ if grep -q "<version>.*SNAPSHOT</version>" "pom.xml"; then
fi
# get bridge jar version
-JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+BRIDGE_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+
+mvn clean install -DskipTests
+cp "target/paimon-python-java-bridge-${BRIDGE_JAR_VERSION}.jar" ${BRIDGE_DEPS_DIR}
+
+# prepare hadoop-deps jar
+HADOOP_DEPS_DIR=${DEPS_DIR}/hadoop
+mkdir -p ${HADOOP_DEPS_DIR}
+touch ${HADOOP_DEPS_DIR}/__init__.py
+
+cd ${PROJECT_ROOT}/hadoop-deps
+
+# get hadoop-deps jar version
+HADOOP_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
mvn clean install -DskipTests
-cp "target/paimon-python-java-bridge-${JAR_VERSION}.jar" ${DEPS_DIR}
+cp "target/hadoop-deps-${HADOOP_JAR_VERSION}.jar" ${HADOOP_DEPS_DIR}
cd ${CURR_DIR}