This is an automated email from the ASF dual-hosted git repository.

yuzelin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 0be8175  #40 Provide Optional Built-in Hadoop Dependencies (#41)
0be8175 is described below

commit 0be8175389f4f80f97ed7f964cfd002c55e7592b
Author: ChengHui Chen <[email protected]>
AuthorDate: Fri Mar 21 10:10:35 2025 +0800

    #40 Provide Optional Built-in Hadoop Dependencies (#41)
---
 MANIFEST.in                              |  1 +
 dev/build-source-distribution-package.sh | 28 +++++++++---
 dev/lint-python.sh                       |  2 +
 hadoop-deps/pom.xml                      | 73 ++++++++++++++++++++++++++++++++
 pypaimon/py4j/gateway_server.py          | 16 +++++--
 setup.py                                 |  9 ++--
 tools/releasing/create_source_release.sh | 27 +++++++++---
 7 files changed, 136 insertions(+), 20 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 09e3734..e28dca0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -18,6 +18,7 @@
 
 global-exclude *.py[cod] __pycache__ .DS_Store
 recursive-include deps/jars *.jar
+recursive-include deps/hadoop *.jar
 include README.md
 include LICENSE
 include NOTICE
diff --git a/dev/build-source-distribution-package.sh b/dev/build-source-distribution-package.sh
index 26a8807..128700b 100755
--- a/dev/build-source-distribution-package.sh
+++ b/dev/build-source-distribution-package.sh
@@ -19,20 +19,34 @@ CURR_DIR=`pwd`
 BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
 PROJECT_ROOT="${BASE_DIR}/../"
 
-# prepare bridge jar
-
-DEPS_DIR=${PROJECT_ROOT}/deps/jars
+DEPS_DIR=${PROJECT_ROOT}/deps
 rm -rf ${DEPS_DIR}
-mkdir -p ${DEPS_DIR}
-touch ${DEPS_DIR}/__init__.py
+
+# prepare bridge jar
+BRIDGE_DEPS_DIR=${DEPS_DIR}/jars
+mkdir -p ${BRIDGE_DEPS_DIR}
+touch ${BRIDGE_DEPS_DIR}/__init__.py
 
 cd ${PROJECT_ROOT}/paimon-python-java-bridge
 
 # get bridge jar version
-JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+BRIDGE_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+
+mvn clean install -DskipTests
+cp "target/paimon-python-java-bridge-${BRIDGE_JAR_VERSION}.jar" ${BRIDGE_DEPS_DIR}
+
+# prepare hadoop-deps jar
+HADOOP_DEPS_DIR=${DEPS_DIR}/hadoop
+mkdir -p ${HADOOP_DEPS_DIR}
+touch ${HADOOP_DEPS_DIR}/__init__.py
+
+cd ${PROJECT_ROOT}/hadoop-deps
+
+# get hadoop-deps jar version
+HADOOP_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
 
 mvn clean install -DskipTests
-cp "target/paimon-python-java-bridge-${JAR_VERSION}.jar" ${DEPS_DIR}
+cp "target/hadoop-deps-${HADOOP_JAR_VERSION}.jar" ${HADOOP_DEPS_DIR}
 
 cd ${CURR_DIR}
 
diff --git a/dev/lint-python.sh b/dev/lint-python.sh
index 687998c..7e220a3 100755
--- a/dev/lint-python.sh
+++ b/dev/lint-python.sh
@@ -580,6 +580,8 @@ function tox_check() {
     # dummy jar needed by setup.py
     mkdir -p $PAIMON_PYTHON_DIR/deps/jars
     touch $PAIMON_PYTHON_DIR/deps/jars/dummy.jar
+    mkdir -p $PAIMON_PYTHON_DIR/deps/hadoop
+    touch $PAIMON_PYTHON_DIR/deps/hadoop/dummy.jar
 
     if [[ -n "$GITHUB_ACTION" ]]; then
        # Run tests in all versions triggered by a Git push (tests aren't so many currently)
diff --git a/hadoop-deps/pom.xml b/hadoop-deps/pom.xml
new file mode 100644
index 0000000..fb1decd
--- /dev/null
+++ b/hadoop-deps/pom.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>org.apache.pypaimon</groupId>
+    <artifactId>hadoop-deps</artifactId>
+    <version>3.3.4</version>
+
+    <properties>
+        <hadoop.version>3.3.4</hadoop.version>
+        <log4j.version>2.17.1</log4j.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-common</artifactId>
+            <version>${hadoop.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-hdfs-client</artifactId>
+            <version>${hadoop.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-api</artifactId>
+            <version>${log4j.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <createDependencyReducedPom>false</createDependencyReducedPom>
+                            <transformers>
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+                            </transformers>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/pypaimon/py4j/gateway_server.py b/pypaimon/py4j/gateway_server.py
index f3a0fda..2588217 100644
--- a/pypaimon/py4j/gateway_server.py
+++ b/pypaimon/py4j/gateway_server.py
@@ -102,12 +102,20 @@ def _get_classpath(env):
     return os.pathsep.join(classpath)
 
 
+_HADOOP_DEPS_PACKAGE = 'pypaimon.hadoop-deps'
+
+
 def _get_hadoop_classpath(env):
     if constants.PYPAIMON_HADOOP_CLASSPATH in env:
         return env[constants.PYPAIMON_HADOOP_CLASSPATH]
-
-    if 'HADOOP_CLASSPATH' in env:
+    elif 'HADOOP_CLASSPATH' in env:
         return env['HADOOP_CLASSPATH']
     else:
-        raise EnvironmentError(f"You haven't set '{constants.PYPAIMON_HADOOP_CLASSPATH}', \
- and 'HADOOP_CLASSPATH' is also not set. Ensure one of them is set.")
+        # use built-in hadoop
+        jars = importlib.resources.files(_HADOOP_DEPS_PACKAGE)
+        one_jar = next(iter(jars.iterdir()), None)
+        if not one_jar:
+            raise EnvironmentError(f"The built-in Hadoop environment has been broken, this \
+            is unexpected. You can set one of '{constants.PYPAIMON_HADOOP_CLASSPATH}' or \
+            'HADOOP_CLASSPATH' to continue.")
+        return os.path.join(os.path.dirname(str(one_jar)), '*')
diff --git a/setup.py b/setup.py
index 4fc12a6..98515e0 100644
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,8 @@ PACKAGES = [
     'pypaimon.api',
     'pypaimon.py4j',
     'pypaimon.py4j.util',
-    'pypaimon.jars'
+    'pypaimon.jars',
+    'pypaimon.hadoop-deps'
 ]
 
 install_requires = [
@@ -57,10 +58,12 @@ setup(
     include_package_data=True,
     # releasing tool will generate deps
     package_dir={
-        "pypaimon.jars": "deps/jars"
+        "pypaimon.jars": "deps/jars",
+        "pypaimon.hadoop-deps": "deps/hadoop"
     },
     package_data={
-        "pypaimon.jars": ["*.jar"]
+        "pypaimon.jars": ["*.jar"],
+        "pypaimon.hadoop-deps": ["*.jar"]
     },
     install_requires=install_requires,
     description='Apache Paimon Python API',
diff --git a/tools/releasing/create_source_release.sh b/tools/releasing/create_source_release.sh
index ad12a42..5e79be1 100755
--- a/tools/releasing/create_source_release.sh
+++ b/tools/releasing/create_source_release.sh
@@ -55,11 +55,13 @@ fi
 
 ###########################
 
-# prepare bridge jar
-
-DEPS_DIR=${PROJECT_ROOT}/deps/jars
+DEPS_DIR=${PROJECT_ROOT}/deps
 rm -rf ${DEPS_DIR}
-mkdir -p ${DEPS_DIR}
+
+# prepare bridge jar
+BRIDGE_DEPS_DIR=${DEPS_DIR}/jars
+mkdir -p ${BRIDGE_DEPS_DIR}
+touch ${BRIDGE_DEPS_DIR}/__init__.py
 
 cd ${PROJECT_ROOT}/paimon-python-java-bridge
 
@@ -70,10 +72,23 @@ if grep -q "<version>.*SNAPSHOT</version>" "pom.xml"; then
 fi
 
 # get bridge jar version
-JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+BRIDGE_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+
+mvn clean install -DskipTests
+cp "target/paimon-python-java-bridge-${BRIDGE_JAR_VERSION}.jar" ${BRIDGE_DEPS_DIR}
+
+# prepare hadoop-deps jar
+HADOOP_DEPS_DIR=${DEPS_DIR}/hadoop
+mkdir -p ${HADOOP_DEPS_DIR}
+touch ${HADOOP_DEPS_DIR}/__init__.py
+
+cd ${PROJECT_ROOT}/hadoop-deps
+
+# get hadoop-deps jar version
+HADOOP_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
 
 mvn clean install -DskipTests
-cp "target/paimon-python-java-bridge-${JAR_VERSION}.jar" ${DEPS_DIR}
+cp "target/hadoop-deps-${HADOOP_JAR_VERSION}.jar" ${HADOOP_DEPS_DIR}
 
 cd ${CURR_DIR}
 

Reply via email to