This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 4bdfa4272d [Python] Add Mixed read and write test between Java and
Python. (#6579)
4bdfa4272d is described below
commit 4bdfa4272d9553ccca1f985daa87da722f21287a
Author: umi <[email protected]>
AuthorDate: Tue Nov 11 10:53:37 2025 +0800
[Python] Add Mixed read and write test between Java and Python. (#6579)
---
.github/workflows/paimon-python-checks.yml | 24 +++
.../test/java/org/apache/paimon/JavaPyE2ETest.java | 159 +++++++++++++++
paimon-python/dev/lint-python.sh | 49 ++++-
paimon-python/dev/run_mixed_tests.sh | 217 +++++++++++++++++++++
paimon-python/pypaimon/tests/e2e/__init__.py | 16 ++
.../pypaimon/tests/e2e/java_py_read_write_test.py | 89 +++++++++
6 files changed, 548 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/paimon-python-checks.yml
b/.github/workflows/paimon-python-checks.yml
index c4c7e32a84..80277e701c 100755
--- a/.github/workflows/paimon-python-checks.yml
+++ b/.github/workflows/paimon-python-checks.yml
@@ -32,6 +32,9 @@ on:
env:
PYTHON_VERSIONS: "['3.6.15', '3.10']"
+ JDK_VERSION: 8
+ MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=30
-Dmaven.wagon.http.retryHandler.requestSentEnabled=true
+
concurrency:
group: ${{ github.workflow }}-${{ github.event_name }}-${{
github.event.number || github.run_id }}
@@ -49,6 +52,17 @@ jobs:
- name: Checkout code
uses: actions/checkout@v2
+ - name: Set up JDK ${{ env.JDK_VERSION }}
+ uses: actions/setup-java@v4
+ with:
+ java-version: ${{ env.JDK_VERSION }}
+ distribution: 'temurin'
+
+ - name: Set up Maven
+ uses: stCarolas/[email protected]
+ with:
+ maven-version: 3.8.8
+
- name: Install system dependencies
shell: bash
run: |
@@ -58,9 +72,19 @@ jobs:
curl \
&& rm -rf /var/lib/apt/lists/*
+ - name: Verify Java and Maven installation
+ run: |
+ java -version
+ mvn -version
+
- name: Verify Python version
run: python --version
+ - name: Build Java
+ run: |
+ echo "Start compiling modules"
+ mvn -T 2C -B clean install -DskipTests
+
- name: Install Python dependencies
shell: bash
run: |
diff --git a/paimon-core/src/test/java/org/apache/paimon/JavaPyE2ETest.java
b/paimon-core/src/test/java/org/apache/paimon/JavaPyE2ETest.java
new file mode 100644
index 0000000000..3759bc8757
--- /dev/null
+++ b/paimon-core/src/test/java/org/apache/paimon/JavaPyE2ETest.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon;
+
+import org.apache.paimon.catalog.Catalog;
+import org.apache.paimon.catalog.CatalogContext;
+import org.apache.paimon.catalog.CatalogFactory;
+import org.apache.paimon.catalog.Identifier;
+import org.apache.paimon.data.BinaryString;
+import org.apache.paimon.data.DataFormatTestUtil;
+import org.apache.paimon.data.GenericRow;
+import org.apache.paimon.data.InternalRow;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.schema.Schema;
+import org.apache.paimon.table.FileStoreTable;
+import org.apache.paimon.table.Table;
+import org.apache.paimon.table.sink.InnerTableCommit;
+import org.apache.paimon.table.sink.StreamTableWrite;
+import org.apache.paimon.table.source.Split;
+import org.apache.paimon.table.source.TableRead;
+import org.apache.paimon.types.DataTypes;
+import org.apache.paimon.utils.TraceableFileIO;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
+
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+import static org.apache.paimon.table.SimpleTableTestBase.getResult;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * End-to-end interoperability test between the Java and Python (pypaimon) implementations.
+ *
+ * <p>Both sides share the warehouse directory {@code paimon-python/pypaimon/tests/e2e/warehouse}.
+ * Every test is enabled only when the system property {@code run.e2e.tests} is {@code true}.
+ */
+public class JavaPyE2ETest {
+
+    /** Rows written by both the Java and the Python writer, rendered without row kind. */
+    private static final String[] EXPECTED_ROWS = {
+        "1, Apple, Fruit, 1.5",
+        "2, Banana, Fruit, 0.8",
+        "3, Carrot, Vegetable, 0.6",
+        "4, Broccoli, Vegetable, 1.2",
+        "5, Chicken, Meat, 5.0",
+        "6, Beef, Meat, 8.0"
+    };
+
+    java.nio.file.Path tempDir = Paths.get("../paimon-python/pypaimon/tests/e2e").toAbsolutePath();
+
+    // Fields from TableTestBase that we need
+    protected final String commitUser = UUID.randomUUID().toString();
+    protected Path warehouse;
+    protected Catalog catalog;
+    protected String database;
+
+    /** Creates the shared warehouse directory, the catalog and the {@code default} database. */
+    @BeforeEach
+    public void before() throws Exception {
+        database = "default";
+
+        // Create warehouse directory if it doesn't exist
+        java.nio.file.Path warehouseDir = tempDir.resolve("warehouse");
+        if (!Files.exists(warehouseDir)) {
+            Files.createDirectories(warehouseDir);
+        }
+
+        warehouse = new Path(TraceableFileIO.SCHEME + "://" + warehouseDir);
+        catalog = CatalogFactory.createCatalog(CatalogContext.create(warehouse));
+
+        // ignoreIfExists = true: the database may already exist in the shared warehouse, so
+        // there is no need to catch (and swallow) DatabaseAlreadyExistException.
+        catalog.createDatabase(database, true);
+    }
+
+    /** Writes {@code mixed_test_tablej} from Java and verifies it can be read back in Java. */
+    @Test
+    @EnabledIfSystemProperty(named = "run.e2e.tests", matches = "true")
+    public void testJavaWriteRead() throws Exception {
+        Identifier identifier = identifier("mixed_test_tablej");
+        Schema schema =
+                Schema.newBuilder()
+                        .column("id", DataTypes.INT())
+                        .column("name", DataTypes.STRING())
+                        .column("category", DataTypes.STRING())
+                        .column("value", DataTypes.DOUBLE())
+                        .partitionKeys("category")
+                        .option("dynamic-partition-overwrite", "false")
+                        .build();
+
+        catalog.createTable(identifier, schema, true);
+        FileStoreTable fileStoreTable = (FileStoreTable) catalog.getTable(identifier);
+
+        try (StreamTableWrite write = fileStoreTable.newWrite(commitUser);
+                InnerTableCommit commit = fileStoreTable.newCommit(commitUser)) {
+
+            write.write(createRow(1, "Apple", "Fruit", 1.5));
+            write.write(createRow(2, "Banana", "Fruit", 0.8));
+            write.write(createRow(3, "Carrot", "Vegetable", 0.6));
+            write.write(createRow(4, "Broccoli", "Vegetable", 1.2));
+            write.write(createRow(5, "Chicken", "Meat", 5.0));
+            write.write(createRow(6, "Beef", "Meat", 8.0));
+
+            commit.commit(0, write.prepareCommit(true, 0));
+        }
+
+        assertThat(readAllRows(fileStoreTable)).containsExactlyInAnyOrder(EXPECTED_ROWS);
+    }
+
+    /**
+     * Reads {@code mixed_test_tablep}, which the Python test {@code test_py_write_read} writes,
+     * and verifies its content instead of merely printing it.
+     */
+    @Test
+    @EnabledIfSystemProperty(named = "run.e2e.tests", matches = "true")
+    public void testRead() throws Exception {
+        Identifier identifier = identifier("mixed_test_tablep");
+        FileStoreTable fileStoreTable = (FileStoreTable) catalog.getTable(identifier);
+
+        List<String> res = readAllRows(fileStoreTable);
+        System.out.println(res);
+        assertThat(res).containsExactlyInAnyOrder(EXPECTED_ROWS);
+    }
+
+    // Helper method from TableTestBase
+    protected Identifier identifier(String tableName) {
+        return new Identifier(database, tableName);
+    }
+
+    /** Reads every data split of the latest snapshot and renders each row as a string. */
+    private static List<String> readAllRows(FileStoreTable table) throws Exception {
+        List<Split> splits = new ArrayList<>(table.newSnapshotReader().read().dataSplits());
+        TableRead read = table.newRead();
+        return getResult(
+                read, splits, row -> DataFormatTestUtil.toStringNoRowKind(row, table.rowType()));
+    }
+
+    /** Builds a row matching the (id, name, category, value) schema used by both tables. */
+    private static InternalRow createRow(int id, String name, String category, double value) {
+        return GenericRow.of(
+                id, BinaryString.fromString(name), BinaryString.fromString(category), value);
+    }
+}
diff --git a/paimon-python/dev/lint-python.sh b/paimon-python/dev/lint-python.sh
index 9cbb612137..fb57bfdd41 100755
--- a/paimon-python/dev/lint-python.sh
+++ b/paimon-python/dev/lint-python.sh
@@ -119,13 +119,12 @@ function get_all_supported_checks() {
# exec all selected check stages
function check_stage() {
print_function "STAGE" "checks starting"
- for fun in ${SUPPORT_CHECKS[@]}; do
+ for fun in "${SUPPORT_CHECKS[@]}"; do
$fun
done
echo "All the checks are finished, the detailed information can be found
in: $LOG_FILE"
}
-
###############################################################All Checks
Definitions###############################################################
#########################
# This part defines all check functions such as tox_check and flake8_check
@@ -176,7 +175,7 @@ function pytest_check() {
TEST_DIR="pypaimon/tests/py36"
echo "Running tests for Python 3.6: $TEST_DIR"
else
- TEST_DIR="pypaimon/tests --ignore=pypaimon/tests/py36"
+ TEST_DIR="pypaimon/tests --ignore=pypaimon/tests/py36
--ignore=pypaimon/tests/e2e"
echo "Running tests for Python $PYTHON_VERSION (excluding py36):
pypaimon/tests --ignore=pypaimon/tests/py36"
fi
@@ -194,6 +193,43 @@ function pytest_check() {
print_function "STAGE" "pytest checks... [SUCCESS]"
fi
}
+
+# Mixed tests check - runs Java-Python interoperability tests
+# Skipped on Python 3.6. Delegates to dev/run_mixed_tests.sh, appends its combined
+# stdout/stderr to $LOG_FILE and exits the whole lint run with status 1 on failure.
+function mixed_check() {
+    # Get Python version
+    PYTHON_VERSION=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
+    echo "Detected Python version: $PYTHON_VERSION"
+    if [ "$PYTHON_VERSION" = "3.6" ]; then
+        print_function "STAGE" "mixed tests checks... [SKIPPED]"
+        return
+    fi
+    print_function "STAGE" "mixed tests checks"
+
+    # Path to the mixed tests script
+    MIXED_TESTS_SCRIPT="$CURRENT_DIR/dev/run_mixed_tests.sh"
+
+    if [ ! -f "$MIXED_TESTS_SCRIPT" ]; then
+        echo "Mixed tests script not found at: $MIXED_TESTS_SCRIPT"
+        print_function "STAGE" "mixed tests checks... [FAILED]"
+        exit 1
+    fi
+
+    # Make sure the script is executable
+    chmod +x "$MIXED_TESTS_SCRIPT"
+
+    # Run the mixed tests script.
+    # pipefail makes the pipeline report the script's status rather than tee's;
+    # both paths are quoted so directories containing spaces do not split the command.
+    set -o pipefail
+    "$MIXED_TESTS_SCRIPT" 2>&1 | tee -a "$LOG_FILE"
+
+    MIXED_TESTS_STATUS=$?
+    if [ $MIXED_TESTS_STATUS -ne 0 ]; then
+        print_function "STAGE" "mixed tests checks... [FAILED]"
+        # Stop the running script.
+        exit 1
+    else
+        print_function "STAGE" "mixed tests checks... [SUCCESS]"
+    fi
+}
###############################################################All Checks
Definitions###############################################################
# CURRENT_DIR is "paimon-python/"
SCRIPT_PATH="$(readlink -f "$0")"
@@ -236,15 +272,16 @@ INCLUDE_CHECKS=""
USAGE="
usage: $0 [options]
-h print this help message and exit
--e [tox,flake8,sphinx,mypy]
+-e [tox,flake8,sphinx,mypy,mixed]
exclude checks which split by comma(,)
--i [tox,flake8,sphinx,mypy]
+-i [tox,flake8,sphinx,mypy,mixed]
include checks which split by comma(,)
-l list all checks supported.
Examples:
./lint-python.sh => exec all checks.
./lint-python.sh -e tox,flake8 => exclude checks tox,flake8.
./lint-python.sh -i flake8 => include checks flake8.
+ ./lint-python.sh -i mixed => include checks mixed.
./lint-python.sh -l => list all checks supported.
"
while getopts "hfs:i:e:lr" arg; do
@@ -261,7 +298,7 @@ while getopts "hfs:i:e:lr" arg; do
;;
l)
printf "current supported checks includes:\n"
- for fun in ${SUPPORT_CHECKS[@]}; do
+ for fun in "${SUPPORT_CHECKS[@]}"; do
echo ${fun%%_check*}
done
exit 2
diff --git a/paimon-python/dev/run_mixed_tests.sh
b/paimon-python/dev/run_mixed_tests.sh
new file mode 100755
index 0000000000..8387e79eb9
--- /dev/null
+++ b/paimon-python/dev/run_mixed_tests.sh
@@ -0,0 +1,217 @@
+#!/bin/bash
+
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+# Mixed Java and Python test runner
+# This script runs Java test first, then Python test to verify interoperability
+
+set -e # Exit on any error
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Get script directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+PAIMON_PYTHON_DIR="$PROJECT_ROOT/paimon-python/pypaimon/tests/e2e"
+PAIMON_CORE_DIR="$PROJECT_ROOT/paimon-core"
+
+echo -e "${YELLOW}=== Mixed Java-Python Read Write Test Runner ===${NC}"
+echo "Project root: $PROJECT_ROOT"
+echo "Paimon Python dir: $PAIMON_PYTHON_DIR"
+echo "Paimon Core dir: $PAIMON_CORE_DIR"
+echo ""
+
+# Remove the warehouse directory under the Python e2e test directory, if present.
+# Always finishes by printing an empty separator line.
+cleanup_warehouse() {
+    local target="$PAIMON_PYTHON_DIR/warehouse"
+
+    echo -e "${YELLOW}=== Cleaning up warehouse directory ===${NC}"
+
+    # Nothing to delete: report it and leave early.
+    if [[ ! -d "$target" ]]; then
+        echo "Warehouse directory does not exist, no cleanup needed"
+        echo ""
+        return
+    fi
+
+    echo "Removing warehouse directory: $target"
+    rm -rf "$target"
+    echo -e "${GREEN}✓ Warehouse directory cleaned up successfully${NC}"
+
+    echo ""
+}
+
+# Function to run Java test
+# Step 1: runs JavaPyE2ETest#testJavaWriteRead through Maven for the paimon-core module,
+# passing -Drun.e2e.tests=true so the test is enabled.
+# Returns 0 when Maven succeeds, 1 otherwise.
+run_java_write_test() {
+    echo -e "${YELLOW}=== Step 1: Running Java Test (JavaPyE2ETest.testJavaWriteRead) ===${NC}"
+
+    cd "$PROJECT_ROOT"
+
+    # Run the specific Java test method
+    echo "Running Maven test for JavaPyE2ETest.testJavaWriteRead..."
+    if mvn test -Dtest=org.apache.paimon.JavaPyE2ETest#testJavaWriteRead -pl paimon-core -q -Drun.e2e.tests=true; then
+        echo -e "${GREEN}✓ Java test completed successfully${NC}"
+        return 0
+    else
+        echo -e "${RED}✗ Java test failed${NC}"
+        return 1
+    fi
+}
+
+# Function to run Python test
+# Step 2: runs JavaPyReadWriteTest::test_read with pytest from the e2e test directory.
+# Returns 0 when pytest succeeds, 1 otherwise.
+run_python_read_test() {
+    echo -e "${YELLOW}=== Step 2: Running Python Test (JavaPyReadWriteTest.test_read) ===${NC}"
+
+    cd "$PAIMON_PYTHON_DIR"
+
+    # Run the specific Python test method
+    echo "Running Python test for JavaPyReadWriteTest.test_read..."
+    if python -m pytest java_py_read_write_test.py::JavaPyReadWriteTest::test_read -v; then
+        echo -e "${GREEN}✓ Python test completed successfully${NC}"
+        return 0
+    else
+        echo -e "${RED}✗ Python test failed${NC}"
+        return 1
+    fi
+}
+
+# Function to run Python Write test for Python-Write-Java-Read scenario
+# Step 3: runs JavaPyReadWriteTest::test_py_write_read with pytest from the e2e test
+# directory. Returns 0 when pytest succeeds, 1 otherwise.
+run_python_write_test() {
+    echo -e "${YELLOW}=== Step 3: Running Python Write Test (JavaPyReadWriteTest.test_py_write_read) ===${NC}"
+
+    cd "$PAIMON_PYTHON_DIR"
+
+    # Run the specific Python test method for writing data
+    echo "Running Python test for JavaPyReadWriteTest.test_py_write_read (Python Write)..."
+    if python -m pytest java_py_read_write_test.py::JavaPyReadWriteTest::test_py_write_read -v; then
+        echo -e "${GREEN}✓ Python write test completed successfully${NC}"
+        return 0
+    else
+        echo -e "${RED}✗ Python write test failed${NC}"
+        return 1
+    fi
+}
+
+# Function to run Java Read test for Python-Write-Java-Read scenario
+# Step 4: runs JavaPyE2ETest#testRead through Maven for the paimon-core module, passing
+# -Drun.e2e.tests=true so the test is enabled.
+# Returns 0 when Maven succeeds, 1 otherwise.
+run_java_read_test() {
+    echo -e "${YELLOW}=== Step 4: Running Java Read Test (JavaPyE2ETest.testRead) ===${NC}"
+
+    cd "$PROJECT_ROOT"
+
+    # Run the specific Java test method for reading Python-written data
+    echo "Running Maven test for JavaPyE2ETest.testRead (Java Read)..."
+    if mvn test -Dtest=org.apache.paimon.JavaPyE2ETest#testRead -pl paimon-core -q -Drun.e2e.tests=true; then
+        echo -e "${GREEN}✓ Java read test completed successfully${NC}"
+        return 0
+    else
+        echo -e "${RED}✗ Java read test failed${NC}"
+        return 1
+    fi
+}
+
+# Main execution
+main() {
+ local java_write_result=0
+ local python_read_result=0
+ local python_write_result=0
+ local java_read_result=0
+
+ echo -e "${YELLOW}Starting mixed language test execution...${NC}"
+ echo ""
+
+ # Run Java write test
+ if ! run_java_write_test; then
+ java_write_result=1
+ echo -e "${RED}Java test failed, but continuing with Python
test...${NC}"
+ echo ""
+ else
+ echo ""
+ fi
+
+ # Run Python read test
+ if ! run_python_read_test; then
+ python_read_result=1
+ fi
+
+ echo ""
+
+ # Run Python Write - Java Read test sequence
+ echo -e "${YELLOW}Starting Python Write - Java Read test sequence...${NC}"
+ echo ""
+
+ # Run Python write test
+ if ! run_python_write_test; then
+ python_write_result=1
+ echo -e "${RED}Python write test failed, but continuing with Java read
test...${NC}"
+ echo ""
+ else
+ echo ""
+ fi
+
+ # Run Java read test
+ if ! run_java_read_test; then
+ java_read_result=1
+ fi
+
+ echo ""
+ echo -e "${YELLOW}=== Test Results Summary ===${NC}"
+
+ if [[ $java_write_result -eq 0 ]]; then
+ echo -e "${GREEN}✓ Java Write Test (JavaPyE2ETest.testJavaWriteRead):
PASSED${NC}"
+ else
+ echo -e "${RED}✗ Java Write Test (JavaPyE2ETest.testJavaWriteRead):
FAILED${NC}"
+ fi
+
+ if [[ $python_read_result -eq 0 ]]; then
+ echo -e "${GREEN}✓ Python Read Test (JavaPyReadWriteTest.testRead):
PASSED${NC}"
+ else
+ echo -e "${RED}✗ Python Read Test (JavaPyReadWriteTest.testRead):
FAILED${NC}"
+ fi
+
+ if [[ $python_write_result -eq 0 ]]; then
+ echo -e "${GREEN}✓ Python Write Test
(JavaPyReadWriteTest.test_py_write_read): PASSED${NC}"
+ else
+ echo -e "${RED}✗ Python Write Test
(JavaPyReadWriteTest.test_py_write_read): FAILED${NC}"
+ fi
+
+ if [[ $java_read_result -eq 0 ]]; then
+ echo -e "${GREEN}✓ Java Read Test (JavaPyE2ETest.testRead):
PASSED${NC}"
+ else
+ echo -e "${RED}✗ Java Read Test (JavaPyE2ETest.testRead): FAILED${NC}"
+ fi
+
+ echo ""
+
+ # Clean up warehouse directory after all tests
+ cleanup_warehouse
+
+ if [[ $java_write_result -eq 0 && $python_read_result -eq 0 &&
$python_write_result -eq 0 && $java_read_result -eq 0 ]]; then
+ echo -e "${GREEN}🎉 All tests passed! Java-Python interoperability
verified.${NC}"
+ return 0
+ else
+ echo -e "${RED}❌ Some tests failed. Please check the output
above.${NC}"
+ return 1
+ fi
+}
+
+# Run main function
+main "$@"
\ No newline at end of file
diff --git a/paimon-python/pypaimon/tests/e2e/__init__.py
b/paimon-python/pypaimon/tests/e2e/__init__.py
new file mode 100644
index 0000000000..a67d5ea255
--- /dev/null
+++ b/paimon-python/pypaimon/tests/e2e/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py
b/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py
new file mode 100644
index 0000000000..405484d505
--- /dev/null
+++ b/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py
@@ -0,0 +1,89 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+import unittest
+
+import pandas as pd
+import pyarrow as pa
+from pypaimon.catalog.catalog_factory import CatalogFactory
+from pypaimon.schema.schema import Schema
+
+
+class JavaPyReadWriteTest(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.tempdir = os.path.abspath(".")
+ cls.warehouse = os.path.join(cls.tempdir, 'warehouse')
+ cls.catalog = CatalogFactory.create({
+ 'warehouse': cls.warehouse
+ })
+ cls.catalog.create_database('default', True)
+
+ def test_py_write_read(self):
+ pa_schema = pa.schema([
+ ('id', pa.int32()),
+ ('name', pa.string()),
+ ('category', pa.string()),
+ ('value', pa.float64())
+ ])
+
+ schema = Schema.from_pyarrow_schema(
+ pa_schema,
+ partition_keys=['category'],
+ options={'dynamic-partition-overwrite': 'false'}
+ )
+
+ self.catalog.create_table('default.mixed_test_tablep', schema, False)
+ table = self.catalog.get_table('default.mixed_test_tablep')
+
+ initial_data = pd.DataFrame({
+ 'id': [1, 2, 3, 4, 5, 6],
+ 'name': ['Apple', 'Banana', 'Carrot', 'Broccoli', 'Chicken',
'Beef'],
+ 'category': ['Fruit', 'Fruit', 'Vegetable', 'Vegetable', 'Meat',
'Meat'],
+ 'value': [1.5, 0.8, 0.6, 1.2, 5.0, 8.0]
+ })
+ # Write initial data
+ write_builder = table.new_batch_write_builder()
+ table_write = write_builder.new_write()
+ table_commit = write_builder.new_commit()
+
+ table_write.write_pandas(initial_data)
+ table_commit.commit(table_write.prepare_commit())
+ table_write.close()
+ table_commit.close()
+
+ # Verify initial data
+ read_builder = table.new_read_builder()
+ table_scan = read_builder.new_scan()
+ table_read = read_builder.new_read()
+ initial_result = table_read.to_pandas(table_scan.plan().splits())
+ print(initial_result)
+ self.assertEqual(len(initial_result), 6)
+ self.assertListEqual(
+ initial_result['name'].tolist(),
+ ['Apple', 'Banana', 'Carrot', 'Broccoli', 'Chicken', 'Beef']
+ )
+
+ def test_read(self):
+ table = self.catalog.get_table('default.mixed_test_tablej')
+ read_builder = table.new_read_builder()
+ table_scan = read_builder.new_scan()
+ table_read = read_builder.new_read()
+ res = table_read.to_pandas(table_scan.plan().splits())
+ print(res)