This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch release-1.3
in repository https://gitbox.apache.org/repos/asf/paimon.git

commit 787210fbeadc346a906bcab37ca1ecbed4868b59
Author: universe-hcy <[email protected]>
AuthorDate: Mon Nov 10 10:30:56 2025 +0800

    [python] Fix AtomicType.to_dict() inconsistency with java (#6548)
---
 .github/workflows/paimon-python-checks.yml      |  4 +-
 paimon-python/pypaimon/schema/data_types.py     | 14 ++++--
 paimon-python/pypaimon/tests/data_types_test.py | 67 +++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/paimon-python-checks.yml b/.github/workflows/paimon-python-checks.yml
old mode 100644
new mode 100755
index 789e3dd9d6..c4c7e32a84
--- a/.github/workflows/paimon-python-checks.yml
+++ b/.github/workflows/paimon-python-checks.yml
@@ -67,10 +67,10 @@ jobs:
           if [[ "${{ matrix.python-version }}" == "3.6.15" ]]; then
             python -m pip install --upgrade pip==21.3.1
             python --version
-            python -m pip install -q readerwriterlock==1.0.9 'fsspec==2021.10.1' 'cachetools==4.2.4' 'ossfs==2021.8.0' pyarrow==6.0.1 pandas==1.1.5 'polars==0.9.12' 'fastavro==1.4.7' zstandard==0.19.0 dataclasses==0.8.0 flake8 pytest py4j==0.10.9.9 requests 2>&1 >/dev/null
+            python -m pip install -q readerwriterlock==1.0.9 'fsspec==2021.10.1' 'cachetools==4.2.4' 'ossfs==2021.8.0' pyarrow==6.0.1 pandas==1.1.5 'polars==0.9.12' 'fastavro==1.4.7' zstandard==0.19.0 dataclasses==0.8.0 flake8 pytest py4j==0.10.9.9 requests parameterized==0.8.1 2>&1 >/dev/null
           else
             python -m pip install --upgrade pip
-            python -m pip install -q readerwriterlock==1.0.9 fsspec==2024.3.1 cachetools==5.3.3 ossfs==2023.12.0 ray==2.48.0 fastavro==1.11.1 pyarrow==16.0.0 zstandard==0.24.0 polars==1.32.0 duckdb==1.3.2 numpy==1.24.3 pandas==2.0.3 flake8==4.0.1 pytest~=7.0 py4j==0.10.9.9 requests 2>&1 >/dev/null
+            python -m pip install -q readerwriterlock==1.0.9 fsspec==2024.3.1 cachetools==5.3.3 ossfs==2023.12.0 ray==2.48.0 fastavro==1.11.1 pyarrow==16.0.0 zstandard==0.24.0 polars==1.32.0 duckdb==1.3.2 numpy==1.24.3 pandas==2.0.3 flake8==4.0.1 pytest~=7.0 py4j==0.10.9.9 requests parameterized==0.9.0 2>&1 >/dev/null
           fi
       - name: Run lint-python.sh
         shell: bash
diff --git a/paimon-python/pypaimon/schema/data_types.py b/paimon-python/pypaimon/schema/data_types.py
index 3f7b30c291..91404cb193 100755
--- a/paimon-python/pypaimon/schema/data_types.py
+++ b/paimon-python/pypaimon/schema/data_types.py
@@ -73,8 +73,10 @@ class AtomicType(DataType):
         super().__init__(nullable)
         self.type = type
 
-    def to_dict(self) -> Dict[str, Any]:
-        return {"type": self.type if self.nullable else self.type + " NOT NULL"}
+    def to_dict(self) -> str:
+        if not self.nullable:
+            return self.type + " NOT NULL"
+        return self.type
 
     @classmethod
     def from_dict(cls, data: str) -> "AtomicType":
@@ -119,7 +121,8 @@ class MultisetType(DataType):
 
     def to_dict(self) -> Dict[str, Any]:
         return {
-            "type": "MULTISET{}".format('<' + str(self.element) + '>' if self.element else ''),
+            "type": "MULTISET{}{}".format('<' + str(self.element) + '>' if self.element else '',
+                                          " NOT NULL" if not self.nullable else ""),
             "element": self.element.to_dict() if self.element else None,
             "nullable": self.nullable,
         }
@@ -232,7 +235,10 @@ class RowType(DataType):
         return DataTypeParser.parse_data_type(data)
 
     def __str__(self) -> str:
-        field_strs = ["{}: {}".format(field.name, field.type) for field in self.fields]
+        field_strs = []
+        for field in self.fields:
+            description = " COMMENT {}".format(field.description) if field.description else ""
+            field_strs.append("{}: {}{}".format(field.name, field.type, description))
         null_suffix = "" if self.nullable else " NOT NULL"
         return "ROW<{}>{}".format(', '.join(field_strs), null_suffix)
 
diff --git a/paimon-python/pypaimon/tests/data_types_test.py b/paimon-python/pypaimon/tests/data_types_test.py
new file mode 100755
index 0000000000..53644e24c5
--- /dev/null
+++ b/paimon-python/pypaimon/tests/data_types_test.py
@@ -0,0 +1,67 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import unittest
+from parameterized import parameterized
+
+from pypaimon.schema.data_types import DataField, AtomicType, ArrayType, MultisetType, MapType, RowType
+
+
+class DataTypesTest(unittest.TestCase):
+    def test_atomic_type(self):
+        self.assertEqual(str(AtomicType("BLOB")), "BLOB")
+        self.assertEqual(str(AtomicType("TINYINT", nullable=False)), "TINYINT NOT NULL")
+        self.assertEqual(str(AtomicType("BIGINT", nullable=False)), "BIGINT NOT NULL")
+        self.assertEqual(str(AtomicType("BOOLEAN", nullable=False)), "BOOLEAN NOT NULL")
+        self.assertEqual(str(AtomicType("DOUBLE")), "DOUBLE")
+        self.assertEqual(str(AtomicType("STRING")), "STRING")
+        self.assertEqual(str(AtomicType("BINARY(12)")), "BINARY(12)")
+        self.assertEqual(str(AtomicType("DECIMAL(10, 6)")), "DECIMAL(10, 6)")
+        self.assertEqual(str(AtomicType("BYTES")), "BYTES")
+        self.assertEqual(str(AtomicType("DATE")), "DATE")
+        self.assertEqual(str(AtomicType("TIME(0)")), "TIME(0)")
+        self.assertEqual(str(AtomicType("TIMESTAMP(0)")), "TIMESTAMP(0)")
+        self.assertEqual(str(AtomicType("SMALLINT", nullable=False)),
+                         str(AtomicType.from_dict(AtomicType("SMALLINT", nullable=False).to_dict())))
+        self.assertEqual(str(AtomicType("INT")),
+                         str(AtomicType.from_dict(AtomicType("INT").to_dict())))
+
+    @parameterized.expand([
+        (ArrayType, AtomicType("TIMESTAMP(6)"), "ARRAY<TIMESTAMP(6)>", "ARRAY<ARRAY<TIMESTAMP(6)>>"),
+        (MultisetType, AtomicType("TIMESTAMP(6)"), "MULTISET<TIMESTAMP(6)>", "MULTISET<MULTISET<TIMESTAMP(6)>>")
+    ])
+    def test_complex_types(self, data_type_class, element_type, expected1, expected2):
+        self.assertEqual(str(data_type_class(True, element_type)), expected1)
+        self.assertEqual(str(data_type_class(True, data_type_class(True, element_type))), expected2)
+        self.assertEqual(str(data_type_class(False, element_type)), expected1 + " NOT NULL")
+        self.assertEqual(str(data_type_class(False, element_type)),
+                         str(data_type_class.from_dict(data_type_class(False, element_type).to_dict())))
+        self.assertEqual(str(data_type_class(True, element_type)),
+                         str(data_type_class.from_dict(data_type_class(True, element_type).to_dict())))
+
+    def test_map_type(self):
+        self.assertEqual(str(MapType(True, AtomicType("STRING"), AtomicType("TIMESTAMP(6)"))),
+                         "MAP<STRING, TIMESTAMP(6)>")
+
+    def test_row_type(self):
+        self.assertEqual(str(RowType(True, [DataField(0, "a", AtomicType("STRING"), "Someone's desc."),
+                                            DataField(1, "b", AtomicType("TIMESTAMP(6)"),)])),
+                         "ROW<a: STRING COMMENT Someone's desc., b: TIMESTAMP(6)>")
+        row_data = RowType(True, [DataField(0, "a", AtomicType("STRING"), "Someone's desc."),
+                                  DataField(1, "b", AtomicType("TIMESTAMP(6)"),)])
+        self.assertEqual(str(row_data),
+                         str(RowType.from_dict(row_data.to_dict())))

Reply via email to