This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 655db9da28 [python] Fix AtomicType.to_dict() inconsistency with java (#6548)
655db9da28 is described below
commit 655db9da28d42ddf7c679b45047283f9b0ad6a98
Author: universe-hcy <[email protected]>
AuthorDate: Mon Nov 10 10:30:56 2025 +0800
[python] Fix AtomicType.to_dict() inconsistency with java (#6548)
---
.github/workflows/paimon-python-checks.yml | 4 +-
paimon-python/pypaimon/schema/data_types.py | 14 ++++--
paimon-python/pypaimon/tests/data_types_test.py | 67 +++++++++++++++++++++++++
3 files changed, 79 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/paimon-python-checks.yml
b/.github/workflows/paimon-python-checks.yml
old mode 100644
new mode 100755
index 789e3dd9d6..c4c7e32a84
--- a/.github/workflows/paimon-python-checks.yml
+++ b/.github/workflows/paimon-python-checks.yml
@@ -67,10 +67,10 @@ jobs:
if [[ "${{ matrix.python-version }}" == "3.6.15" ]]; then
python -m pip install --upgrade pip==21.3.1
python --version
- python -m pip install -q readerwriterlock==1.0.9
'fsspec==2021.10.1' 'cachetools==4.2.4' 'ossfs==2021.8.0' pyarrow==6.0.1
pandas==1.1.5 'polars==0.9.12' 'fastavro==1.4.7' zstandard==0.19.0
dataclasses==0.8.0 flake8 pytest py4j==0.10.9.9 requests 2>&1 >/dev/null
+ python -m pip install -q readerwriterlock==1.0.9
'fsspec==2021.10.1' 'cachetools==4.2.4' 'ossfs==2021.8.0' pyarrow==6.0.1
pandas==1.1.5 'polars==0.9.12' 'fastavro==1.4.7' zstandard==0.19.0
dataclasses==0.8.0 flake8 pytest py4j==0.10.9.9 requests parameterized==0.8.1
2>&1 >/dev/null
else
python -m pip install --upgrade pip
- python -m pip install -q readerwriterlock==1.0.9 fsspec==2024.3.1
cachetools==5.3.3 ossfs==2023.12.0 ray==2.48.0 fastavro==1.11.1 pyarrow==16.0.0
zstandard==0.24.0 polars==1.32.0 duckdb==1.3.2 numpy==1.24.3 pandas==2.0.3
flake8==4.0.1 pytest~=7.0 py4j==0.10.9.9 requests 2>&1 >/dev/null
+ python -m pip install -q readerwriterlock==1.0.9 fsspec==2024.3.1
cachetools==5.3.3 ossfs==2023.12.0 ray==2.48.0 fastavro==1.11.1 pyarrow==16.0.0
zstandard==0.24.0 polars==1.32.0 duckdb==1.3.2 numpy==1.24.3 pandas==2.0.3
flake8==4.0.1 pytest~=7.0 py4j==0.10.9.9 requests parameterized==0.9.0 2>&1
>/dev/null
fi
- name: Run lint-python.sh
shell: bash
diff --git a/paimon-python/pypaimon/schema/data_types.py
b/paimon-python/pypaimon/schema/data_types.py
index 3f7b30c291..91404cb193 100755
--- a/paimon-python/pypaimon/schema/data_types.py
+++ b/paimon-python/pypaimon/schema/data_types.py
@@ -73,8 +73,10 @@ class AtomicType(DataType):
super().__init__(nullable)
self.type = type
- def to_dict(self) -> Dict[str, Any]:
- return {"type": self.type if self.nullable else self.type + " NOT NULL"}
+ def to_dict(self) -> str:
+ if not self.nullable:
+ return self.type + " NOT NULL"
+ return self.type
@classmethod
def from_dict(cls, data: str) -> "AtomicType":
@@ -119,7 +121,8 @@ class MultisetType(DataType):
def to_dict(self) -> Dict[str, Any]:
return {
- "type": "MULTISET{}".format('<' + str(self.element) + '>' if self.element else ''),
+ "type": "MULTISET{}{}".format('<' + str(self.element) + '>' if self.element else '',
+ " NOT NULL" if not self.nullable else ""),
"element": self.element.to_dict() if self.element else None,
"nullable": self.nullable,
}
@@ -232,7 +235,10 @@ class RowType(DataType):
return DataTypeParser.parse_data_type(data)
def __str__(self) -> str:
- field_strs = ["{}: {}".format(field.name, field.type) for field in
self.fields]
+ field_strs = []
+ for field in self.fields:
+ description = " COMMENT {}".format(field.description) if
field.description else ""
+ field_strs.append("{}: {}{}".format(field.name, field.type,
description))
null_suffix = "" if self.nullable else " NOT NULL"
return "ROW<{}>{}".format(', '.join(field_strs), null_suffix)
diff --git a/paimon-python/pypaimon/tests/data_types_test.py
b/paimon-python/pypaimon/tests/data_types_test.py
new file mode 100755
index 0000000000..53644e24c5
--- /dev/null
+++ b/paimon-python/pypaimon/tests/data_types_test.py
@@ -0,0 +1,67 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import unittest
+from parameterized import parameterized
+
+from pypaimon.schema.data_types import DataField, AtomicType, ArrayType,
MultisetType, MapType, RowType
+
+
+class DataTypesTest(unittest.TestCase):
+ def test_atomic_type(self):
+ self.assertEqual(str(AtomicType("BLOB")), "BLOB")
+ self.assertEqual(str(AtomicType("TINYINT", nullable=False)), "TINYINT NOT NULL")
+ self.assertEqual(str(AtomicType("BIGINT", nullable=False)), "BIGINT NOT NULL")
+ self.assertEqual(str(AtomicType("BOOLEAN", nullable=False)), "BOOLEAN NOT NULL")
+ self.assertEqual(str(AtomicType("DOUBLE")), "DOUBLE")
+ self.assertEqual(str(AtomicType("STRING")), "STRING")
+ self.assertEqual(str(AtomicType("BINARY(12)")), "BINARY(12)")
+ self.assertEqual(str(AtomicType("DECIMAL(10, 6)")), "DECIMAL(10, 6)")
+ self.assertEqual(str(AtomicType("BYTES")), "BYTES")
+ self.assertEqual(str(AtomicType("DATE")), "DATE")
+ self.assertEqual(str(AtomicType("TIME(0)")), "TIME(0)")
+ self.assertEqual(str(AtomicType("TIMESTAMP(0)")), "TIMESTAMP(0)")
+ self.assertEqual(str(AtomicType("SMALLINT", nullable=False)),
+ str(AtomicType.from_dict(AtomicType("SMALLINT",
nullable=False).to_dict())))
+ self.assertEqual(str(AtomicType("INT")),
+ str(AtomicType.from_dict(AtomicType("INT").to_dict())))
+
+ @parameterized.expand([
+ (ArrayType, AtomicType("TIMESTAMP(6)"), "ARRAY<TIMESTAMP(6)>",
"ARRAY<ARRAY<TIMESTAMP(6)>>"),
+ (MultisetType, AtomicType("TIMESTAMP(6)"), "MULTISET<TIMESTAMP(6)>",
"MULTISET<MULTISET<TIMESTAMP(6)>>")
+ ])
+ def test_complex_types(self, data_type_class, element_type, expected1,
expected2):
+ self.assertEqual(str(data_type_class(True, element_type)), expected1)
+ self.assertEqual(str(data_type_class(True, data_type_class(True,
element_type))), expected2)
+ self.assertEqual(str(data_type_class(False, element_type)), expected1
+ " NOT NULL")
+ self.assertEqual(str(data_type_class(False, element_type)),
+ str(data_type_class.from_dict(data_type_class(False,
element_type).to_dict())))
+ self.assertEqual(str(data_type_class(True, element_type)),
+ str(data_type_class.from_dict(data_type_class(True,
element_type).to_dict())))
+
+ def test_map_type(self):
+ self.assertEqual(str(MapType(True, AtomicType("STRING"),
AtomicType("TIMESTAMP(6)"))),
+ "MAP<STRING, TIMESTAMP(6)>")
+
+ def test_row_type(self):
+ self.assertEqual(str(RowType(True, [DataField(0, "a", AtomicType("STRING"), "Someone's desc."),
+ DataField(1, "b", AtomicType("TIMESTAMP(6)"),)])),
+ "ROW<a: STRING COMMENT Someone's desc., b: TIMESTAMP(6)>")
+ row_data = RowType(True, [DataField(0, "a", AtomicType("STRING"),
"Someone's desc."),
+ DataField(1, "b",
AtomicType("TIMESTAMP(6)"),)])
+ self.assertEqual(str(row_data),
+ str(RowType.from_dict(row_data.to_dict())))