This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new f43a348970 [Python] Enhance data type support (#6253)
f43a348970 is described below

commit f43a348970ac636a167d1c5631f8abd79185be45
Author: ChengHui Chen <[email protected]>
AuthorDate: Mon Sep 15 10:07:39 2025 +0800

    [Python] Enhance data type support (#6253)
---
 paimon-python/pypaimon/schema/data_types.py        | 122 ++++++++++++++-------
 paimon-python/pypaimon/table/row/generic_row.py    |  53 +++++----
 .../pypaimon/tests/py36/ao_read_write_test.py      |  63 ++++++++++-
 paimon-python/pypaimon/tests/reader_basic_test.py  |  67 ++++++++++-
 paimon-python/pypaimon/tests/schema_test.py        |  27 ++++-
 5 files changed, 259 insertions(+), 73 deletions(-)
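
For readers skimming the diff, a minimal illustrative sketch (not part of the commit) of how the enhanced type mappings behave, using only PyarrowFieldParser and AtomicType as they appear in data_types.py below. The one-argument AtomicType constructor and its .type attribute are assumed from the tests in this change:

    import pyarrow as pa
    from pypaimon.schema.data_types import AtomicType, PyarrowFieldParser

    # Paimon -> Arrow: parametrized types are now parsed instead of falling back to defaults.
    assert PyarrowFieldParser.from_paimon_type(AtomicType('DECIMAL(10, 2)')) == pa.decimal128(10, 2)
    assert PyarrowFieldParser.from_paimon_type(AtomicType('TIMESTAMP(3)')) == pa.timestamp('ms')
    assert PyarrowFieldParser.from_paimon_type(AtomicType('TIME(9)')) == pa.time64('ns')

    # Arrow -> Paimon: width, precision and scale are preserved in the type string.
    assert PyarrowFieldParser.to_paimon_type(pa.binary(10), True).type == 'BINARY(10)'
    assert PyarrowFieldParser.to_paimon_type(pa.decimal128(10, 2), True).type == 'DECIMAL(10, 2)'
    assert PyarrowFieldParser.to_paimon_type(pa.timestamp('us'), True).type == 'TIMESTAMP(6)'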

diff --git a/paimon-python/pypaimon/schema/data_types.py b/paimon-python/pypaimon/schema/data_types.py
index b53a779b41..5255ac6b1e 100644
--- a/paimon-python/pypaimon/schema/data_types.py
+++ b/paimon-python/pypaimon/schema/data_types.py
@@ -23,6 +23,7 @@ from enum import Enum
 from typing import Any, Dict, List, Optional, Union
 
 import pyarrow
+from pyarrow import types
 
 
 class AtomicInteger:
@@ -385,6 +386,7 @@ class PyarrowFieldParser:
 
     @staticmethod
     def from_paimon_type(data_type: DataType) -> pyarrow.DataType:
+        # Based on Paimon DataTypes Doc: https://paimon.apache.org/docs/master/concepts/data-types/
         if isinstance(data_type, AtomicType):
             type_name = data_type.type.upper()
             if type_name == 'TINYINT':
@@ -401,31 +403,67 @@ class PyarrowFieldParser:
                 return pyarrow.float64()
             elif type_name == 'BOOLEAN':
                 return pyarrow.bool_()
-            elif type_name == 'STRING':
+            elif type_name == 'STRING' or type_name.startswith('CHAR') or type_name.startswith('VARCHAR'):
                 return pyarrow.string()
-            elif type_name == 'BINARY':
+            elif type_name == 'BYTES' or type_name.startswith('VARBINARY'):
                 return pyarrow.binary()
+            elif type_name.startswith('BINARY'):
+                if type_name == 'BINARY':
+                    return pyarrow.binary(1)
+                match = re.fullmatch(r'BINARY\((\d+)\)', type_name)
+                if match:
+                    length = int(match.group(1))
+                    if length > 0:
+                        return pyarrow.binary(length)
+            elif type_name.startswith('DECIMAL'):
+                if type_name == 'DECIMAL':
+                    return pyarrow.decimal128(10, 0)  # default to 10, 0
+                match_ps = re.fullmatch(r'DECIMAL\((\d+),\s*(\d+)\)', type_name)
+                if match_ps:
+                    precision, scale = map(int, match_ps.groups())
+                    return pyarrow.decimal128(precision, scale)
+                match_p = re.fullmatch(r'DECIMAL\((\d+)\)', type_name)
+                if match_p:
+                    precision = int(match_p.group(1))
+                    return pyarrow.decimal128(precision, 0)
+            if type_name.startswith('TIMESTAMP'):
+                # WITH_LOCAL_TIME_ZONE is ambiguous and not supported
+                if type_name == 'TIMESTAMP':
+                    return pyarrow.timestamp('us', tz=None)  # default to 6
+                match = re.fullmatch(r'TIMESTAMP\((\d+)\)', type_name)
+                if match:
+                    precision = int(match.group(1))
+                    if precision == 0:
+                        return pyarrow.timestamp('s', tz=None)
+                    elif 1 <= precision <= 3:
+                        return pyarrow.timestamp('ms', tz=None)
+                    elif 4 <= precision <= 6:
+                        return pyarrow.timestamp('us', tz=None)
+                    elif 7 <= precision <= 9:
+                        return pyarrow.timestamp('ns', tz=None)
             elif type_name == 'DATE':
                 return pyarrow.date32()
-            elif type_name == 'TIMESTAMP':
-                return pyarrow.timestamp('ms')
-            elif type_name.startswith('DECIMAL'):
-                match = re.match(r'DECIMAL\((\d+),\s*(\d+)\)', type_name)
+            if type_name.startswith('TIME'):
+                if type_name == 'TIME':
+                    return pyarrow.time64('us')  # default to 6
+                match = re.fullmatch(r'TIME\((\d+)\)', type_name)
                 if match:
-                    precision, scale = map(int, match.groups())
-                    return pyarrow.decimal128(precision, scale)
-                else:
-                    return pyarrow.decimal128(38, 18)
-            else:
-                raise ValueError("Unsupported data type: {}".format(type_name))
+                    precision = int(match.group(1))
+                    if precision == 0:
+                        return pyarrow.time32('s')
+                    if 1 <= precision <= 3:
+                        return pyarrow.time32('ms')
+                    if 4 <= precision <= 6:
+                        return pyarrow.time64('us')
+                    if 7 <= precision <= 9:
+                        return pyarrow.time64('ns')
         elif isinstance(data_type, ArrayType):
             return pyarrow.list_(PyarrowFieldParser.from_paimon_type(data_type.element))
         elif isinstance(data_type, MapType):
             key_type = PyarrowFieldParser.from_paimon_type(data_type.key)
             value_type = PyarrowFieldParser.from_paimon_type(data_type.value)
             return pyarrow.map_(key_type, value_type)
-        else:
-            raise ValueError("Unsupported data type: {}".format(data_type))
+        raise ValueError("Unsupported data type: {}".format(data_type))
 
     @staticmethod
     def from_paimon_field(data_field: DataField) -> pyarrow.Field:
@@ -444,48 +482,52 @@ class PyarrowFieldParser:
 
     @staticmethod
     def to_paimon_type(pa_type: pyarrow.DataType, nullable: bool) -> DataType:
-        type_name = str(pa_type)
-        if type_name == "int8":
+        # Based on Arrow DataTypes Doc: https://arrow.apache.org/docs/python/api/datatypes.html
+        # All safe mappings are already implemented, adding new mappings requires rigorous evaluation
+        # to avoid potential data loss
+        type_name = None
+        if types.is_int8(pa_type):
             type_name = 'TINYINT'
-        elif type_name == "int16":
+        elif types.is_int16(pa_type):
             type_name = 'SMALLINT'
-        elif type_name == "int32":
+        elif types.is_int32(pa_type):
             type_name = 'INT'
-        elif type_name == "int64":
+        elif types.is_int64(pa_type):
             type_name = 'BIGINT'
-        elif type_name.startswith('float'):
+        elif types.is_float32(pa_type):
             type_name = 'FLOAT'
-        elif type_name.startswith('double'):
+        elif types.is_float64(pa_type):
             type_name = 'DOUBLE'
-        elif type_name.startswith('bool'):
+        elif types.is_boolean(pa_type):
             type_name = 'BOOLEAN'
-        elif type_name.startswith('string'):
+        elif types.is_string(pa_type):
             type_name = 'STRING'
-        elif type_name.startswith('binary'):
-            type_name = 'BINARY'
-        elif type_name.startswith('date'):
+        elif types.is_fixed_size_binary(pa_type):
+            type_name = f'BINARY({pa_type.byte_width})'
+        elif types.is_binary(pa_type):
+            type_name = 'BYTES'
+        elif types.is_decimal(pa_type):
+            type_name = f'DECIMAL({pa_type.precision}, {pa_type.scale})'
+        elif types.is_timestamp(pa_type) and pa_type.tz is None:
+            precision_mapping = {'s': 0, 'ms': 3, 'us': 6, 'ns': 9}
+            type_name = f'TIMESTAMP({precision_mapping[pa_type.unit]})'
+        elif types.is_date32(pa_type):
             type_name = 'DATE'
-        elif type_name.startswith('timestamp'):
-            type_name = 'TIMESTAMP'
-        elif type_name.startswith('decimal'):
-            match = re.match(r'decimal\((\d+),\s*(\d+)\)', type_name)
-            if match:
-                precision, scale = map(int, match.groups())
-                type_name = 'DECIMAL({},{})'.format(precision, scale)
-            else:
-                type_name = 'DECIMAL(38,18)'
-        elif type_name.startswith('list'):
+        elif types.is_time(pa_type):
+            precision_mapping = {'s': 0, 'ms': 3, 'us': 6, 'ns': 9}
+            type_name = f'TIME({precision_mapping[pa_type.unit]})'
+        elif types.is_list(pa_type) or types.is_large_list(pa_type):
             pa_type: pyarrow.ListType
             element_type = PyarrowFieldParser.to_paimon_type(pa_type.value_type, nullable)
             return ArrayType(nullable, element_type)
-        elif type_name.startswith('map'):
+        elif types.is_map(pa_type):
             pa_type: pyarrow.MapType
             key_type = PyarrowFieldParser.to_paimon_type(pa_type.key_type, nullable)
             value_type = PyarrowFieldParser.to_paimon_type(pa_type.item_type, nullable)
             return MapType(nullable, key_type, value_type)
-        else:
-            raise ValueError("Unknown type: {}".format(type_name))
-        return AtomicType(type_name, nullable)
+        if type_name is not None:
+            return AtomicType(type_name, nullable)
+        raise ValueError("Unsupported pyarrow type: {}".format(pa_type))
 
     @staticmethod
     def to_paimon_field(field_idx: int, pa_field: pyarrow.Field) -> DataField:
diff --git a/paimon-python/pypaimon/table/row/generic_row.py b/paimon-python/pypaimon/table/row/generic_row.py
index 14f42e806c..a7612168d9 100644
--- a/paimon-python/pypaimon/table/row/generic_row.py
+++ b/paimon-python/pypaimon/table/row/generic_row.py
@@ -18,7 +18,7 @@
 
 import struct
 from dataclasses import dataclass
-from datetime import datetime, timedelta, timezone
+from datetime import date, datetime, time, timedelta
 from decimal import Decimal
 from typing import Any, List
 
@@ -107,17 +107,17 @@ class GenericRowDeserializer:
             return cls._parse_float(bytes_data, field_offset)
         elif type_name in ['DOUBLE']:
             return cls._parse_double(bytes_data, field_offset)
-        elif type_name in ['VARCHAR', 'STRING', 'CHAR']:
+        elif type_name.startswith('CHAR') or type_name.startswith('VARCHAR') or type_name == 'STRING':
             return cls._parse_string(bytes_data, base_offset, field_offset)
-        elif type_name in ['BINARY', 'VARBINARY', 'BYTES']:
+        elif type_name.startswith('BINARY') or type_name.startswith('VARBINARY') or type_name == 'BYTES':
             return cls._parse_binary(bytes_data, base_offset, field_offset)
-        elif type_name in ['DECIMAL', 'NUMERIC']:
+        elif type_name.startswith('DECIMAL') or type_name.startswith('NUMERIC'):
             return cls._parse_decimal(bytes_data, base_offset, field_offset, data_type)
-        elif type_name in ['TIMESTAMP', 'TIMESTAMP_WITHOUT_TIME_ZONE']:
+        elif type_name.startswith('TIMESTAMP'):
             return cls._parse_timestamp(bytes_data, base_offset, field_offset, data_type)
         elif type_name in ['DATE']:
             return cls._parse_date(bytes_data, field_offset)
-        elif type_name in ['TIME', 'TIME_WITHOUT_TIME_ZONE']:
+        elif type_name.startswith('TIME'):
             return cls._parse_time(bytes_data, field_offset)
         else:
             return cls._parse_string(bytes_data, base_offset, field_offset)
@@ -213,19 +213,19 @@ class GenericRowDeserializer:
     @classmethod
     def _parse_timestamp(cls, bytes_data: bytes, base_offset: int, field_offset: int, data_type: DataType) -> datetime:
         millis = struct.unpack('<q', bytes_data[field_offset:field_offset + 8])[0]
-        return datetime.fromtimestamp(millis / 1000.0, tz=timezone.utc)
+        return datetime.fromtimestamp(millis / 1000.0, tz=None)
 
     @classmethod
-    def _parse_date(cls, bytes_data: bytes, field_offset: int) -> datetime:
+    def _parse_date(cls, bytes_data: bytes, field_offset: int) -> date:
         days = struct.unpack('<i', bytes_data[field_offset:field_offset + 4])[0]
-        return datetime(1970, 1, 1) + timedelta(days=days)
+        return date(1970, 1, 1) + timedelta(days=days)
 
     @classmethod
-    def _parse_time(cls, bytes_data: bytes, field_offset: int) -> datetime:
+    def _parse_time(cls, bytes_data: bytes, field_offset: int) -> time:
         millis = struct.unpack('<i', bytes_data[field_offset:field_offset + 4])[0]
         seconds = millis // 1000
         microseconds = (millis % 1000) * 1000
-        return datetime(1970, 1, 1).replace(
+        return time(
             hour=seconds // 3600,
             minute=(seconds % 3600) // 60,
             second=seconds % 60,
@@ -260,8 +260,8 @@ class GenericRowSerializer:
                 raise ValueError(f"BinaryRow only support AtomicType yet, meet 
{field.type.__class__}")
 
             type_name = field.type.type.upper()
-            if type_name in ['VARCHAR', 'STRING', 'CHAR', 'BINARY', 'VARBINARY', 'BYTES']:
-                if type_name in ['VARCHAR', 'STRING', 'CHAR']:
+            if any(type_name.startswith(p) for p in ['CHAR', 'VARCHAR', 'STRING', 'BINARY', 'VARBINARY', 'BYTES']):
+                if any(type_name.startswith(p) for p in ['CHAR', 'VARCHAR', 'STRING']):
                     value_bytes = str(value).encode('utf-8')
                 else:
                     value_bytes = bytes(value)
@@ -320,13 +320,13 @@ class GenericRowSerializer:
             return cls._serialize_float(value) + b'\x00' * 4
         elif type_name in ['DOUBLE']:
             return cls._serialize_double(value)
-        elif type_name in ['DECIMAL', 'NUMERIC']:
+        elif type_name.startswith('DECIMAL') or type_name.startswith('NUMERIC'):
             return cls._serialize_decimal(value, data_type)
-        elif type_name in ['TIMESTAMP', 'TIMESTAMP_WITHOUT_TIME_ZONE']:
+        elif type_name.startswith('TIMESTAMP'):
             return cls._serialize_timestamp(value)
         elif type_name in ['DATE']:
             return cls._serialize_date(value) + b'\x00' * 4
-        elif type_name in ['TIME', 'TIME_WITHOUT_TIME_ZONE']:
+        elif type_name.startswith('TIME'):
             return cls._serialize_time(value) + b'\x00' * 4
         else:
             raise TypeError(f"Unsupported type for serialization: {type_name}")
@@ -379,27 +379,26 @@ class GenericRowSerializer:
 
     @classmethod
     def _serialize_timestamp(cls, value: datetime) -> bytes:
-        if value.tzinfo is None:
-            value = value.replace(tzinfo=timezone.utc)
+        if value.tzinfo is not None:
+            raise RuntimeError("datetime tzinfo not supported yet")
         millis = int(value.timestamp() * 1000)
         return struct.pack('<q', millis)
 
     @classmethod
-    def _serialize_date(cls, value: datetime) -> bytes:
-        if isinstance(value, datetime):
+    def _serialize_date(cls, value: date) -> bytes:
+        if isinstance(value, date):
             epoch = datetime(1970, 1, 1).date()
-            days = (value.date() - epoch).days
+            days = (value - epoch).days
         else:
-            raise RuntimeError("date should be datatime")
+            raise RuntimeError("value should be datatime.date")
         return struct.pack('<i', days)
 
     @classmethod
-    def _serialize_time(cls, value: datetime) -> bytes:
-        if isinstance(value, datetime):
-            midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
-            millis = int((value - midnight).total_seconds() * 1000)
-        else:
+    def _serialize_time(cls, value: time) -> bytes:
+        if isinstance(value, time):
             millis = value.hour * 3600000 + value.minute * 60000 + value.second * 1000 + value.microsecond // 1000
+        else:
+            raise RuntimeError("value should be datatime.time")
         return struct.pack('<i', millis)
 
     @classmethod
diff --git a/paimon-python/pypaimon/tests/py36/ao_read_write_test.py b/paimon-python/pypaimon/tests/py36/ao_read_write_test.py
index 0e8d97d47b..fcd05ee6cf 100644
--- a/paimon-python/pypaimon/tests/py36/ao_read_write_test.py
+++ b/paimon-python/pypaimon/tests/py36/ao_read_write_test.py
@@ -16,7 +16,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 import logging
-from datetime import datetime
+from datetime import datetime, date
+from decimal import Decimal
 from unittest.mock import Mock
 
 import pandas as pd
@@ -124,6 +125,66 @@ class RESTTableReadWritePy36Test(RESTCatalogBaseTest):
         pd.testing.assert_frame_equal(
             actual_df2.reset_index(drop=True), df2.reset_index(drop=True))
 
+    def test_full_data_types(self):
+        simple_pa_schema = pa.schema([
+            ('f0', pa.int8()),
+            ('f1', pa.int16()),
+            ('f2', pa.int32()),
+            ('f3', pa.int64()),
+            ('f4', pa.float32()),
+            ('f5', pa.float64()),
+            ('f6', pa.bool_()),
+            ('f7', pa.string()),
+            ('f8', pa.binary()),
+            ('f9', pa.binary(10)),
+            ('f10', pa.decimal128(10, 2)),
+            ('f11', pa.date32()),
+        ])
+        schema = Schema.from_pyarrow_schema(simple_pa_schema)
+        self.rest_catalog.create_table('default.test_full_data_types', schema, False)
+        table = self.rest_catalog.get_table('default.test_full_data_types')
+
+        # to test read and write
+        write_builder = table.new_batch_write_builder()
+        table_write = write_builder.new_write()
+        table_commit = write_builder.new_commit()
+        expect_data = pa.Table.from_pydict({
+            'f0': [-1, 2],
+            'f1': [-1001, 1002],
+            'f2': [-1000001, 1000002],
+            'f3': [-10000000001, 10000000002],
+            'f4': [-1001.05, 1002.05],
+            'f5': [-1000001.05, 1000002.05],
+            'f6': [False, True],
+            'f7': ['Hello', 'World'],
+            'f8': [b'\x01\x02\x03', b'pyarrow'],
+            'f9': [b'exactly_10', b'pad'.ljust(10, b'\x00')],
+            'f10': [Decimal('-987.65'), Decimal('12345.67')],
+            'f11': [date(1999, 12, 31), date(2023, 1, 1)],
+        }, schema=simple_pa_schema)
+        table_write.write_arrow(expect_data)
+        table_commit.commit(table_write.prepare_commit())
+        table_write.close()
+        table_commit.close()
+
+        read_builder = table.new_read_builder()
+        table_scan = read_builder.new_scan()
+        table_read = read_builder.new_read()
+        actual_data = table_read.to_arrow(table_scan.plan().splits())
+        self.assertEqual(actual_data, expect_data)
+
+        # to test GenericRow ability
+        latest_snapshot = table_scan.snapshot_manager.get_latest_snapshot()
+        manifest_files = table_scan.manifest_list_manager.read_all(latest_snapshot)
+        manifest_entries = table_scan.manifest_file_manager.read(manifest_files[0].file_name,
+                                                                 lambda row: table_scan._bucket_filter(row))
+        min_value_stats = manifest_entries[0].file.value_stats.min_values.values
+        max_value_stats = manifest_entries[0].file.value_stats.max_values.values
+        expected_min_values = [col[0].as_py() for col in expect_data]
+        expected_max_values = [col[1].as_py() for col in expect_data]
+        self.assertEqual(min_value_stats, expected_min_values)
+        self.assertEqual(max_value_stats, expected_max_values)
+
     def test_mixed_add_and_delete_entries_same_partition(self):
         """Test record_count calculation with mixed ADD/DELETE entries in same 
partition."""
         pa_schema = pa.schema([
diff --git a/paimon-python/pypaimon/tests/reader_basic_test.py b/paimon-python/pypaimon/tests/reader_basic_test.py
index ce5a0bc308..e66f7cb94a 100644
--- a/paimon-python/pypaimon/tests/reader_basic_test.py
+++ b/paimon-python/pypaimon/tests/reader_basic_test.py
@@ -20,7 +20,8 @@ import os
 import shutil
 import tempfile
 import unittest
-from datetime import datetime
+from datetime import datetime, date, time
+from decimal import Decimal
 from unittest.mock import Mock
 
 import pandas as pd
@@ -159,6 +160,70 @@ class ReaderBasicTest(unittest.TestCase):
         pd.testing.assert_frame_equal(
             actual_df2.reset_index(drop=True), df2.reset_index(drop=True))
 
+    def test_full_data_types(self):
+        simple_pa_schema = pa.schema([
+            ('f0', pa.int8()),
+            ('f1', pa.int16()),
+            ('f2', pa.int32()),
+            ('f3', pa.int64()),
+            ('f4', pa.float32()),
+            ('f5', pa.float64()),
+            ('f6', pa.bool_()),
+            ('f7', pa.string()),
+            ('f8', pa.binary()),
+            ('f9', pa.binary(10)),
+            ('f10', pa.decimal128(10, 2)),
+            ('f11', pa.timestamp('ms')),
+            ('f12', pa.date32()),
+            ('f13', pa.time64('us')),
+        ])
+        schema = Schema.from_pyarrow_schema(simple_pa_schema)
+        self.catalog.create_table('default.test_full_data_types', schema, False)
+        table = self.catalog.get_table('default.test_full_data_types')
+
+        # to test read and write
+        write_builder = table.new_batch_write_builder()
+        table_write = write_builder.new_write()
+        table_commit = write_builder.new_commit()
+        expect_data = pa.Table.from_pydict({
+            'f0': [-1, 2],
+            'f1': [-1001, 1002],
+            'f2': [-1000001, 1000002],
+            'f3': [-10000000001, 10000000002],
+            'f4': [-1001.05, 1002.05],
+            'f5': [-1000001.05, 1000002.05],
+            'f6': [False, True],
+            'f7': ['Hello', 'World'],
+            'f8': [b'\x01\x02\x03', b'pyarrow'],
+            'f9': [b'exactly_10', b'pad'.ljust(10, b'\x00')],
+            'f10': [Decimal('-987.65'), Decimal('12345.67')],
+            'f11': [datetime(2000, 1, 1, 0, 0, 0, 123456), datetime(2023, 10, 27, 8, 0, 0)],
+            'f12': [date(1999, 12, 31), date(2023, 1, 1)],
+            'f13': [time(10, 30, 0), time(23, 59, 59, 999000)],
+        }, schema=simple_pa_schema)
+        table_write.write_arrow(expect_data)
+        table_commit.commit(table_write.prepare_commit())
+        table_write.close()
+        table_commit.close()
+
+        read_builder = table.new_read_builder()
+        table_scan = read_builder.new_scan()
+        table_read = read_builder.new_read()
+        actual_data = table_read.to_arrow(table_scan.plan().splits())
+        self.assertEqual(actual_data, expect_data)
+
+        # to test GenericRow ability
+        latest_snapshot = table_scan.snapshot_manager.get_latest_snapshot()
+        manifest_files = table_scan.manifest_list_manager.read_all(latest_snapshot)
+        manifest_entries = table_scan.manifest_file_manager.read(manifest_files[0].file_name,
+                                                                 lambda row: table_scan._bucket_filter(row))
+        min_value_stats = manifest_entries[0].file.value_stats.min_values.values
+        max_value_stats = manifest_entries[0].file.value_stats.max_values.values
+        expected_min_values = [col[0].as_py() for col in expect_data]
+        expected_max_values = [col[1].as_py() for col in expect_data]
+        self.assertEqual(min_value_stats, expected_min_values)
+        self.assertEqual(max_value_stats, expected_max_values)
+
     def test_mixed_add_and_delete_entries_same_partition(self):
         """Test record_count calculation with mixed ADD/DELETE entries in same 
partition."""
         pa_schema = pa.schema([
diff --git a/paimon-python/pypaimon/tests/schema_test.py b/paimon-python/pypaimon/tests/schema_test.py
index 97c09246ac..671f837117 100644
--- a/paimon-python/pypaimon/tests/schema_test.py
+++ b/paimon-python/pypaimon/tests/schema_test.py
@@ -20,18 +20,37 @@ import unittest
 
 import pyarrow
 
+from pypaimon import Schema
 from pypaimon.schema.data_types import (ArrayType, AtomicType, DataField,
                                         MapType, PyarrowFieldParser)
-from pypaimon import Schema
 from pypaimon.schema.table_schema import TableSchema
 
 
 class SchemaTestCase(unittest.TestCase):
     def test_types(self):
         data_fields = [
-            DataField(0, "name", AtomicType('INT'), 'desc  name'),
-            DataField(1, "arr", ArrayType(True, AtomicType('INT')), 'desc 
arr1'),
-            DataField(2, "map1",
+            DataField(0, "f0", AtomicType('TINYINT'), 'desc'),
+            DataField(1, "f1", AtomicType('SMALLINT'), 'desc'),
+            DataField(2, "f2", AtomicType('INT'), 'desc'),
+            DataField(3, "f3", AtomicType('BIGINT'), 'desc'),
+            DataField(4, "f4", AtomicType('FLOAT'), 'desc'),
+            DataField(5, "f5", AtomicType('DOUBLE'), 'desc'),
+            DataField(6, "f6", AtomicType('BOOLEAN'), 'desc'),
+            DataField(7, "f7", AtomicType('STRING'), 'desc'),
+            DataField(8, "f8", AtomicType('BINARY(12)'), 'desc'),
+            DataField(9, "f9", AtomicType('DECIMAL(10, 6)'), 'desc'),
+            DataField(10, "f10", AtomicType('BYTES'), 'desc'),
+            DataField(11, "f11", AtomicType('DATE'), 'desc'),
+            DataField(12, "f12", AtomicType('TIME(0)'), 'desc'),
+            DataField(13, "f13", AtomicType('TIME(3)'), 'desc'),
+            DataField(14, "f14", AtomicType('TIME(6)'), 'desc'),
+            DataField(15, "f15", AtomicType('TIME(9)'), 'desc'),
+            DataField(16, "f16", AtomicType('TIMESTAMP(0)'), 'desc'),
+            DataField(17, "f17", AtomicType('TIMESTAMP(3)'), 'desc'),
+            DataField(18, "f18", AtomicType('TIMESTAMP(6)'), 'desc'),
+            DataField(19, "f19", AtomicType('TIMESTAMP(9)'), 'desc'),
+            DataField(20, "arr", ArrayType(True, AtomicType('INT')), 'desc 
arr1'),
+            DataField(21, "map1",
                       MapType(False, AtomicType('INT', False),
                              MapType(False, AtomicType('INT', False), AtomicType('INT', False))),
                       'desc map1'),
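
As additional context for the generic_row.py changes above, a small illustrative sketch (not part of the commit) of the new DATE/TIME handling: values now round-trip as datetime.date and datetime.time rather than datetime objects pinned to 1970-01-01 / UTC. The private _serialize_*/_parse_* helpers are called directly here purely for illustration; their signatures are taken from the diff:

    from datetime import date, time
    from pypaimon.table.row.generic_row import GenericRowDeserializer, GenericRowSerializer

    # DATE round-trips as datetime.date.
    day_bytes = GenericRowSerializer._serialize_date(date(2023, 1, 1))
    assert GenericRowDeserializer._parse_date(day_bytes, 0) == date(2023, 1, 1)

    # TIME round-trips as datetime.time with millisecond precision.
    time_bytes = GenericRowSerializer._serialize_time(time(23, 59, 59, 999000))
    assert GenericRowDeserializer._parse_time(time_bytes, 0) == time(23, 59, 59, 999000)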
