This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new f43a348970 [Python] Enhance data type support (#6253)
f43a348970 is described below
commit f43a348970ac636a167d1c5631f8abd79185be45
Author: ChengHui Chen <[email protected]>
AuthorDate: Mon Sep 15 10:07:39 2025 +0800
[Python] Enhance data type support (#6253)
---
paimon-python/pypaimon/schema/data_types.py | 122 ++++++++++++++-------
paimon-python/pypaimon/table/row/generic_row.py | 53 +++++----
.../pypaimon/tests/py36/ao_read_write_test.py | 63 ++++++++++-
paimon-python/pypaimon/tests/reader_basic_test.py | 67 ++++++++++-
paimon-python/pypaimon/tests/schema_test.py | 27 ++++-
5 files changed, 259 insertions(+), 73 deletions(-)
diff --git a/paimon-python/pypaimon/schema/data_types.py b/paimon-python/pypaimon/schema/data_types.py
index b53a779b41..5255ac6b1e 100644
--- a/paimon-python/pypaimon/schema/data_types.py
+++ b/paimon-python/pypaimon/schema/data_types.py
@@ -23,6 +23,7 @@ from enum import Enum
from typing import Any, Dict, List, Optional, Union
import pyarrow
+from pyarrow import types
class AtomicInteger:
@@ -385,6 +386,7 @@ class PyarrowFieldParser:
@staticmethod
def from_paimon_type(data_type: DataType) -> pyarrow.DataType:
+ # Based on Paimon DataTypes Doc: https://paimon.apache.org/docs/master/concepts/data-types/
if isinstance(data_type, AtomicType):
type_name = data_type.type.upper()
if type_name == 'TINYINT':
@@ -401,31 +403,67 @@ class PyarrowFieldParser:
return pyarrow.float64()
elif type_name == 'BOOLEAN':
return pyarrow.bool_()
- elif type_name == 'STRING':
+ elif type_name == 'STRING' or type_name.startswith('CHAR') or type_name.startswith('VARCHAR'):
return pyarrow.string()
- elif type_name == 'BINARY':
+ elif type_name == 'BYTES' or type_name.startswith('VARBINARY'):
return pyarrow.binary()
+ elif type_name.startswith('BINARY'):
+ if type_name == 'BINARY':
+ return pyarrow.binary(1)
+ match = re.fullmatch(r'BINARY\((\d+)\)', type_name)
+ if match:
+ length = int(match.group(1))
+ if length > 0:
+ return pyarrow.binary(length)
+ elif type_name.startswith('DECIMAL'):
+ if type_name == 'DECIMAL':
+ return pyarrow.decimal128(10, 0) # default to 10, 0
+ match_ps = re.fullmatch(r'DECIMAL\((\d+),\s*(\d+)\)', type_name)
+ if match_ps:
+ precision, scale = map(int, match_ps.groups())
+ return pyarrow.decimal128(precision, scale)
+ match_p = re.fullmatch(r'DECIMAL\((\d+)\)', type_name)
+ if match_p:
+ precision = int(match_p.group(1))
+ return pyarrow.decimal128(precision, 0)
+ if type_name.startswith('TIMESTAMP'):
+ # WITH_LOCAL_TIME_ZONE is ambiguous and not supported
+ if type_name == 'TIMESTAMP':
+ return pyarrow.timestamp('us', tz=None) # default to 6
+ match = re.fullmatch(r'TIMESTAMP\((\d+)\)', type_name)
+ if match:
+ precision = int(match.group(1))
+ if precision == 0:
+ return pyarrow.timestamp('s', tz=None)
+ elif 1 <= precision <= 3:
+ return pyarrow.timestamp('ms', tz=None)
+ elif 4 <= precision <= 6:
+ return pyarrow.timestamp('us', tz=None)
+ elif 7 <= precision <= 9:
+ return pyarrow.timestamp('ns', tz=None)
elif type_name == 'DATE':
return pyarrow.date32()
- elif type_name == 'TIMESTAMP':
- return pyarrow.timestamp('ms')
- elif type_name.startswith('DECIMAL'):
- match = re.match(r'DECIMAL\((\d+),\s*(\d+)\)', type_name)
+ if type_name.startswith('TIME'):
+ if type_name == 'TIME':
+ return pyarrow.time64('us') # default to 6
+ match = re.fullmatch(r'TIME\((\d+)\)', type_name)
if match:
- precision, scale = map(int, match.groups())
- return pyarrow.decimal128(precision, scale)
- else:
- return pyarrow.decimal128(38, 18)
- else:
- raise ValueError("Unsupported data type: {}".format(type_name))
+ precision = int(match.group(1))
+ if precision == 0:
+ return pyarrow.time32('s')
+ if 1 <= precision <= 3:
+ return pyarrow.time32('ms')
+ if 4 <= precision <= 6:
+ return pyarrow.time64('us')
+ if 7 <= precision <= 9:
+ return pyarrow.time64('ns')
elif isinstance(data_type, ArrayType):
return pyarrow.list_(PyarrowFieldParser.from_paimon_type(data_type.element))
elif isinstance(data_type, MapType):
key_type = PyarrowFieldParser.from_paimon_type(data_type.key)
value_type = PyarrowFieldParser.from_paimon_type(data_type.value)
return pyarrow.map_(key_type, value_type)
- else:
- raise ValueError("Unsupported data type: {}".format(data_type))
+ raise ValueError("Unsupported data type: {}".format(data_type))
@staticmethod
def from_paimon_field(data_field: DataField) -> pyarrow.Field:
@@ -444,48 +482,52 @@ class PyarrowFieldParser:
@staticmethod
def to_paimon_type(pa_type: pyarrow.DataType, nullable: bool) -> DataType:
- type_name = str(pa_type)
- if type_name == "int8":
+ # Based on Arrow DataTypes Doc: https://arrow.apache.org/docs/python/api/datatypes.html
+ # All safe mappings are already implemented, adding new mappings requires rigorous evaluation
+ # to avoid potential data loss
+ type_name = None
+ if types.is_int8(pa_type):
type_name = 'TINYINT'
- elif type_name == "int16":
+ elif types.is_int16(pa_type):
type_name = 'SMALLINT'
- elif type_name == "int32":
+ elif types.is_int32(pa_type):
type_name = 'INT'
- elif type_name == "int64":
+ elif types.is_int64(pa_type):
type_name = 'BIGINT'
- elif type_name.startswith('float'):
+ elif types.is_float32(pa_type):
type_name = 'FLOAT'
- elif type_name.startswith('double'):
+ elif types.is_float64(pa_type):
type_name = 'DOUBLE'
- elif type_name.startswith('bool'):
+ elif types.is_boolean(pa_type):
type_name = 'BOOLEAN'
- elif type_name.startswith('string'):
+ elif types.is_string(pa_type):
type_name = 'STRING'
- elif type_name.startswith('binary'):
- type_name = 'BINARY'
- elif type_name.startswith('date'):
+ elif types.is_fixed_size_binary(pa_type):
+ type_name = f'BINARY({pa_type.byte_width})'
+ elif types.is_binary(pa_type):
+ type_name = 'BYTES'
+ elif types.is_decimal(pa_type):
+ type_name = f'DECIMAL({pa_type.precision}, {pa_type.scale})'
+ elif types.is_timestamp(pa_type) and pa_type.tz is None:
+ precision_mapping = {'s': 0, 'ms': 3, 'us': 6, 'ns': 9}
+ type_name = f'TIMESTAMP({precision_mapping[pa_type.unit]})'
+ elif types.is_date32(pa_type):
type_name = 'DATE'
- elif type_name.startswith('timestamp'):
- type_name = 'TIMESTAMP'
- elif type_name.startswith('decimal'):
- match = re.match(r'decimal\((\d+),\s*(\d+)\)', type_name)
- if match:
- precision, scale = map(int, match.groups())
- type_name = 'DECIMAL({},{})'.format(precision, scale)
- else:
- type_name = 'DECIMAL(38,18)'
- elif type_name.startswith('list'):
+ elif types.is_time(pa_type):
+ precision_mapping = {'s': 0, 'ms': 3, 'us': 6, 'ns': 9}
+ type_name = f'TIME({precision_mapping[pa_type.unit]})'
+ elif types.is_list(pa_type) or types.is_large_list(pa_type):
pa_type: pyarrow.ListType
element_type = PyarrowFieldParser.to_paimon_type(pa_type.value_type, nullable)
return ArrayType(nullable, element_type)
- elif type_name.startswith('map'):
+ elif types.is_map(pa_type):
pa_type: pyarrow.MapType
key_type = PyarrowFieldParser.to_paimon_type(pa_type.key_type, nullable)
value_type = PyarrowFieldParser.to_paimon_type(pa_type.item_type, nullable)
return MapType(nullable, key_type, value_type)
- else:
- raise ValueError("Unknown type: {}".format(type_name))
- return AtomicType(type_name, nullable)
+ if type_name is not None:
+ return AtomicType(type_name, nullable)
+ raise ValueError("Unsupported pyarrow type: {}".format(pa_type))
@staticmethod
def to_paimon_field(field_idx: int, pa_field: pyarrow.Field) -> DataField:
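
For reference, a minimal round-trip sketch of the new mapping (import path as used in schema_test.py below; that the formatted type string is exposed via the AtomicType.type attribute is an assumption drawn from this diff, not a documented API):

    import pyarrow as pa
    from pypaimon.schema.data_types import AtomicType, PyarrowFieldParser

    # Paimon -> Arrow: TIMESTAMP(3) maps to a millisecond timestamp without a time zone
    assert PyarrowFieldParser.from_paimon_type(AtomicType('TIMESTAMP(3)')) == pa.timestamp('ms', tz=None)

    # Arrow -> Paimon: fixed-size binary keeps its length, decimal keeps precision and scale
    print(PyarrowFieldParser.to_paimon_type(pa.binary(10), True).type)         # expected 'BINARY(10)'
    print(PyarrowFieldParser.to_paimon_type(pa.decimal128(10, 2), True).type)  # expected 'DECIMAL(10, 2)'
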
diff --git a/paimon-python/pypaimon/table/row/generic_row.py b/paimon-python/pypaimon/table/row/generic_row.py
index 14f42e806c..a7612168d9 100644
--- a/paimon-python/pypaimon/table/row/generic_row.py
+++ b/paimon-python/pypaimon/table/row/generic_row.py
@@ -18,7 +18,7 @@
import struct
from dataclasses import dataclass
-from datetime import datetime, timedelta, timezone
+from datetime import date, datetime, time, timedelta
from decimal import Decimal
from typing import Any, List
@@ -107,17 +107,17 @@ class GenericRowDeserializer:
return cls._parse_float(bytes_data, field_offset)
elif type_name in ['DOUBLE']:
return cls._parse_double(bytes_data, field_offset)
- elif type_name in ['VARCHAR', 'STRING', 'CHAR']:
+ elif type_name.startswith('CHAR') or type_name.startswith('VARCHAR') or type_name == 'STRING':
return cls._parse_string(bytes_data, base_offset, field_offset)
- elif type_name in ['BINARY', 'VARBINARY', 'BYTES']:
+ elif type_name.startswith('BINARY') or type_name.startswith('VARBINARY') or type_name == 'BYTES':
return cls._parse_binary(bytes_data, base_offset, field_offset)
- elif type_name in ['DECIMAL', 'NUMERIC']:
+ elif type_name.startswith('DECIMAL') or type_name.startswith('NUMERIC'):
return cls._parse_decimal(bytes_data, base_offset, field_offset, data_type)
- elif type_name in ['TIMESTAMP', 'TIMESTAMP_WITHOUT_TIME_ZONE']:
+ elif type_name.startswith('TIMESTAMP'):
return cls._parse_timestamp(bytes_data, base_offset, field_offset, data_type)
elif type_name in ['DATE']:
return cls._parse_date(bytes_data, field_offset)
- elif type_name in ['TIME', 'TIME_WITHOUT_TIME_ZONE']:
+ elif type_name.startswith('TIME'):
return cls._parse_time(bytes_data, field_offset)
else:
return cls._parse_string(bytes_data, base_offset, field_offset)
@@ -213,19 +213,19 @@ class GenericRowDeserializer:
@classmethod
def _parse_timestamp(cls, bytes_data: bytes, base_offset: int,
field_offset: int, data_type: DataType) -> datetime:
millis = struct.unpack('<q', bytes_data[field_offset:field_offset + 8])[0]
- return datetime.fromtimestamp(millis / 1000.0, tz=timezone.utc)
+ return datetime.fromtimestamp(millis / 1000.0, tz=None)
@classmethod
- def _parse_date(cls, bytes_data: bytes, field_offset: int) -> datetime:
+ def _parse_date(cls, bytes_data: bytes, field_offset: int) -> date:
days = struct.unpack('<i', bytes_data[field_offset:field_offset + 4])[0]
- return datetime(1970, 1, 1) + timedelta(days=days)
+ return date(1970, 1, 1) + timedelta(days=days)
@classmethod
- def _parse_time(cls, bytes_data: bytes, field_offset: int) -> datetime:
+ def _parse_time(cls, bytes_data: bytes, field_offset: int) -> time:
millis = struct.unpack('<i', bytes_data[field_offset:field_offset + 4])[0]
seconds = millis // 1000
microseconds = (millis % 1000) * 1000
- return datetime(1970, 1, 1).replace(
+ return time(
hour=seconds // 3600,
minute=(seconds % 3600) // 60,
second=seconds % 60,
@@ -260,8 +260,8 @@ class GenericRowSerializer:
raise ValueError(f"BinaryRow only support AtomicType yet, meet
{field.type.__class__}")
type_name = field.type.type.upper()
- if type_name in ['VARCHAR', 'STRING', 'CHAR', 'BINARY', 'VARBINARY', 'BYTES']:
- if type_name in ['VARCHAR', 'STRING', 'CHAR']:
+ if any(type_name.startswith(p) for p in ['CHAR', 'VARCHAR', 'STRING', 'BINARY', 'VARBINARY', 'BYTES']):
+ if any(type_name.startswith(p) for p in ['CHAR', 'VARCHAR', 'STRING']):
value_bytes = str(value).encode('utf-8')
else:
value_bytes = bytes(value)
@@ -320,13 +320,13 @@ class GenericRowSerializer:
return cls._serialize_float(value) + b'\x00' * 4
elif type_name in ['DOUBLE']:
return cls._serialize_double(value)
- elif type_name in ['DECIMAL', 'NUMERIC']:
+ elif type_name.startswith('DECIMAL') or type_name.startswith('NUMERIC'):
return cls._serialize_decimal(value, data_type)
- elif type_name in ['TIMESTAMP', 'TIMESTAMP_WITHOUT_TIME_ZONE']:
+ elif type_name.startswith('TIMESTAMP'):
return cls._serialize_timestamp(value)
elif type_name in ['DATE']:
return cls._serialize_date(value) + b'\x00' * 4
- elif type_name in ['TIME', 'TIME_WITHOUT_TIME_ZONE']:
+ elif type_name.startswith('TIME'):
return cls._serialize_time(value) + b'\x00' * 4
else:
raise TypeError(f"Unsupported type for serialization: {type_name}")
@@ -379,27 +379,26 @@ class GenericRowSerializer:
@classmethod
def _serialize_timestamp(cls, value: datetime) -> bytes:
- if value.tzinfo is None:
- value = value.replace(tzinfo=timezone.utc)
+ if value.tzinfo is not None:
+ raise RuntimeError("datetime tzinfo not supported yet")
millis = int(value.timestamp() * 1000)
return struct.pack('<q', millis)
@classmethod
- def _serialize_date(cls, value: datetime) -> bytes:
- if isinstance(value, datetime):
+ def _serialize_date(cls, value: date) -> bytes:
+ if isinstance(value, date):
epoch = datetime(1970, 1, 1).date()
- days = (value.date() - epoch).days
+ days = (value - epoch).days
else:
- raise RuntimeError("date should be datatime")
+ raise RuntimeError("value should be datatime.date")
return struct.pack('<i', days)
@classmethod
- def _serialize_time(cls, value: datetime) -> bytes:
- if isinstance(value, datetime):
- midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
- millis = int((value - midnight).total_seconds() * 1000)
- else:
+ def _serialize_time(cls, value: time) -> bytes:
+ if isinstance(value, time):
millis = value.hour * 3600000 + value.minute * 60000 + value.second * 1000 + value.microsecond // 1000
+ else:
+ raise RuntimeError("value should be datatime.time")
return struct.pack('<i', millis)
@classmethod
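
For context, a standalone sketch of the on-disk convention the (de)serializers above follow for DATE and TIME values (plain struct packing; the variable names here are illustrative only and not part of pypaimon):

    import struct
    from datetime import date, time, timedelta

    # TIME: milliseconds since midnight, packed as a little-endian int32
    t = time(23, 59, 59, 999000)
    time_millis = t.hour * 3600000 + t.minute * 60000 + t.second * 1000 + t.microsecond // 1000
    time_bytes = struct.pack('<i', time_millis)

    # DATE: days since 1970-01-01, packed as a little-endian int32
    d = date(2023, 1, 1)
    date_bytes = struct.pack('<i', (d - date(1970, 1, 1)).days)

    # Decoding reverses the same convention
    days = struct.unpack('<i', date_bytes)[0]
    assert date(1970, 1, 1) + timedelta(days=days) == d
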
diff --git a/paimon-python/pypaimon/tests/py36/ao_read_write_test.py b/paimon-python/pypaimon/tests/py36/ao_read_write_test.py
index 0e8d97d47b..fcd05ee6cf 100644
--- a/paimon-python/pypaimon/tests/py36/ao_read_write_test.py
+++ b/paimon-python/pypaimon/tests/py36/ao_read_write_test.py
@@ -16,7 +16,8 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import logging
-from datetime import datetime
+from datetime import datetime, date
+from decimal import Decimal
from unittest.mock import Mock
import pandas as pd
@@ -124,6 +125,66 @@ class RESTTableReadWritePy36Test(RESTCatalogBaseTest):
pd.testing.assert_frame_equal(
actual_df2.reset_index(drop=True), df2.reset_index(drop=True))
+ def test_full_data_types(self):
+ simple_pa_schema = pa.schema([
+ ('f0', pa.int8()),
+ ('f1', pa.int16()),
+ ('f2', pa.int32()),
+ ('f3', pa.int64()),
+ ('f4', pa.float32()),
+ ('f5', pa.float64()),
+ ('f6', pa.bool_()),
+ ('f7', pa.string()),
+ ('f8', pa.binary()),
+ ('f9', pa.binary(10)),
+ ('f10', pa.decimal128(10, 2)),
+ ('f11', pa.date32()),
+ ])
+ schema = Schema.from_pyarrow_schema(simple_pa_schema)
+ self.rest_catalog.create_table('default.test_full_data_types', schema, False)
+ table = self.rest_catalog.get_table('default.test_full_data_types')
+
+ # to test read and write
+ write_builder = table.new_batch_write_builder()
+ table_write = write_builder.new_write()
+ table_commit = write_builder.new_commit()
+ expect_data = pa.Table.from_pydict({
+ 'f0': [-1, 2],
+ 'f1': [-1001, 1002],
+ 'f2': [-1000001, 1000002],
+ 'f3': [-10000000001, 10000000002],
+ 'f4': [-1001.05, 1002.05],
+ 'f5': [-1000001.05, 1000002.05],
+ 'f6': [False, True],
+ 'f7': ['Hello', 'World'],
+ 'f8': [b'\x01\x02\x03', b'pyarrow'],
+ 'f9': [b'exactly_10', b'pad'.ljust(10, b'\x00')],
+ 'f10': [Decimal('-987.65'), Decimal('12345.67')],
+ 'f11': [date(1999, 12, 31), date(2023, 1, 1)],
+ }, schema=simple_pa_schema)
+ table_write.write_arrow(expect_data)
+ table_commit.commit(table_write.prepare_commit())
+ table_write.close()
+ table_commit.close()
+
+ read_builder = table.new_read_builder()
+ table_scan = read_builder.new_scan()
+ table_read = read_builder.new_read()
+ actual_data = table_read.to_arrow(table_scan.plan().splits())
+ self.assertEqual(actual_data, expect_data)
+
+ # to test GenericRow ability
+ latest_snapshot = table_scan.snapshot_manager.get_latest_snapshot()
+ manifest_files = table_scan.manifest_list_manager.read_all(latest_snapshot)
+ manifest_entries = table_scan.manifest_file_manager.read(manifest_files[0].file_name,
+ lambda row: table_scan._bucket_filter(row))
+ min_value_stats = manifest_entries[0].file.value_stats.min_values.values
+ max_value_stats = manifest_entries[0].file.value_stats.max_values.values
+ expected_min_values = [col[0].as_py() for col in expect_data]
+ expected_max_values = [col[1].as_py() for col in expect_data]
+ self.assertEqual(min_value_stats, expected_min_values)
+ self.assertEqual(max_value_stats, expected_max_values)
+
def test_mixed_add_and_delete_entries_same_partition(self):
"""Test record_count calculation with mixed ADD/DELETE entries in same
partition."""
pa_schema = pa.schema([
diff --git a/paimon-python/pypaimon/tests/reader_basic_test.py b/paimon-python/pypaimon/tests/reader_basic_test.py
index ce5a0bc308..e66f7cb94a 100644
--- a/paimon-python/pypaimon/tests/reader_basic_test.py
+++ b/paimon-python/pypaimon/tests/reader_basic_test.py
@@ -20,7 +20,8 @@ import os
import shutil
import tempfile
import unittest
-from datetime import datetime
+from datetime import datetime, date, time
+from decimal import Decimal
from unittest.mock import Mock
import pandas as pd
@@ -159,6 +160,70 @@ class ReaderBasicTest(unittest.TestCase):
pd.testing.assert_frame_equal(
actual_df2.reset_index(drop=True), df2.reset_index(drop=True))
+ def test_full_data_types(self):
+ simple_pa_schema = pa.schema([
+ ('f0', pa.int8()),
+ ('f1', pa.int16()),
+ ('f2', pa.int32()),
+ ('f3', pa.int64()),
+ ('f4', pa.float32()),
+ ('f5', pa.float64()),
+ ('f6', pa.bool_()),
+ ('f7', pa.string()),
+ ('f8', pa.binary()),
+ ('f9', pa.binary(10)),
+ ('f10', pa.decimal128(10, 2)),
+ ('f11', pa.timestamp('ms')),
+ ('f12', pa.date32()),
+ ('f13', pa.time64('us')),
+ ])
+ schema = Schema.from_pyarrow_schema(simple_pa_schema)
+ self.catalog.create_table('default.test_full_data_types', schema, False)
+ table = self.catalog.get_table('default.test_full_data_types')
+
+ # to test read and write
+ write_builder = table.new_batch_write_builder()
+ table_write = write_builder.new_write()
+ table_commit = write_builder.new_commit()
+ expect_data = pa.Table.from_pydict({
+ 'f0': [-1, 2],
+ 'f1': [-1001, 1002],
+ 'f2': [-1000001, 1000002],
+ 'f3': [-10000000001, 10000000002],
+ 'f4': [-1001.05, 1002.05],
+ 'f5': [-1000001.05, 1000002.05],
+ 'f6': [False, True],
+ 'f7': ['Hello', 'World'],
+ 'f8': [b'\x01\x02\x03', b'pyarrow'],
+ 'f9': [b'exactly_10', b'pad'.ljust(10, b'\x00')],
+ 'f10': [Decimal('-987.65'), Decimal('12345.67')],
+ 'f11': [datetime(2000, 1, 1, 0, 0, 0, 123456), datetime(2023, 10, 27, 8, 0, 0)],
+ 'f12': [date(1999, 12, 31), date(2023, 1, 1)],
+ 'f13': [time(10, 30, 0), time(23, 59, 59, 999000)],
+ }, schema=simple_pa_schema)
+ table_write.write_arrow(expect_data)
+ table_commit.commit(table_write.prepare_commit())
+ table_write.close()
+ table_commit.close()
+
+ read_builder = table.new_read_builder()
+ table_scan = read_builder.new_scan()
+ table_read = read_builder.new_read()
+ actual_data = table_read.to_arrow(table_scan.plan().splits())
+ self.assertEqual(actual_data, expect_data)
+
+ # to test GenericRow ability
+ latest_snapshot = table_scan.snapshot_manager.get_latest_snapshot()
+ manifest_files = table_scan.manifest_list_manager.read_all(latest_snapshot)
+ manifest_entries = table_scan.manifest_file_manager.read(manifest_files[0].file_name,
+ lambda row: table_scan._bucket_filter(row))
+ min_value_stats = manifest_entries[0].file.value_stats.min_values.values
+ max_value_stats = manifest_entries[0].file.value_stats.max_values.values
+ expected_min_values = [col[0].as_py() for col in expect_data]
+ expected_max_values = [col[1].as_py() for col in expect_data]
+ self.assertEqual(min_value_stats, expected_min_values)
+ self.assertEqual(max_value_stats, expected_max_values)
+
def test_mixed_add_and_delete_entries_same_partition(self):
"""Test record_count calculation with mixed ADD/DELETE entries in same
partition."""
pa_schema = pa.schema([
diff --git a/paimon-python/pypaimon/tests/schema_test.py b/paimon-python/pypaimon/tests/schema_test.py
index 97c09246ac..671f837117 100644
--- a/paimon-python/pypaimon/tests/schema_test.py
+++ b/paimon-python/pypaimon/tests/schema_test.py
@@ -20,18 +20,37 @@ import unittest
import pyarrow
+from pypaimon import Schema
from pypaimon.schema.data_types import (ArrayType, AtomicType, DataField,
MapType, PyarrowFieldParser)
-from pypaimon import Schema
from pypaimon.schema.table_schema import TableSchema
class SchemaTestCase(unittest.TestCase):
def test_types(self):
data_fields = [
- DataField(0, "name", AtomicType('INT'), 'desc name'),
- DataField(1, "arr", ArrayType(True, AtomicType('INT')), 'desc
arr1'),
- DataField(2, "map1",
+ DataField(0, "f0", AtomicType('TINYINT'), 'desc'),
+ DataField(1, "f1", AtomicType('SMALLINT'), 'desc'),
+ DataField(2, "f2", AtomicType('INT'), 'desc'),
+ DataField(3, "f3", AtomicType('BIGINT'), 'desc'),
+ DataField(4, "f4", AtomicType('FLOAT'), 'desc'),
+ DataField(5, "f5", AtomicType('DOUBLE'), 'desc'),
+ DataField(6, "f6", AtomicType('BOOLEAN'), 'desc'),
+ DataField(7, "f7", AtomicType('STRING'), 'desc'),
+ DataField(8, "f8", AtomicType('BINARY(12)'), 'desc'),
+ DataField(9, "f9", AtomicType('DECIMAL(10, 6)'), 'desc'),
+ DataField(10, "f10", AtomicType('BYTES'), 'desc'),
+ DataField(11, "f11", AtomicType('DATE'), 'desc'),
+ DataField(12, "f12", AtomicType('TIME(0)'), 'desc'),
+ DataField(13, "f13", AtomicType('TIME(3)'), 'desc'),
+ DataField(14, "f14", AtomicType('TIME(6)'), 'desc'),
+ DataField(15, "f15", AtomicType('TIME(9)'), 'desc'),
+ DataField(16, "f16", AtomicType('TIMESTAMP(0)'), 'desc'),
+ DataField(17, "f17", AtomicType('TIMESTAMP(3)'), 'desc'),
+ DataField(18, "f18", AtomicType('TIMESTAMP(6)'), 'desc'),
+ DataField(19, "f19", AtomicType('TIMESTAMP(9)'), 'desc'),
+ DataField(20, "arr", ArrayType(True, AtomicType('INT')), 'desc
arr1'),
+ DataField(21, "map1",
MapType(False, AtomicType('INT', False),
MapType(False, AtomicType('INT', False),
AtomicType('INT', False))),
'desc map1'),