This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new 0191b0b9b feat(python): Replace usage of ComplexObjectSerializer with
DataClassSerializer (#2395)
0191b0b9b is described below
commit 0191b0b9b2dc51f6b446e738d852816e9d61aa23
Author: Emre Şafak <[email protected]>
AuthorDate: Mon Jul 7 23:39:28 2025 -0400
feat(python): Replace usage of ComplexObjectSerializer with
DataClassSerializer (#2395)
I realized I did not remove the remaining usages of `ComplexObjectSerializer`
when I submitted #2389; this PR removes them.
## What does this PR do?
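* Remove ComplexObjectSerializer from pyfory/__init__.py.
* Update _registry.py to use DataClassSerializer directly instead of
ComplexObjectSerializer.
* Remove ComplexObjectSerializer definition from _struct.py and re-add it in
serializer.py as a deprecated subclass of DataClassSerializer that still
emits a DeprecationWarning.
* In serializer.py, compare field types with `is` instead of `==`, and join
several statements that were wrapped across lines.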
## Does this PR introduce any user-facing change?
No
## Benchmark
N/A
---------
Co-authored-by: Emre Şafak <[email protected]>
---
python/pyfory/__init__.py | 3 --
python/pyfory/_registry.py | 76 ++++++++++-----------------------------------
python/pyfory/_struct.py | 24 +-------------
python/pyfory/serializer.py | 57 ++++++++++++++++------------------
4 files changed, 45 insertions(+), 115 deletions(-)
diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py
index e879d67c5..caea3efbe 100644
--- a/python/pyfory/__init__.py
+++ b/python/pyfory/__init__.py
@@ -31,9 +31,6 @@ from pyfory._registry import TypeInfo
if ENABLE_FORY_CYTHON_SERIALIZATION:
from pyfory._serialization import Fory, TypeInfo # noqa: F401,F811
-from pyfory._struct import ( # noqa: F401,F403,F811 # pylint:
disable=unused-import
- ComplexObjectSerializer,
-)
from pyfory.serializer import * # noqa: F401,F403 # pylint:
disable=unused-import
from pyfory.type import ( # noqa: F401 # pylint: disable=unused-import
record_class_factory,
diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py
index e19010be2..dd999ce2c 100644
--- a/python/pyfory/_registry.py
+++ b/python/pyfory/_registry.py
@@ -60,7 +60,6 @@ from pyfory.serializer import (
PickleSerializer,
DataClassSerializer,
)
-from pyfory._struct import ComplexObjectSerializer
from pyfory.meta.metastring import MetaStringEncoder, MetaStringDecoder
from pyfory.type import (
TypeId,
@@ -118,10 +117,7 @@ else:
self.dynamic_type = dynamic_type
def __repr__(self):
- return (
- f"TypeInfo(cls={self.cls}, type_id={self.type_id}, "
- f"serializer={self.serializer})"
- )
+ return f"TypeInfo(cls={self.cls}, type_id={self.type_id},
serializer={self.serializer})"
def decode_namespace(self) -> str:
if self.namespace_bytes is None:
@@ -230,9 +226,7 @@ class TypeResolver:
register(float, type_id=TypeId.FLOAT64, serializer=Float64Serializer)
register(str, type_id=TypeId.STRING, serializer=StringSerializer)
# TODO(chaokunyang) DURATION DECIMAL
- register(
- datetime.datetime, type_id=TypeId.TIMESTAMP,
serializer=TimestampSerializer
- )
+ register(datetime.datetime, type_id=TypeId.TIMESTAMP,
serializer=TimestampSerializer)
register(datetime.date, type_id=TypeId.LOCAL_DATE,
serializer=DateSerializer)
register(bytes, type_id=TypeId.BINARY, serializer=BytesSerializer)
for itemsize, ftype, typeid in
PyArraySerializer.typecode_dict.values():
@@ -241,9 +235,7 @@ class TypeResolver:
type_id=typeid,
serializer=PyArraySerializer(self.fory, ftype, typeid),
)
- register(
- array.array, type_id=DYNAMIC_TYPE_ID,
serializer=DynamicPyArraySerializer
- )
+ register(array.array, type_id=DYNAMIC_TYPE_ID,
serializer=DynamicPyArraySerializer)
if np:
# overwrite pyarray with same type id.
# if pyarray are needed, one must annotate that value with
XXXArrayType
@@ -275,9 +267,7 @@ class TypeResolver:
type_id=TypeId.ARROW_RECORD_BATCH,
serializer=ArrowRecordBatchSerializer,
)
- register(
- pa.Table, type_id=TypeId.ARROW_TABLE,
serializer=ArrowTableSerializer
- )
+ register(pa.Table, type_id=TypeId.ARROW_TABLE,
serializer=ArrowTableSerializer)
except Exception:
pass
@@ -322,20 +312,14 @@ class TypeResolver:
if n_params == 0 and typename is None:
type_id = self._next_type_id()
if n_params == 2:
- raise TypeError(
- f"type name {typename} and id {type_id} should not be set at
the same time"
- )
+ raise TypeError(f"type name {typename} and id {type_id} should not
be set at the same time")
if type_id not in {0, None}:
# multiple type can have same tpe id
if type_id in self._type_id_to_typeinfo and cls in
self._types_info:
raise TypeError(f"{cls} registered already")
elif cls in self._types_info:
raise TypeError(f"{cls} registered already")
- register_type = (
- self._register_xtype
- if self.fory.language == Language.XLANG
- else self._register_pytype
- )
+ register_type = self._register_xtype if self.fory.language ==
Language.XLANG else self._register_pytype
return register_type(
cls,
type_id=type_id,
@@ -358,22 +342,12 @@ class TypeResolver:
if serializer is None:
if issubclass(cls, enum.Enum):
serializer = EnumSerializer(self.fory, cls)
- type_id = (
- TypeId.NAMED_ENUM
- if type_id is None
- else ((type_id << 8) + TypeId.ENUM)
- )
+ type_id = TypeId.NAMED_ENUM if type_id is None else ((type_id
<< 8) + TypeId.ENUM)
else:
- serializer = ComplexObjectSerializer(self.fory, cls)
- type_id = (
- TypeId.NAMED_STRUCT
- if type_id is None
- else ((type_id << 8) + TypeId.STRUCT)
- )
+ serializer = DataClassSerializer(self.fory, cls, xlang=True)
+ type_id = TypeId.NAMED_STRUCT if type_id is None else
((type_id << 8) + TypeId.STRUCT)
elif not internal:
- type_id = (
- TypeId.NAMED_EXT if type_id is None else ((type_id << 8) +
TypeId.EXT)
- )
+ type_id = TypeId.NAMED_EXT if type_id is None else ((type_id << 8)
+ TypeId.EXT)
return self.__register_type(
cls,
type_id=type_id,
@@ -426,15 +400,11 @@ class TypeResolver:
ns_meta_bytes =
self.metastring_resolver.get_metastr_bytes(ns_metastr)
type_metastr = self.typename_encoder.encode(typename)
type_meta_bytes =
self.metastring_resolver.get_metastr_bytes(type_metastr)
- typeinfo = TypeInfo(
- cls, type_id, serializer, ns_meta_bytes, type_meta_bytes,
dynamic_type
- )
+ typeinfo = TypeInfo(cls, type_id, serializer, ns_meta_bytes,
type_meta_bytes, dynamic_type)
self._named_type_to_typeinfo[(namespace, typename)] = typeinfo
self._ns_type_to_typeinfo[(ns_meta_bytes, type_meta_bytes)] =
typeinfo
self._types_info[cls] = typeinfo
- if type_id > 0 and (
- self.language == Language.PYTHON or not
TypeId.is_namespaced_type(type_id)
- ):
+ if type_id > 0 and (self.language == Language.PYTHON or not
TypeId.is_namespaced_type(type_id)):
if type_id not in self._type_id_to_typeinfo or not internal:
self._type_id_to_typeinfo[type_id] = typeinfo
self._types_info[cls] = typeinfo
@@ -479,9 +449,7 @@ class TypeResolver:
return type_info
elif not create:
return None
- if self.language != Language.PYTHON or (
- self.require_registration and not issubclass(cls, Enum)
- ):
+ if self.language != Language.PYTHON or (self.require_registration and
not issubclass(cls, Enum)):
raise TypeUnregisteredError(f"{cls} not registered")
logger.info("Type %s not registered", cls)
serializer = self._create_serializer(cls)
@@ -495,9 +463,7 @@ class TypeResolver:
if isinstance(serializer, DataClassSerializer):
type_id = TypeId.NAMED_STRUCT
if type_id is None:
- raise TypeUnregisteredError(
- f"{cls} must be registered using `fory.register_type` API"
- )
+ raise TypeUnregisteredError(f"{cls} must be registered using
`fory.register_type` API")
return self.__register_type(
cls,
type_id=type_id,
@@ -509,11 +475,7 @@ class TypeResolver:
def _create_serializer(self, cls):
for clz in cls.__mro__:
type_info = self._types_info.get(clz)
- if (
- type_info
- and type_info.serializer
- and type_info.serializer.support_subclass()
- ):
+ if type_info and type_info.serializer and
type_info.serializer.support_subclass():
serializer = type(type_info.serializer)(self.fory, cls)
break
else:
@@ -550,12 +512,8 @@ class TypeResolver:
internal_type_id = type_id & 0xFF
buffer.write_varuint32(type_id)
if TypeId.is_namespaced_type(internal_type_id):
- self.metastring_resolver.write_meta_string_bytes(
- buffer, typeinfo.namespace_bytes
- )
- self.metastring_resolver.write_meta_string_bytes(
- buffer, typeinfo.typename_bytes
- )
+ self.metastring_resolver.write_meta_string_bytes(buffer,
typeinfo.namespace_bytes)
+ self.metastring_resolver.write_meta_string_bytes(buffer,
typeinfo.typename_bytes)
def read_typeinfo(self, buffer):
type_id = buffer.read_varuint32()
diff --git a/python/pyfory/_struct.py b/python/pyfory/_struct.py
index 0746b66fd..6c455424e 100644
--- a/python/pyfory/_struct.py
+++ b/python/pyfory/_struct.py
@@ -19,7 +19,6 @@ import datetime
import enum
import logging
-from pyfory.serializer import Serializer
from pyfory.type import (
TypeVisitor,
infer_field,
@@ -141,9 +140,7 @@ def _sort_fields(type_resolver, field_names, serializers):
elif is_map_type(serializer.type_):
container = map_types
elif (
- type_id in {TypeId.STRING}
- or is_primitive_array_type(type_id)
- or is_subclass(serializer.type_, enum.Enum)
+ type_id in {TypeId.STRING} or is_primitive_array_type(type_id) or
is_subclass(serializer.type_, enum.Enum)
) or serializer.type_ in _time_types:
container = final_types
else:
@@ -172,25 +169,6 @@ def _sort_fields(type_resolver, field_names, serializers):
return [t[1] for t in all_types], [t[2] for t in all_types]
-import warnings
-
-# Removed DataClassSerializer from here to break the cycle for the alias
target.
-# Other serializers like ListSerializer, MapSerializer, Serializer are still
imported at the top.
-
-
-class ComplexObjectSerializer(Serializer):
- def __new__(cls, fory, clz):
- from pyfory.serializer import DataClassSerializer # Local import
-
- warnings.warn(
- "`ComplexObjectSerializer` is deprecated and will be removed in a
future version. "
- "Use `DataClassSerializer(fory, clz, xlang=True)` instead.",
- DeprecationWarning,
- stacklevel=2,
- )
- return DataClassSerializer(fory, clz, xlang=True)
-
-
class StructHashVisitor(TypeVisitor):
def __init__(
self,
diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py
index fe466ced8..4a24829b8 100644
--- a/python/pyfory/serializer.py
+++ b/python/pyfory/serializer.py
@@ -20,6 +20,7 @@ import itertools
import os
import pickle
import typing
+import warnings
from weakref import WeakValueDictionary
import pyfory.lib.mmh3
@@ -305,21 +306,16 @@ class DataClassSerializer(Serializer):
visitor = ComplexTypeVisitor(fory)
for index, key in enumerate(self._field_names):
# Changed from self.fory.infer_field to infer_field
- serializer = infer_field(
- key, self._type_hints[key], visitor, types_path=[]
- )
+ serializer = infer_field(key, self._type_hints[key], visitor,
types_path=[])
self._serializers[index] = serializer
- self._serializers, self._field_names = _sort_fields(
- fory.type_resolver, self._field_names, self._serializers
- )
+ self._serializers, self._field_names =
_sort_fields(fory.type_resolver, self._field_names, self._serializers)
self._hash = 0 # Will be computed on first xwrite/xread
if self.fory.language == Language.PYTHON:
import logging # Import here to avoid circular dependency
logger = logging.getLogger(__name__)
logger.warning(
- "Type of class %s shouldn't be serialized using
cross-language "
- "serializer",
+ "Type of class %s shouldn't be serialized using
cross-language serializer",
clz,
)
else:
@@ -352,18 +348,16 @@ class DataClassSerializer(Serializer):
stmts.append(f"{field_value} = {value}.{field_name}")
if field_type is bool:
stmts.extend(gen_write_nullable_basic_stmts(buffer,
field_value, bool))
- elif field_type == int:
+ elif field_type is int:
stmts.extend(gen_write_nullable_basic_stmts(buffer,
field_value, int))
- elif field_type == float:
+ elif field_type is float:
stmts.extend(gen_write_nullable_basic_stmts(buffer,
field_value, float))
- elif field_type == str:
+ elif field_type is str:
stmts.extend(gen_write_nullable_basic_stmts(buffer,
field_value, str))
else:
stmts.append(f"{fory}.write_ref_pyobject({buffer},
{field_value})")
self._write_method_code, func = compile_function(
- f"write_{self.type_.__module__}_{self.type_.__qualname__}".replace(
- ".", "_"
- ),
+
f"write_{self.type_.__module__}_{self.type_.__qualname__}".replace(".", "_"),
[buffer, value],
stmts,
context,
@@ -405,11 +399,11 @@ class DataClassSerializer(Serializer):
field_type = self._type_hints[field_name]
if field_type is bool:
stmts.extend(gen_read_nullable_basic_stmts(buffer, bool,
set_action))
- elif field_type == int:
+ elif field_type is int:
stmts.extend(gen_read_nullable_basic_stmts(buffer, int,
set_action))
- elif field_type == float:
+ elif field_type is float:
stmts.extend(gen_read_nullable_basic_stmts(buffer, float,
set_action))
- elif field_type == str:
+ elif field_type is str:
stmts.extend(gen_read_nullable_basic_stmts(buffer, str,
set_action))
else:
stmts.append(f"{obj}.{field_name} =
{fory}.read_ref_pyobject({buffer})")
@@ -432,8 +426,7 @@ class DataClassSerializer(Serializer):
hash_ = buffer.read_int32()
if hash_ != self._hash:
raise TypeNotCompatibleError(
- f"Hash {hash_} is not consistent with {self._hash} "
- f"for type {self.type_}",
+ f"Hash {hash_} is not consistent with {self._hash} for type
{self.type_}",
)
obj = self.type_.__new__(self.type_)
self.fory.ref_resolver.reference(obj)
@@ -448,9 +441,7 @@ class DataClassSerializer(Serializer):
def xwrite(self, buffer: Buffer, value):
if not self._xlang:
- raise TypeError(
- "xwrite can only be called when DataClassSerializer is in
xlang mode"
- )
+ raise TypeError("xwrite can only be called when
DataClassSerializer is in xlang mode")
if self._hash == 0:
self._hash = _get_hash(self.fory, self._field_names,
self._type_hints)
buffer.write_int32(self._hash)
@@ -461,16 +452,13 @@ class DataClassSerializer(Serializer):
def xread(self, buffer):
if not self._xlang:
- raise TypeError(
- "xread can only be called when DataClassSerializer is in xlang
mode"
- )
+ raise TypeError("xread can only be called when DataClassSerializer
is in xlang mode")
if self._hash == 0:
self._hash = _get_hash(self.fory, self._field_names,
self._type_hints)
hash_ = buffer.read_int32()
if hash_ != self._hash:
raise TypeNotCompatibleError(
- f"Hash {hash_} is not consistent with {self._hash} "
- f"for type {self.type_}",
+ f"Hash {hash_} is not consistent with {self._hash} for type
{self.type_}",
)
obj = self.type_.__new__(self.type_)
self.fory.ref_resolver.reference(obj)
@@ -638,9 +626,7 @@ class Numpy1DArraySerializer(Serializer):
def __init__(self, fory, ftype, dtype):
super().__init__(fory, ftype)
self.dtype = dtype
- self.itemsize, self.format, self.typecode, self.type_id =
_np_dtypes_dict[
- self.dtype
- ]
+ self.itemsize, self.format, self.typecode, self.type_id =
_np_dtypes_dict[self.dtype]
def xwrite(self, buffer, value):
assert value.itemsize == self.itemsize
@@ -731,3 +717,14 @@ class PickleSerializer(Serializer):
def read(self, buffer):
return self.fory.handle_unsupported_read(buffer)
+
+
+class ComplexObjectSerializer(DataClassSerializer):
+ def __new__(cls, fory, clz):
+ warnings.warn(
+ "`ComplexObjectSerializer` is deprecated and will be removed in a
future version. "
+ "Use `DataClassSerializer(fory, clz, xlang=True)` instead.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ return DataClassSerializer(fory, clz, xlang=True)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]