penguin-wwy commented on code in PR #1690:
URL: https://github.com/apache/fury/pull/1690#discussion_r1898908032


##########
python/pyfury/_registry.py:
##########
@@ -0,0 +1,627 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import array
+import dataclasses
+import datetime
+import enum
+import functools
+import logging
+from typing import TypeVar, Union
+from enum import Enum
+
+from pyfury._serialization import ENABLE_FURY_CYTHON_SERIALIZATION
+from pyfury import Language
+from pyfury.error import TypeUnregisteredError
+
+from pyfury.serializer import (
+    Serializer,
+    Numpy1DArraySerializer,
+    NDArraySerializer,
+    PyArraySerializer,
+    DynamicPyArraySerializer,
+    _PickleStub,
+    PickleStrongCacheStub,
+    PickleCacheStub,
+    NoneSerializer,
+    BooleanSerializer,
+    ByteSerializer,
+    Int16Serializer,
+    Int32Serializer,
+    Int64Serializer,
+    DynamicIntSerializer,
+    FloatSerializer,
+    DoubleSerializer,
+    DynamicFloatSerializer,
+    StringSerializer,
+    DateSerializer,
+    TimestampSerializer,
+    BytesSerializer,
+    ListSerializer,
+    TupleSerializer,
+    MapSerializer,
+    SetSerializer,
+    EnumSerializer,
+    SliceSerializer,
+    PickleCacheSerializer,
+    PickleStrongCacheSerializer,
+    PickleSerializer,
+)
+from pyfury._struct import ComplexObjectSerializer
+from pyfury.buffer import Buffer
+from pyfury.meta.metastring import MetaStringEncoder, MetaStringDecoder
+from pyfury.type import (
+    TypeId,
+    Int8Type,
+    Int16Type,
+    Int32Type,
+    Int64Type,
+    Float32Type,
+    Float64Type,
+    load_class,
+)
+from pyfury._fury import (
+    DYNAMIC_TYPE_ID,
+    # preserve 0 as flag for class id not set in ClassInfo`
+    NO_CLASS_ID,
+    PYINT_CLASS_ID,
+    PYFLOAT_CLASS_ID,
+    PYBOOL_CLASS_ID,
+    STRING_CLASS_ID,
+    PICKLE_CLASS_ID,
+    PICKLE_STRONG_CACHE_CLASS_ID,
+    PICKLE_CACHE_CLASS_ID,
+)
+
+try:
+    import numpy as np
+except ImportError:
+    np = None
+
+logger = logging.getLogger(__name__)
+
+
+if ENABLE_FURY_CYTHON_SERIALIZATION:
+    from pyfury._serialization import ClassInfo
+else:
+
+    class ClassInfo:
+        __slots__ = (
+            "cls",
+            "type_id",
+            "serializer",
+            "namespace_bytes",
+            "typename_bytes",
+            "dynamic_type",
+        )
+
+        def __init__(
+            self,
+            cls: type = None,
+            type_id: int = NO_CLASS_ID,
+            serializer: Serializer = None,
+            namespace_bytes=None,
+            typename_bytes=None,
+            dynamic_type: bool = False,
+        ):
+            self.cls = cls
+            self.type_id = type_id
+            self.serializer = serializer
+            self.namespace_bytes = namespace_bytes
+            self.typename_bytes = typename_bytes
+            self.dynamic_type = dynamic_type
+
+        def __repr__(self):
+            return (
+                f"ClassInfo(cls={self.cls}, type_id={self.type_id}, "
+                f"serializer={self.serializer})"
+            )
+
+
+class ClassResolver:
+    __slots__ = (
+        "fury",
+        "_metastr_to_str",
+        "_type_id_counter",
+        "_classes_info",
+        "_hash_to_metastring",
+        "_metastr_to_class",
+        "_hash_to_classinfo",
+        "_dynamic_id_to_classinfo_list",
+        "_dynamic_id_to_metastr_list",
+        "_dynamic_write_string_id",
+        "_dynamic_written_metastr",
+        "_ns_type_to_classinfo",
+        "_named_type_to_classinfo",
+        "namespace_encoder",
+        "namespace_decoder",
+        "typename_encoder",
+        "typename_decoder",
+        "require_registration",
+        "metastring_resolver",
+        "language",
+        "_type_id_to_classinfo",
+    )
+
+    def __init__(self, fury):
+        self.fury = fury
+        self.metastring_resolver = fury.metastring_resolver
+        self.language = fury.language
+        self.require_registration = fury.require_class_registration
+        self._metastr_to_str = dict()
+        self._metastr_to_class = dict()
+        self._hash_to_metastring = dict()
+        self._hash_to_classinfo = dict()
+        self._dynamic_written_metastr = []
+        self._type_id_to_classinfo = dict()
+        self._type_id_counter = PICKLE_CACHE_CLASS_ID + 1
+        self._dynamic_write_string_id = 0
+        # hold objects to avoid gc, since `flat_hash_map/vector` doesn't
+        # hold python reference.
+        self._classes_info = dict()
+        self._ns_type_to_classinfo = dict()
+        self._named_type_to_classinfo = dict()
+        self.namespace_encoder = MetaStringEncoder(".", "_")
+        self.namespace_decoder = MetaStringDecoder(".", "_")
+        self.typename_encoder = MetaStringEncoder("$", "_")
+        self.typename_decoder = MetaStringDecoder("$", "_")
+
+    def initialize(self):
+        if self.fury.language == Language.PYTHON:
+            self._initialize_py()
+        else:
+            self._initialize_xlang()
+
+    def _initialize_py(self):
+        register = functools.partial(self._register_type, internal=True)
+        register(int, type_id=PYINT_CLASS_ID, serializer=Int64Serializer)
+        register(float, type_id=PYFLOAT_CLASS_ID, serializer=DoubleSerializer)
+        register(bool, type_id=PYBOOL_CLASS_ID, serializer=BooleanSerializer)
+        register(str, type_id=STRING_CLASS_ID, serializer=StringSerializer)
+        register(_PickleStub, type_id=PICKLE_CLASS_ID, serializer=PickleSerializer)
+        register(
+            PickleStrongCacheStub,
+            type_id=PICKLE_STRONG_CACHE_CLASS_ID,
+            serializer=PickleStrongCacheSerializer(self.fury),
+        )
+        register(
+            PickleCacheStub,
+            type_id=PICKLE_CACHE_CLASS_ID,
+            serializer=PickleCacheSerializer(self.fury),
+        )
+        register(type(None), serializer=NoneSerializer)
+        register(Int8Type, serializer=ByteSerializer)
+        register(Int16Type, serializer=Int16Serializer)
+        register(Int32Type, serializer=Int32Serializer)
+        register(Int64Type, serializer=Int64Serializer)
+        register(Float32Type, serializer=FloatSerializer)
+        register(Float64Type, serializer=DoubleSerializer)
+        register(datetime.date, serializer=DateSerializer)
+        register(datetime.datetime, serializer=TimestampSerializer)
+        register(bytes, serializer=BytesSerializer)
+        register(list, serializer=ListSerializer)
+        register(tuple, serializer=TupleSerializer)
+        register(dict, serializer=MapSerializer)
+        register(set, serializer=SetSerializer)
+        register(enum.Enum, serializer=EnumSerializer)
+        register(slice, serializer=SliceSerializer)
+        try:
+            import pyarrow as pa
+            from pyfury.format.serializer import (
+                ArrowRecordBatchSerializer,
+                ArrowTableSerializer,
+            )
+
+            register(pa.RecordBatch, serializer=ArrowRecordBatchSerializer)
+            register(pa.Table, serializer=ArrowTableSerializer)
+        except Exception:
+            pass
+        for size, ftype, type_id in PyArraySerializer.typecode_dict.values():
+            register(ftype, serializer=PyArraySerializer(self.fury, ftype, type_id))
+        register(array.array, serializer=DynamicPyArraySerializer)
+        if np:
+            register(np.ndarray, serializer=NDArraySerializer)
+
+    def _initialize_xlang(self):
+        register = functools.partial(self._register_type, internal=True)
+        register(bool, type_id=TypeId.BOOL, serializer=BooleanSerializer)
+        register(Int8Type, type_id=TypeId.INT8, serializer=ByteSerializer)
+        register(Int16Type, type_id=TypeId.INT16, serializer=Int16Serializer)
+        register(Int32Type, type_id=TypeId.INT32, serializer=Int32Serializer)
+        register(Int64Type, type_id=TypeId.INT64, serializer=Int64Serializer)
+        register(int, type_id=DYNAMIC_TYPE_ID, serializer=DynamicIntSerializer)
+        register(
+            Float32Type,
+            type_id=TypeId.FLOAT32,
+            serializer=FloatSerializer,
+        )
+        register(
+            Float64Type,
+            type_id=TypeId.FLOAT64,
+            serializer=DoubleSerializer,
+        )
+        register(float, type_id=DYNAMIC_TYPE_ID, serializer=DynamicFloatSerializer)
+        register(str, type_id=TypeId.STRING, serializer=StringSerializer)
+        # TODO(chaokunyang) DURATION DECIMAL
+        register(
+            datetime.datetime, type_id=TypeId.TIMESTAMP, serializer=TimestampSerializer
+        )
+        register(datetime.date, type_id=TypeId.LOCAL_DATE, serializer=DateSerializer)
+        register(bytes, type_id=TypeId.BINARY, serializer=BytesSerializer)
+        for itemsize, ftype, typeid in PyArraySerializer.typecode_dict.values():
+            register(
+                ftype,
+                type_id=typeid,
+                serializer=PyArraySerializer(self.fury, ftype, typeid),
+            )
+        register(
+            array.array, type_id=DYNAMIC_TYPE_ID, serializer=DynamicPyArraySerializer
+        )
+        if np:
+            # overwrite pyarray  with same type id.
+            # if pyarray are needed, one must annotate that value with XXXArrayType
+            # as a field of a struct.
+            for dtype, (
+                itemsize,
+                format,
+                ftype,
+                typeid,
+            ) in Numpy1DArraySerializer.dtypes_dict.items():
+                register(
+                    ftype,
+                    type_id=typeid,
+                    serializer=Numpy1DArraySerializer(self.fury, ftype, dtype),
+                )
+            register(np.ndarray, type_id=DYNAMIC_TYPE_ID, serializer=NDArraySerializer)
+        register(list, type_id=TypeId.LIST, serializer=ListSerializer)
+        register(set, type_id=TypeId.SET, serializer=SetSerializer)
+        register(dict, type_id=TypeId.MAP, serializer=MapSerializer)
+        try:
+            import pyarrow as pa
+            from pyfury.format.serializer import (
+                ArrowRecordBatchSerializer,
+                ArrowTableSerializer,
+            )
+
+            register(
+                pa.RecordBatch,
+                type_id=TypeId.ARROW_RECORD_BATCH,
+                serializer=ArrowRecordBatchSerializer,
+            )
+            register(
+                pa.Table, type_id=TypeId.ARROW_TABLE, serializer=ArrowTableSerializer
+            )
+        except Exception:
+            pass
+
+    def register_type(
+        self,
+        cls: Union[type, TypeVar],
+        *,
+        type_id: int = None,
+        namespace: str = None,
+        typename: str = None,
+        serializer=None,
+    ):
+        return self._register_type(
+            cls,
+            type_id=type_id,
+            namespace=namespace,
+            typename=typename,
+            serializer=serializer,
+        )
+
+    def _register_type(
+        self,
+        cls: Union[type, TypeVar],
+        *,
+        type_id: int = None,
+        namespace: str = None,
+        typename: str = None,
+        serializer=None,
+        internal=False,
+    ):
+        """Register class with given type id or typename. If typename is not None, it will be used for
+        cross-language serialization."""
+        if serializer is not None and not isinstance(serializer, Serializer):
+            try:
+                serializer = serializer(self.fury, cls)
+            except BaseException:
+                try:
+                    serializer = serializer(self.fury)
+                except BaseException:
+                    serializer = serializer()
+        n_params = len({typename, type_id, None}) - 1

Review Comment:
   lgtm



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to