This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9e262f4  ARROW-1924: [Python] Bring back pickle=True option for serialization
9e262f4 is described below

commit 9e262f46a76fd02ee5226ee9e9250a917e93fe0f
Author: Philipp Moritz <[email protected]>
AuthorDate: Mon Dec 18 00:34:56 2017 -0500

    ARROW-1924: [Python] Bring back pickle=True option for serialization
    
    Author: Philipp Moritz <[email protected]>
    
    Closes #1420 from pcmoritz/revert-to-pickle-arg and squashes the following commits:
    
    bfef3aeb [Philipp Moritz] fix windows test
    c1566538 [Philipp Moritz] fix remote serialization test on windows
    3f58d0df [Philipp Moritz] fix windows
    6a2a83dd [Philipp Moritz] add regression test
    3eb93258 [Philipp Moritz] fix
    518fb7d8 [Philipp Moritz] fix
    b4885862 [Philipp Moritz] revert to pickle=True argument for serialization
---
 python/pyarrow/serialization.pxi           | 14 ++++++++++++--
 python/pyarrow/serialization.py            |  7 ++-----
 python/pyarrow/tests/test_serialization.py | 30 ++++++++++++++++++++++++++----
 3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/python/pyarrow/serialization.pxi b/python/pyarrow/serialization.pxi
index cbc5e3b..d95d582 100644
--- a/python/pyarrow/serialization.pxi
+++ b/python/pyarrow/serialization.pxi
@@ -47,6 +47,7 @@ cdef class SerializationContext:
     cdef:
         object type_to_type_id
         object whitelisted_types
+        object types_to_pickle
         object custom_serializers
         object custom_deserializers
 
@@ -54,6 +55,7 @@ cdef class SerializationContext:
         # Types with special serialization handlers
         self.type_to_type_id = dict()
         self.whitelisted_types = dict()
+        self.types_to_pickle = set()
         self.custom_serializers = dict()
         self.custom_deserializers = dict()
 
@@ -73,7 +75,7 @@ cdef class SerializationContext:
 
         return result
 
-    def register_type(self, type_, type_id,
+    def register_type(self, type_, type_id, pickle=False,
                       custom_serializer=None, custom_deserializer=None):
         """EXPERIMENTAL: Add type to the list of types we can serialize.
 
@@ -83,6 +85,9 @@ cdef class SerializationContext:
             The type that we can serialize.
         type_id : bytes
             A string of bytes used to identify the type.
+        pickle : bool
+            True if the serialization should be done with pickle.
+            False if it should be done efficiently with Arrow.
         custom_serializer : callable
             This argument is optional, but can be provided to
             serialize objects of the class in a particular way.
@@ -92,6 +97,8 @@ cdef class SerializationContext:
         """
         self.type_to_type_id[type_] = type_id
         self.whitelisted_types[type_id] = type_
+        if pickle:
+            self.types_to_pickle.add(type_id)
         if custom_serializer is not None:
             self.custom_serializers[type_id] = custom_serializer
             self.custom_deserializers[type_id] = custom_deserializer
@@ -111,7 +118,9 @@ cdef class SerializationContext:
 
         # use the closest match to type(obj)
         type_id = self.type_to_type_id[type_]
-        if type_id in self.custom_serializers:
+        if type_id in self.types_to_pickle:
+            serialized_obj = {"data": pickle.dumps(obj), "pickle": True}
+        elif type_id in self.custom_serializers:
             serialized_obj = {"data": self.custom_serializers[type_id](obj)}
         else:
             if is_named_tuple(type_):
@@ -132,6 +141,7 @@ cdef class SerializationContext:
             # The object was pickled, so unpickle it.
             obj = pickle.loads(serialized_obj["data"])
         else:
+            assert type_id not in self.types_to_pickle
             if type_id not in self.whitelisted_types:
                 msg = "Type ID " + str(type_id) + " not registered in " \
                       "deserialization callback"
diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index 3059dfc..689ec15 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -104,12 +104,9 @@ def register_default_serialization_handlers(serialization_context):
 
     serialization_context.register_type(
         type(lambda: 0), "function",
-        custom_serializer=cloudpickle.dumps,
-        custom_deserializer=cloudpickle.loads)
+        pickle=True)
 
-    serialization_context.register_type(type, "type",
-                                        custom_serializer=cloudpickle.dumps,
-                                        custom_deserializer=cloudpickle.loads)
+    serialization_context.register_type(type, "type", pickle=True)
 
     serialization_context.register_type(
         np.ndarray, 'np.array',
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index 6d85621..f245dc2 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -21,9 +21,9 @@ import pytest
 
 from collections import namedtuple, OrderedDict, defaultdict
 import datetime
+import os
 import string
 import sys
-import pickle
 
 import pyarrow as pa
 import numpy as np
@@ -198,9 +198,7 @@ def make_serialization_context():
     context.register_type(Baz, "Baz")
     context.register_type(Qux, "Quz")
     context.register_type(SubQux, "SubQux")
-    context.register_type(SubQuxPickle, "SubQuxPickle",
-                          custom_serializer=pickle.dumps,
-                          custom_deserializer=pickle.loads)
+    context.register_type(SubQuxPickle, "SubQuxPickle", pickle=True)
     context.register_type(Exception, "Exception")
     context.register_type(CustomError, "CustomError")
     context.register_type(Point, "Point")
@@ -519,3 +517,27 @@ def test_serialize_to_components_invalid_cases():
 
     with pytest.raises(pa.ArrowException):
         pa.deserialize_components(components)
+
+
+@pytest.mark.skipif(os.name == 'nt', reason="deserialize_regex not pickleable")
+def test_deserialize_in_different_process():
+    from multiprocessing import Process, Queue
+    import re
+
+    regex = re.compile(r"\d+\.\d*")
+
+    serialization_context = pa.SerializationContext()
+    serialization_context.register_type(type(regex), "Regex", pickle=True)
+
+    serialized = pa.serialize(regex, serialization_context)
+    serialized_bytes = serialized.to_buffer().to_pybytes()
+
+    def deserialize_regex(serialized, q):
+        import pyarrow as pa
+        q.put(pa.deserialize(serialized))
+
+    q = Queue()
+    p = Process(target=deserialize_regex, args=(serialized_bytes, q))
+    p.start()
+    assert q.get().pattern == regex.pattern
+    p.join()

-- 
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].

Reply via email to