This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9e262f4 ARROW-1924: [Python] Bring back pickle=True option for serialization
9e262f4 is described below
commit 9e262f46a76fd02ee5226ee9e9250a917e93fe0f
Author: Philipp Moritz <[email protected]>
AuthorDate: Mon Dec 18 00:34:56 2017 -0500
ARROW-1924: [Python] Bring back pickle=True option for serialization
Author: Philipp Moritz <[email protected]>
Closes #1420 from pcmoritz/revert-to-pickle-arg and squashes the following commits:
bfef3aeb [Philipp Moritz] fix windows test
c1566538 [Philipp Moritz] fix remote serialization test on windows
3f58d0df [Philipp Moritz] fix windows
6a2a83dd [Philipp Moritz] add regression test
3eb93258 [Philipp Moritz] fix
518fb7d8 [Philipp Moritz] fix
b4885862 [Philipp Moritz] revert to pickle=True argument for serialization
---
python/pyarrow/serialization.pxi | 14 ++++++++++++--
python/pyarrow/serialization.py | 7 ++-----
python/pyarrow/tests/test_serialization.py | 30 ++++++++++++++++++++++++++----
3 files changed, 40 insertions(+), 11 deletions(-)
diff --git a/python/pyarrow/serialization.pxi b/python/pyarrow/serialization.pxi
index cbc5e3b..d95d582 100644
--- a/python/pyarrow/serialization.pxi
+++ b/python/pyarrow/serialization.pxi
@@ -47,6 +47,7 @@ cdef class SerializationContext:
cdef:
object type_to_type_id
object whitelisted_types
+ object types_to_pickle
object custom_serializers
object custom_deserializers
@@ -54,6 +55,7 @@ cdef class SerializationContext:
# Types with special serialization handlers
self.type_to_type_id = dict()
self.whitelisted_types = dict()
+ self.types_to_pickle = set()
self.custom_serializers = dict()
self.custom_deserializers = dict()
@@ -73,7 +75,7 @@ cdef class SerializationContext:
return result
- def register_type(self, type_, type_id,
+ def register_type(self, type_, type_id, pickle=False,
custom_serializer=None, custom_deserializer=None):
"""EXPERIMENTAL: Add type to the list of types we can serialize.
@@ -83,6 +85,9 @@ cdef class SerializationContext:
The type that we can serialize.
type_id : bytes
A string of bytes used to identify the type.
+ pickle : bool
+ True if the serialization should be done with pickle.
+ False if it should be done efficiently with Arrow.
custom_serializer : callable
This argument is optional, but can be provided to
serialize objects of the class in a particular way.
@@ -92,6 +97,8 @@ cdef class SerializationContext:
"""
self.type_to_type_id[type_] = type_id
self.whitelisted_types[type_id] = type_
+ if pickle:
+ self.types_to_pickle.add(type_id)
if custom_serializer is not None:
self.custom_serializers[type_id] = custom_serializer
self.custom_deserializers[type_id] = custom_deserializer
@@ -111,7 +118,9 @@ cdef class SerializationContext:
# use the closest match to type(obj)
type_id = self.type_to_type_id[type_]
- if type_id in self.custom_serializers:
+ if type_id in self.types_to_pickle:
+ serialized_obj = {"data": pickle.dumps(obj), "pickle": True}
+ elif type_id in self.custom_serializers:
serialized_obj = {"data": self.custom_serializers[type_id](obj)}
else:
if is_named_tuple(type_):
@@ -132,6 +141,7 @@ cdef class SerializationContext:
# The object was pickled, so unpickle it.
obj = pickle.loads(serialized_obj["data"])
else:
+ assert type_id not in self.types_to_pickle
if type_id not in self.whitelisted_types:
msg = "Type ID " + str(type_id) + " not registered in " \
"deserialization callback"
diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index 3059dfc..689ec15 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -104,12 +104,9 @@ def register_default_serialization_handlers(serialization_context):
serialization_context.register_type(
type(lambda: 0), "function",
- custom_serializer=cloudpickle.dumps,
- custom_deserializer=cloudpickle.loads)
+ pickle=True)
- serialization_context.register_type(type, "type",
- custom_serializer=cloudpickle.dumps,
- custom_deserializer=cloudpickle.loads)
+ serialization_context.register_type(type, "type", pickle=True)
serialization_context.register_type(
np.ndarray, 'np.array',
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index 6d85621..f245dc2 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -21,9 +21,9 @@ import pytest
from collections import namedtuple, OrderedDict, defaultdict
import datetime
+import os
import string
import sys
-import pickle
import pyarrow as pa
import numpy as np
@@ -198,9 +198,7 @@ def make_serialization_context():
context.register_type(Baz, "Baz")
context.register_type(Qux, "Quz")
context.register_type(SubQux, "SubQux")
- context.register_type(SubQuxPickle, "SubQuxPickle",
- custom_serializer=pickle.dumps,
- custom_deserializer=pickle.loads)
+ context.register_type(SubQuxPickle, "SubQuxPickle", pickle=True)
context.register_type(Exception, "Exception")
context.register_type(CustomError, "CustomError")
context.register_type(Point, "Point")
@@ -519,3 +517,27 @@ def test_serialize_to_components_invalid_cases():
with pytest.raises(pa.ArrowException):
pa.deserialize_components(components)
+
+
+@pytest.mark.skipif(os.name == 'nt', reason="deserialize_regex not pickleable")
+def test_deserialize_in_different_process():
+ from multiprocessing import Process, Queue
+ import re
+
+ regex = re.compile(r"\d+\.\d*")
+
+ serialization_context = pa.SerializationContext()
+ serialization_context.register_type(type(regex), "Regex", pickle=True)
+
+ serialized = pa.serialize(regex, serialization_context)
+ serialized_bytes = serialized.to_buffer().to_pybytes()
+
+ def deserialize_regex(serialized, q):
+ import pyarrow as pa
+ q.put(pa.deserialize(serialized))
+
+ q = Queue()
+ p = Process(target=deserialize_regex, args=(serialized_bytes, q))
+ p.start()
+ assert q.get().pattern == regex.pattern
+ p.join()
--
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].