Author: Armin Rigo <[email protected]>
Branch: py3.5
Changeset: r86661:4384ff755734
Date: 2016-08-28 22:16 +0200
http://bitbucket.org/pypy/pypy/changeset/4384ff755734/
Log: hg merge py3.5-marshal3
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -837,13 +837,13 @@
self.interned_strings.set(u, w_s1)
return w_s1
- def is_interned_str(self, s):
+ def get_interned_str(self, s):
"""Assumes an identifier (utf-8 encoded str)"""
# interface for marshal_impl
if not we_are_translated():
assert type(s) is str
u = s.decode('utf-8')
- return self.interned_strings.get(u) is not None
+ return self.interned_strings.get(u) # may be None
def descr_self_interp_w(self, RequiredClass, w_obj):
if not isinstance(w_obj, RequiredClass):
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -37,7 +37,7 @@
# different value for the highest 16 bits. Bump pypy_incremental_magic every
# time you make pyc files incompatible
-pypy_incremental_magic = 64 # bump it by 16
+pypy_incremental_magic = 80 # bump it by 16
assert pypy_incremental_magic % 16 == 0
assert pypy_incremental_magic < 3000 # the magic number of Python 3. There are
# no known magic numbers below this value
diff --git a/pypy/module/_frozen_importlib/__init__.py b/pypy/module/_frozen_importlib/__init__.py
--- a/pypy/module/_frozen_importlib/__init__.py
+++ b/pypy/module/_frozen_importlib/__init__.py
@@ -47,8 +47,10 @@
def _cached_compile(space, name, source, *args):
from rpython.config.translationoption import CACHE_DIR
from pypy.module.marshal import interp_marshal
+ from pypy.interpreter.pycode import default_magic
- cachename = os.path.join(CACHE_DIR, 'frozen_importlib_%s' % (name,))
+ cachename = os.path.join(CACHE_DIR, 'frozen_importlib_%d%s' % (
+ default_magic, name))
try:
if space.config.translating:
raise IOError("don't use the cache when translating pypy")
diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py
--- a/pypy/module/imp/importing.py
+++ b/pypy/module/imp/importing.py
@@ -228,7 +228,7 @@
# CPython + 7 = default_magic -- used by PyPy (incompatible!)
#
from pypy.interpreter.pycode import default_magic
-MARSHAL_VERSION_FOR_PYC = 2
+MARSHAL_VERSION_FOR_PYC = 4
def get_pyc_magic(space):
return default_magic
diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py
--- a/pypy/module/marshal/interp_marshal.py
+++ b/pypy/module/marshal/interp_marshal.py
@@ -4,24 +4,30 @@
from rpython.rlib import rstackovf
from pypy.objspace.std.marshal_impl import marshal, get_unmarshallers
+#
+# Write Python objects to files and read them back. This is primarily
+# intended for writing and reading compiled Python code, even though
+# dicts, lists, sets and frozensets, not commonly seen in code
+# objects, are supported. Version 3 of this protocol properly
+# supports circular links and sharing. The previous version is called
+# "2", like in Python 2.7, although it is not always compatible
+# between CPython 2.7 and CPython 3.4. Version 4 adds small
+# optimizations in compactness.
+#
+# XXX: before py3k, there was logic to do efficiently dump()/load() on
+# a file object. The corresponding logic is gone from CPython 3.x, so
+# I don't feel bad about killing it here too.
+#
-Py_MARSHAL_VERSION = 2
+Py_MARSHAL_VERSION = 4
+
@unwrap_spec(w_version=WrappedDefault(Py_MARSHAL_VERSION))
def dump(space, w_data, w_f, w_version):
"""Write the 'data' object into the open file 'f'."""
- # XXX: before py3k, we special-cased W_File to use a more performant
- # FileWriter class. Should we do the same for py3k? Look also at
- # DirectStreamWriter
- writer = FileWriter(space, w_f)
- try:
- # note: bound methods are currently not supported,
- # so we have to pass the instance in, instead.
- ##m = Marshaller(space, writer.write, space.int_w(w_version))
- m = Marshaller(space, writer, space.int_w(w_version))
- m.dump_w_obj(w_data)
- finally:
- writer.finished()
+ # same implementation as CPython 3.x.
+ w_string = dumps(space, w_data, w_version)
+ space.call_method(w_f, 'write', w_string)
@unwrap_spec(w_version=WrappedDefault(Py_MARSHAL_VERSION))
def dumps(space, w_data, w_version):
@@ -33,9 +39,6 @@
def load(space, w_f):
"""Read one value from the file 'f' and return it."""
- # XXX: before py3k, we special-cased W_File to use a more performant
- # FileWriter class. Should we do the same for py3k? Look also at
- # DirectStreamReader
reader = FileReader(space, w_f)
try:
u = Unmarshaller(space, reader)
@@ -68,22 +71,6 @@
def write(self, data):
raise NotImplementedError("Purely abstract method")
-class FileWriter(AbstractReaderWriter):
- def __init__(self, space, w_f):
- AbstractReaderWriter.__init__(self, space)
- try:
- self.func = space.getattr(w_f, space.wrap('write'))
- # XXX how to check if it is callable?
- except OperationError as e:
- if not e.match(space, space.w_AttributeError):
- raise
- raise oefmt(space.w_TypeError,
- "marshal.dump() 2nd arg must be file-like object")
-
- def write(self, data):
- space = self.space
- space.call_function(self.func, space.newbytes(data))
-
class FileReader(AbstractReaderWriter):
def __init__(self, space, w_f):
@@ -111,33 +98,6 @@
return ret
-class StreamReaderWriter(AbstractReaderWriter):
- def __init__(self, space, file):
- AbstractReaderWriter.__init__(self, space)
- self.file = file
- file.lock()
-
- def finished(self):
- self.file.unlock()
-
-class DirectStreamWriter(StreamReaderWriter):
- """
- XXX: this class is unused right now. Look at the comment in dump()
- """
- def write(self, data):
- self.file.do_direct_write(data)
-
-class DirectStreamReader(StreamReaderWriter):
- """
- XXX: this class is unused right now. Look at the comment in dump()
- """
- def read(self, n):
- data = self.file.direct_read(n)
- if len(data) < n:
- self.raise_eof()
- return data
-
-
class _Base(object):
def raise_exc(self, msg):
space = self.space
@@ -168,7 +128,15 @@
## self.put = putfunc
self.writer = writer
self.version = version
- self.stringtable = {}
+ self.all_refs = {}
+ # all_refs = {w_obj: index} for all w_obj that are of a
+ # "reasonably sharable" type. CPython checks the refcount of
+ # any object to know if it is sharable, independently of its
+ # type. We can't do that. We could do a two-pass marshaller.
+ # For now we simply add to this list all objects that marshal to
+ # more than a few fixed-sized bytes, minus ones like code
+ # objects that never appear more than once except in complete
+ # corner cases.
## currently we cannot use a put that is a bound method
## from outside. Same holds for get.
@@ -239,10 +207,13 @@
rstackovf.check_stack_overflow()
self._overflow()
- def put_tuple_w(self, typecode, lst_w):
+ def put_tuple_w(self, typecode, lst_w, single_byte_size=False):
self.start(typecode)
lng = len(lst_w)
- self.put_int(lng)
+ if single_byte_size:
+ self.put(chr(lng))
+ else:
+ self.put_int(lng)
idx = 0
while idx < lng:
w_obj = lst_w[idx]
@@ -333,19 +304,35 @@
def invalid_typecode(space, u, tc):
- u.raise_exc("bad marshal data (unknown type code)")
+ u.raise_exc("bad marshal data (unknown type code %d)" % (ord(tc),))
+def _make_unmarshall_and_save_ref(func):
+ def unmarshall_save_ref(space, u, tc):
+ index = len(u.refs_w)
+ u.refs_w.append(None)
+ w_obj = func(space, u, tc)
+ u.refs_w[index] = w_obj
+ return w_obj
+ return unmarshall_save_ref
-class Unmarshaller(_Base):
+def _make_unmarshaller_dispatch():
_dispatch = [invalid_typecode] * 256
for tc, func in get_unmarshallers():
_dispatch[ord(tc)] = func
+ for tc, func in get_unmarshallers():
+ if tc < '\x80' and _dispatch[ord(tc) + 0x80] is invalid_typecode:
+ _dispatch[ord(tc) + 0x80] = _make_unmarshall_and_save_ref(func)
+ return _dispatch
+
+
+class Unmarshaller(_Base):
+ _dispatch = _make_unmarshaller_dispatch()
def __init__(self, space, reader):
self.space = space
self.reader = reader
- self.stringtable_w = []
+ self.refs_w = []
def get(self, n):
assert n >= 0
@@ -355,6 +342,10 @@
# the [0] is used to convince the annotator to return a char
return self.get(1)[0]
+ def save_ref(self, typecode, w_obj):
+ if typecode >= '\x80':
+ self.refs_w.append(w_obj)
+
def atom_str(self, typecode):
self.start(typecode)
lng = self.get_lng()
@@ -425,8 +416,11 @@
self._overflow()
# inlined version to save a recursion level
- def get_tuple_w(self):
- lng = self.get_lng()
+ def get_tuple_w(self, single_byte_size=False):
+ if single_byte_size:
+ lng = ord(self.get1())
+ else:
+ lng = self.get_lng()
res_w = [None] * lng
idx = 0
space = self.space
@@ -442,9 +436,6 @@
raise oefmt(space.w_TypeError, "NULL object in marshal data")
return res_w
- def get_list_w(self):
- return self.get_tuple_w()[:]
-
def _overflow(self):
self.raise_exc('object too deeply nested to unmarshal')
diff --git a/pypy/module/marshal/test/test_marshal.py b/pypy/module/marshal/test/test_marshal.py
--- a/pypy/module/marshal/test/test_marshal.py
+++ b/pypy/module/marshal/test/test_marshal.py
@@ -199,7 +199,7 @@
def test_bad_typecode(self):
import marshal
exc = raises(ValueError, marshal.loads, bytes([1]))
- assert str(exc.value) == "bad marshal data (unknown type code)"
+ assert str(exc.value).startswith("bad marshal data (unknown type code")
def test_bad_data(self):
# If you have sufficiently little memory, the line at the end of the
diff --git a/pypy/module/marshal/test/test_marshalimpl.py b/pypy/module/marshal/test/test_marshalimpl.py
--- a/pypy/module/marshal/test/test_marshalimpl.py
+++ b/pypy/module/marshal/test/test_marshalimpl.py
@@ -6,20 +6,6 @@
class AppTestMarshalMore:
spaceconfig = dict(usemodules=('array',))
- def test_unmarshal_int64(self):
- # test that we can unmarshal 64-bit ints on 32-bit platforms
- # (of course we only test that if we're running on such a
- # platform :-)
- import marshal
- z = marshal.loads(b'I\x00\xe4\x0bT\x02\x00\x00\x00')
- assert z == 10000000000
- z = marshal.loads(b'I\x00\x1c\xf4\xab\xfd\xff\xff\xff')
- assert z == -10000000000
- z = marshal.loads(b'I\x88\x87\x86\x85\x84\x83\x82\x01')
- assert z == 108793946209421192
- z = marshal.loads(b'I\xd8\xd8\xd9\xda\xdb\xdc\xcd\xfe')
- assert z == -0x0132232425262728
-
def test_marshal_bufferlike_object(self):
import marshal, array
s = marshal.dumps(array.array('b', b'asd'))
@@ -33,10 +19,6 @@
def test_unmarshal_evil_long(self):
import marshal
raises(ValueError, marshal.loads, b'l\x02\x00\x00\x00\x00\x00\x00\x00')
- z = marshal.loads(b'I\x00\xe4\x0bT\x02\x00\x00\x00')
- assert z == 10000000000
- z = marshal.loads(b'I\x00\x1c\xf4\xab\xfd\xff\xff\xff')
- assert z == -10000000000
def test_marshal_code_object(self):
def foo(a, b):
@@ -49,6 +31,37 @@
if attr_name.startswith("co_"):
assert getattr(code2, attr_name) == getattr(foo.__code__,
attr_name)
+ def test_unmarshal_ascii(self):
+ import marshal
+ s = marshal.loads(b"a\x04\x00\x00\x00ab\xc2\x84")
+ assert s == "ab\xc2\x84"
+ s = marshal.loads(b"A\x04\x00\x00\x00ab\xc2\x84")
+ assert s == "ab\xc2\x84"
+ s = marshal.loads(b"z\x04ab\xc2\x84")
+ assert s == "ab\xc2\x84"
+ s = marshal.loads(b"Z\x04ab\xc2\x84")
+ assert s == "ab\xc2\x84"
+
+ def test_shared_string(self):
+ import marshal
+ x = "hello, "
+ x += "world"
+ xl = 256
+ xl **= 100
+ for version in [2, 3]:
+ s = marshal.dumps((x, x), version)
+ assert s.count(b'hello, world') == 2 if version < 3 else 1
+ y = marshal.loads(s)
+ assert y == (x, x)
+ #
+ s = marshal.dumps((xl, xl), version)
+ if version < 3:
+ assert 200 < len(s) < 250
+ else:
+ assert 100 < len(s) < 125
+ yl = marshal.loads(s)
+ assert yl == (xl, xl)
+
class AppTestMarshalSmallLong(AppTestMarshalMore):
spaceconfig = dict(usemodules=('array',),
@@ -62,6 +75,7 @@
# NOTE: marshal is platform independent, running this test must assume
# that self.seen gets values from the endianess of the marshal module.
# (which is little endian!)
+ version = 2
def __init__(self):
self.seen = []
def start(self, code):
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -2,6 +2,7 @@
from rpython.rlib.rstring import StringBuilder
from rpython.rlib.rstruct import ieee
from rpython.rlib.unroll import unrolling_iterable
+from rpython.rlib import objectmodel
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.special import Ellipsis
@@ -29,14 +30,14 @@
TYPE_STOPITER = 'S'
TYPE_ELLIPSIS = '.'
TYPE_INT = 'i'
-TYPE_INT64 = 'I'
TYPE_FLOAT = 'f'
TYPE_BINARY_FLOAT = 'g'
TYPE_COMPLEX = 'x'
TYPE_BINARY_COMPLEX = 'y'
TYPE_LONG = 'l'
-TYPE_STRING = 's'
-TYPE_STRINGREF = 'R'
+TYPE_STRING = 's' # a *byte* string, not unicode
+TYPE_INTERNED = 't'
+TYPE_REF = 'r'
TYPE_TUPLE = '('
TYPE_LIST = '['
TYPE_DICT = '{'
@@ -45,6 +46,15 @@
TYPE_UNKNOWN = '?'
TYPE_SET = '<'
TYPE_FROZENSET = '>'
+FLAG_REF = 0x80 # bit added to mean "add obj to index"
+FLAG_DONE = '\x00'
+
+# the following typecodes have been added in version 4.
+TYPE_ASCII = 'a' # never generated so far by pypy
+TYPE_ASCII_INTERNED = 'A' # never generated so far by pypy
+TYPE_SMALL_TUPLE = ')'
+TYPE_SHORT_ASCII = 'z' # never generated so far by pypy
+TYPE_SHORT_ASCII_INTERNED = 'Z' # never generated so far by pypy
_marshallers = []
@@ -56,12 +66,33 @@
return f
return _decorator
-def unmarshaller(tc):
+def unmarshaller(tc, save_ref=False):
def _decorator(f):
+ assert tc < '\x80'
_unmarshallers.append((tc, f))
+ if save_ref:
+ tcref = chr(ord(tc) + 0x80)
+ _unmarshallers.append((tcref, f))
return f
return _decorator
+def write_ref(typecode, w_obj, m):
+ if m.version < 3:
+ return typecode # not writing object references
+ try:
+ index = m.all_refs[w_obj]
+ except KeyError:
+ # we don't support long indices
+ index = len(m.all_refs)
+ if index >= 0x7fffffff:
+ return typecode
+ m.all_refs[w_obj] = index
+ return chr(ord(typecode) + FLAG_REF)
+ else:
+ # write the reference index to the stream
+ m.atom_int(TYPE_REF, index)
+ return FLAG_DONE
+
def marshal(space, w_obj, m):
# _marshallers_unroll is defined at the end of the file
# NOTE that if w_obj is a heap type, like an instance of a
@@ -80,7 +111,9 @@
if e.match(space, space.w_TypeError):
raise oefmt(space.w_ValueError, "unmarshallable object")
raise
- m.atom_str(TYPE_STRING, s.as_str())
+ typecode = write_ref(TYPE_STRING, w_obj, m)
+ if typecode != FLAG_DONE:
+ m.atom_str(typecode, s.as_str())
def get_unmarshallers():
return _unmarshallers
@@ -130,37 +163,27 @@
@marshaller(W_IntObject)
def marshal_int(space, w_int, m):
- if LONG_BIT == 32:
+ y = w_int.intval >> 31
+ if y and y != -1:
+ marshal_long(space, w_int, m)
+ else:
m.atom_int(TYPE_INT, w_int.intval)
- else:
- y = w_int.intval >> 31
- if y and y != -1:
- m.atom_int64(TYPE_INT64, w_int.intval)
- else:
- m.atom_int(TYPE_INT, w_int.intval)
@unmarshaller(TYPE_INT)
def unmarshal_int(space, u, tc):
return space.newint(u.get_int())
-@unmarshaller(TYPE_INT64)
-def unmarshal_int64(space, u, tc):
- lo = u.get_int() # get the first 32 bits
- hi = u.get_int() # get the next 32 bits
- if LONG_BIT >= 64:
- x = (hi << 32) | (lo & (2**32-1)) # result fits in an int
- else:
- x = (r_longlong(hi) << 32) | r_longlong(r_uint(lo)) # get a r_longlong
- return space.wrap(x)
-
@marshaller(W_AbstractLongObject)
def marshal_long(space, w_long, m):
from rpython.rlib.rarithmetic import r_ulonglong
- m.start(TYPE_LONG)
+ typecode = write_ref(TYPE_LONG, w_long, m)
+ if typecode == FLAG_DONE:
+ return
+ m.start(typecode)
SHIFT = 15
MASK = (1 << SHIFT) - 1
- num = w_long.asbigint()
+ num = space.bigint_w(w_long)
sign = num.sign
num = num.abs()
total_length = (num.bit_length() + (SHIFT - 1)) / SHIFT
@@ -248,59 +271,79 @@
@marshaller(W_BytesObject)
def marshal_bytes(space, w_str, m):
- s = w_str.unwrap(space)
- m.atom_str(TYPE_STRING, s)
+ typecode = write_ref(TYPE_STRING, w_str, m)
+ if typecode != FLAG_DONE:
+ s = space.bytes_w(w_str)
+ m.atom_str(typecode, s)
@unmarshaller(TYPE_STRING)
def unmarshal_bytes(space, u, tc):
return space.newbytes(u.get_str())
-@unmarshaller(TYPE_STRINGREF)
-def unmarshal_stringref(space, u, tc):
- idx = u.get_int()
- try:
- return u.stringtable_w[idx]
- except IndexError:
- raise oefmt(space.w_ValueError, "bad marshal data")
+def _marshal_tuple(space, tuple_w, m):
+ if m.version >= 4 and len(tuple_w) < 256:
+ typecode = TYPE_SMALL_TUPLE
+ single_byte_size = True
+ else:
+ typecode = TYPE_TUPLE
+ single_byte_size = False
+ # -- does it make any sense to try to share tuples, based on the
+ # -- *identity* of the tuple object? I'd guess not really
+ #typecode = write_ref(typecode, w_tuple, m)
+ #if typecode != FLAG_DONE:
+ m.put_tuple_w(typecode, tuple_w, single_byte_size=single_byte_size)
@marshaller(W_AbstractTupleObject)
def marshal_tuple(space, w_tuple, m):
- items = w_tuple.tolist()
- m.put_tuple_w(TYPE_TUPLE, items)
+ _marshal_tuple(space, w_tuple.tolist(), m)
@unmarshaller(TYPE_TUPLE)
def unmarshal_tuple(space, u, tc):
items_w = u.get_tuple_w()
return space.newtuple(items_w)
+@unmarshaller(TYPE_SMALL_TUPLE)
+def unmarshal_tuple(space, u, tc):
+ items_w = u.get_tuple_w(single_byte_size=True)
+ return space.newtuple(items_w)
+
@marshaller(W_ListObject)
def marshal_list(space, w_list, m):
- items = w_list.getitems()[:]
- m.put_tuple_w(TYPE_LIST, items)
+ typecode = write_ref(TYPE_LIST, w_list, m)
+ if typecode != FLAG_DONE:
+ items = w_list.getitems()[:]
+ m.put_tuple_w(typecode, items)
-@unmarshaller(TYPE_LIST)
+@unmarshaller(TYPE_LIST, save_ref=True)
def unmarshal_list(space, u, tc):
- items_w = u.get_list_w()
- return space.newlist(items_w)
+ w_obj = space.newlist([])
+ u.save_ref(tc, w_obj)
+ for w_item in u.get_tuple_w():
+ w_obj.append(w_item)
+ return w_obj
@marshaller(W_DictMultiObject)
def marshal_dict(space, w_dict, m):
- m.start(TYPE_DICT)
+ typecode = write_ref(TYPE_DICT, w_dict, m)
+ if typecode == FLAG_DONE:
+ return
+ m.start(typecode)
for w_tuple in w_dict.items():
w_key, w_value = space.fixedview(w_tuple, 2)
m.put_w_obj(w_key)
m.put_w_obj(w_value)
m.atom(TYPE_NULL)
-@unmarshaller(TYPE_DICT)
+@unmarshaller(TYPE_DICT, save_ref=True)
def unmarshal_dict(space, u, tc):
# since primitive lists are not optimized and we don't know
# the dict size in advance, use the dict's setitem instead
# of building a list of tuples.
w_dic = space.newdict()
+ u.save_ref(tc, w_dic)
while 1:
w_key = u.get_w_obj(allow_null=True)
if w_key is None:
@@ -314,14 +357,9 @@
return None
-def _put_str_list(space, m, strlist):
- m.atom_int(TYPE_TUPLE, len(strlist))
- atom_str = m.atom_str
- for item in strlist:
- atom_str(TYPE_STRING, item)
-
@marshaller(PyCode)
def marshal_pycode(space, w_pycode, m):
+ # (no attempt at using write_ref here, there is little point imho)
m.start(TYPE_CODE)
# see pypy.interpreter.pycode for the layout
x = space.interp_w(PyCode, w_pycode)
@@ -331,105 +369,161 @@
m.put_int(x.co_stacksize)
m.put_int(x.co_flags)
m.atom_str(TYPE_STRING, x.co_code)
- m.put_tuple_w(TYPE_TUPLE, x.co_consts_w)
- _put_str_list(space, m, [space.str_w(w_name) for w_name in x.co_names_w])
- _put_str_list(space, m, x.co_varnames)
- _put_str_list(space, m, x.co_freevars)
- _put_str_list(space, m, x.co_cellvars)
- m.atom_str(TYPE_STRING, x.co_filename)
- m.atom_str(TYPE_STRING, x.co_name)
+ _marshal_tuple(space, x.co_consts_w, m)
+ _marshal_tuple(space, x.co_names_w, m) # list of w_unicodes
+ co_varnames_w = [space.wrap(s.decode('utf-8')) for s in x.co_varnames]
+ co_freevars_w = [space.wrap(s.decode('utf-8')) for s in x.co_freevars]
+ co_cellvars_w = [space.wrap(s.decode('utf-8')) for s in x.co_cellvars]
+ _marshal_tuple(space, co_varnames_w, m) # more lists, now of w_unicodes
+ _marshal_tuple(space, co_freevars_w, m)
+ _marshal_tuple(space, co_cellvars_w, m)
+ _marshal_unicode(space, x.co_filename, m)
+ _marshal_unicode(space, x.co_name, m)
m.put_int(x.co_firstlineno)
m.atom_str(TYPE_STRING, x.co_lnotab)
# helper for unmarshalling "tuple of string" objects
# into rpython-level lists of strings. Only for code objects.
-def unmarshal_str(u):
+def _unmarshal_strlist(u):
+ items_w = _unmarshal_tuple_w(u)
+ return [u.space.unicode_w(w_item).encode('utf-8') for w_item in items_w]
+
+def _unmarshal_tuple_w(u):
w_obj = u.get_w_obj()
try:
- return u.space.bytes_w(w_obj)
- except OperationError as e:
- if e.match(u.space, u.space.w_TypeError):
- u.raise_exc('invalid marshal data for code object')
- else:
- raise
-
-def unmarshal_str0(u):
- w_obj = u.get_w_obj()
- try:
- return u.space.bytes0_w(w_obj)
+ return u.space.fixedview(w_obj)
except OperationError as e:
if e.match(u.space, u.space.w_TypeError):
u.raise_exc('invalid marshal data for code object')
raise
-def unmarshal_strlist(u, tc):
- lng = u.atom_lng(tc)
- return [unmarshal_str(u) for i in range(lng)]
-
-@unmarshaller(TYPE_CODE)
+@unmarshaller(TYPE_CODE, save_ref=True)
def unmarshal_pycode(space, u, tc):
+ w_codeobj = objectmodel.instantiate(PyCode)
+ u.save_ref(tc, w_codeobj)
argcount = u.get_int()
kwonlyargcount = u.get_int()
nlocals = u.get_int()
stacksize = u.get_int()
flags = u.get_int()
- code = unmarshal_str(u)
- u.start(TYPE_TUPLE)
- consts_w = u.get_tuple_w()
- # copy in order not to merge it with anything else
- names = unmarshal_strlist(u, TYPE_TUPLE)
- varnames = unmarshal_strlist(u, TYPE_TUPLE)
- freevars = unmarshal_strlist(u, TYPE_TUPLE)
- cellvars = unmarshal_strlist(u, TYPE_TUPLE)
- filename = unmarshal_str0(u)
- name = unmarshal_str(u)
+ code = space.bytes_w(u.get_w_obj())
+ consts_w = _unmarshal_tuple_w(u)
+ names = _unmarshal_strlist(u)
+ varnames = _unmarshal_strlist(u)
+ freevars = _unmarshal_strlist(u)
+ cellvars = _unmarshal_strlist(u)
+ filename = space.unicode0_w(u.get_w_obj()).encode('utf-8')
+ name = space.unicode_w(u.get_w_obj()).encode('utf-8')
firstlineno = u.get_int()
- lnotab = unmarshal_str(u)
- return PyCode(space, argcount, kwonlyargcount, nlocals, stacksize, flags,
+ lnotab = space.bytes_w(u.get_w_obj())
+ PyCode.__init__(w_codeobj,
+ space, argcount, kwonlyargcount, nlocals, stacksize, flags,
code, consts_w[:], names, varnames, filename,
name, firstlineno, lnotab, freevars, cellvars)
+ return w_codeobj
+def _marshal_unicode(space, s, m, w_unicode=None):
+ if m.version >= 3:
+ w_interned = space.get_interned_str(s)
+ else:
+ w_interned = None
+ if w_interned is not None:
+ w_unicode = w_interned # use the interned W_UnicodeObject
+ typecode = TYPE_INTERNED # as a key for u.all_refs
+ else:
+ typecode = TYPE_UNICODE
+ if w_unicode is not None:
+ typecode = write_ref(typecode, w_unicode, m)
+ if typecode != FLAG_DONE:
+ m.atom_str(typecode, s)
+
@marshaller(W_UnicodeObject)
def marshal_unicode(space, w_unicode, m):
s = unicodehelper.encode_utf8(space, space.unicode_w(w_unicode),
allow_surrogates=True)
- m.atom_str(TYPE_UNICODE, s)
+ _marshal_unicode(space, s, m, w_unicode=w_unicode)
@unmarshaller(TYPE_UNICODE)
def unmarshal_unicode(space, u, tc):
- return space.wrap(unicodehelper.decode_utf8(space, u.get_str(),
- allow_surrogates=True))
+ uc = unicodehelper.decode_utf8(space, u.get_str(), allow_surrogates=True)
+ return space.newunicode(uc)
+
+@unmarshaller(TYPE_INTERNED)
+def unmarshal_bytes(space, u, tc):
+ return space.new_interned_str(u.get_str())
+
+def _unmarshal_ascii(u, short_length, interned):
+ if short_length:
+ lng = ord(u.get1())
+ else:
+ lng = u.get_lng()
+ s = u.get(lng)
+ w_u = u.space.newunicode(s.decode('latin-1'))
+ if interned:
+ w_u = u.space.new_interned_w_str(w_u)
+ return w_u
+
+@unmarshaller(TYPE_ASCII) # nb. never generated by pypy so far
+def unmarshal_ascii(space, u, tc):
+ return _unmarshal_ascii(u, False, False)
+@unmarshaller(TYPE_ASCII_INTERNED)
+def unmarshal_ascii(space, u, tc):
+ return _unmarshal_ascii(u, False, True)
+@unmarshaller(TYPE_SHORT_ASCII)
+def unmarshal_ascii(space, u, tc):
+ return _unmarshal_ascii(u, True, False)
+@unmarshaller(TYPE_SHORT_ASCII_INTERNED)
+def unmarshal_ascii(space, u, tc):
+ return _unmarshal_ascii(u, True, True)
+
@marshaller(W_SetObject)
def marshal_set(space, w_set, m):
- lis_w = space.fixedview(w_set)
- m.put_tuple_w(TYPE_SET, lis_w)
+ typecode = write_ref(TYPE_SET, w_set, m)
+ if typecode != FLAG_DONE:
+ lis_w = space.fixedview(w_set)
+ m.put_tuple_w(typecode, lis_w)
-@unmarshaller(TYPE_SET)
+@unmarshaller(TYPE_SET, save_ref=True)
def unmarshal_set(space, u, tc):
- return unmarshal_set_frozenset(space, u, tc)
+ w_set = space.call_function(space.w_set)
+ u.save_ref(tc, w_set)
+ _unmarshal_set_frozenset(space, u, w_set)
+ return w_set
@marshaller(W_FrozensetObject)
def marshal_frozenset(space, w_frozenset, m):
- lis_w = space.fixedview(w_frozenset)
- m.put_tuple_w(TYPE_FROZENSET, lis_w)
+ typecode = write_ref(TYPE_FROZENSET, w_frozenset, m)
+ if typecode != FLAG_DONE:
+ lis_w = space.fixedview(w_frozenset)
+ m.put_tuple_w(typecode, lis_w)
-def unmarshal_set_frozenset(space, u, tc):
+def _unmarshal_set_frozenset(space, u, w_set):
lng = u.get_lng()
- w_set = space.call_function(space.w_set)
for i in xrange(lng):
w_obj = u.get_w_obj()
space.call_method(w_set, "add", w_obj)
- if tc == TYPE_FROZENSET:
- w_set = space.call_function(space.w_frozenset, w_set)
- return w_set
@unmarshaller(TYPE_FROZENSET)
def unmarshal_frozenset(space, u, tc):
- return unmarshal_set_frozenset(space, u, tc)
+ w_set = space.call_function(space.w_set)
+ _unmarshal_set_frozenset(space, u, w_set)
+ return space.call_function(space.w_frozenset, w_set)
+
+
+@unmarshaller(TYPE_REF)
+def unmarshal_ref(space, u, tc):
+ index = u.get_lng()
+ if 0 <= index < len(u.refs_w):
+ w_obj = u.refs_w[index]
+ else:
+ w_obj = None
+ if w_obj is None:
+ raise oefmt(space.w_ValueError, "bad marshal data (invalid reference)")
+ return w_obj
_marshallers_unroll = unrolling_iterable(_marshallers)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit