Author: Ronan Lamy <ronan.l...@gmail.com> Branch: PyBuffer-backport Changeset: r91172:fea54844618f Date: 2017-05-02 19:45 +0100 http://bitbucket.org/pypy/pypy/changeset/fea54844618f/
Log: Backport memoryview changes diff --git a/pypy/interpreter/buffer.py b/pypy/interpreter/buffer.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/buffer.py @@ -0,0 +1,321 @@ +from rpython.rlib.rstruct.error import StructError +from rpython.rlib.buffer import StringBuffer, SubBuffer + +from pypy.interpreter.error import oefmt + +class BufferInterfaceNotFound(Exception): + pass + + +class BufferView(object): + """Abstract base class for buffers.""" + _attrs_ = ['readonly'] + _immutable_ = True + + def getlength(self): + """Returns the size in bytes (even if getitemsize() > 1).""" + raise NotImplementedError + + def as_str(self): + "Returns an interp-level string with the whole content of the buffer." + return ''.join(self._copy_buffer()) + + def getbytes(self, start, size): + """Return `size` bytes starting at byte offset `start`. + + This is a low-level operation, it is up to the caller to ensure that + the data requested actually correspond to items accessible from the + BufferView. + Note that `start` may be negative, e.g. if the buffer is reversed. + """ + raise NotImplementedError + + def setbytes(self, start, string): + raise NotImplementedError + + def get_raw_address(self): + raise ValueError("no raw buffer") + + def as_readbuf(self): + # Inefficient. May be overridden. + return StringBuffer(self.as_str()) + + def as_writebuf(self): + """Return a writable Buffer sharing the same data as `self`.""" + raise BufferInterfaceNotFound + + def getformat(self): + raise NotImplementedError + + def getitemsize(self): + raise NotImplementedError + + def getndim(self): + raise NotImplementedError + + def getshape(self): + raise NotImplementedError + + def getstrides(self): + raise NotImplementedError + + def releasebuffer(self): + pass + + def value_from_bytes(self, space, s): + from pypy.module.struct.formatiterator import UnpackFormatIterator + buf = StringBuffer(s) + fmtiter = UnpackFormatIterator(space, buf) + fmtiter.interpret(self.getformat()) + return fmtiter.result_w[0] + + def bytes_from_value(self, space, w_val): + from pypy.module.struct.formatiterator import PackFormatIterator + itemsize = self.getitemsize() + fmtiter = PackFormatIterator(space, [w_val], itemsize) + try: + fmtiter.interpret(self.getformat()) + except StructError as e: + raise oefmt(space.w_TypeError, + "memoryview: invalid type for format '%s'", + self.getformat()) + return fmtiter.result.build() + + def _copy_buffer(self): + if self.getndim() == 0: + itemsize = self.getitemsize() + return [self.getbytes(0, itemsize)] + data = [] + self._copy_rec(0, data, 0) + return data + + def _copy_rec(self, idim, data, off): + shapes = self.getshape() + shape = shapes[idim] + strides = self.getstrides() + + if self.getndim() - 1 == idim: + self._copy_base(data, off) + return + + for i in range(shape): + self._copy_rec(idim + 1, data, off) + off += strides[idim] + + def _copy_base(self, data, off): + shapes = self.getshape() + step = shapes[0] + strides = self.getstrides() + itemsize = self.getitemsize() + bytesize = self.getlength() + copiedbytes = 0 + for i in range(step): + bytes = self.getbytes(off, itemsize) + data.append(bytes) + copiedbytes += len(bytes) + off += strides[0] + # do notcopy data if the sub buffer is out of bounds + if copiedbytes >= bytesize: + break + + def get_offset(self, space, dim, index): + "Convert index at dimension `dim` into a byte offset" + shape = self.getshape() + nitems = shape[dim] + if index < 0: + index += nitems + if index < 0 or index >= nitems: + raise oefmt(space.w_IndexError, + "index out of bounds on dimension %d", dim + 1) + # TODO suboffsets? + strides = self.getstrides() + return strides[dim] * index + + def w_getitem(self, space, idx): + offset = self.get_offset(space, 0, idx) + itemsize = self.getitemsize() + # TODO: this probably isn't very fast + data = self.getbytes(offset, itemsize) + return self.value_from_bytes(space, data) + + def new_slice(self, start, step, slicelength): + return BufferSlice(self, start, step, slicelength) + + def setitem_w(self, space, idx, w_obj): + offset = self.get_offset(space, 0, idx) + # TODO: this probably isn't very fast + byteval = self.bytes_from_value(space, w_obj) + self.setbytes(offset, byteval) + + def w_tolist(self, space): + dim = self.getndim() + if dim == 0: + raise NotImplementedError + elif dim == 1: + n = self.getshape()[0] + values_w = [space.ord(self.w_getitem(space, i)) for i in range(n)] + return space.newlist(values_w) + else: + return self._tolist_rec(space, 0, 0) + + def _tolist_rec(self, space, start, idim): + strides = self.getstrides() + shape = self.getshape() + # + dim = idim + 1 + stride = strides[idim] + itemsize = self.getitemsize() + dimshape = shape[idim] + # + if dim >= self.getndim(): + bytecount = (stride * dimshape) + values_w = [ + self.value_from_bytes(space, self.getbytes(pos, itemsize)) + for pos in range(start, start + bytecount, stride)] + return space.newlist(values_w) + + items = [None] * dimshape + for i in range(dimshape): + item = self._tolist_rec(space, start, idim + 1) + items[i] = item + start += stride + + return space.newlist(items) + + def wrap(self, space): + return space.newmemoryview(self) + + +class SimpleView(BufferView): + _attrs_ = ['readonly', 'data'] + _immutable_ = True + + def __init__(self, data): + self.data = data + self.readonly = self.data.readonly + + def getlength(self): + return self.data.getlength() + + def as_str(self): + return self.data.as_str() + + def getbytes(self, start, size): + return self.data[start:start + size] + + def setbytes(self, offset, s): + self.data.setslice(offset, s) + + def get_raw_address(self): + return self.data.get_raw_address() + + def as_readbuf(self): + return self.data + + def as_writebuf(self): + assert not self.data.readonly + return self.data + + def getformat(self): + return 'B' + + def getitemsize(self): + return 1 + + def getndim(self): + return 1 + + def getshape(self): + return [self.getlength()] + + def getstrides(self): + return [1] + + def get_offset(self, space, dim, index): + "Convert index at dimension `dim` into a byte offset" + assert dim == 0 + nitems = self.getlength() + if index < 0: + index += nitems + if index < 0 or index >= nitems: + raise oefmt(space.w_IndexError, + "index out of bounds on dimension %d", dim + 1) + return index + + def w_getitem(self, space, idx): + idx = self.get_offset(space, 0, idx) + ch = self.data[idx] + return space.newbytes(ch) + + def new_slice(self, start, step, slicelength): + if step == 1: + return SimpleView(SubBuffer(self.data, start, slicelength)) + else: + return BufferSlice(self, start, step, slicelength) + + def setitem_w(self, space, idx, w_obj): + idx = self.get_offset(space, 0, idx) + self.data[idx] = space.byte_w(w_obj) + + +class BufferSlice(BufferView): + _immutable_ = True + _attrs_ = ['parent', 'readonly', 'shape', 'strides', 'start', 'step'] + + def __init__(self, parent, start, step, length): + self.parent = parent + self.readonly = self.parent.readonly + self.strides = parent.getstrides()[:] + self.start = start + self.step = step + self.strides[0] *= step + self.shape = parent.getshape()[:] + self.shape[0] = length + + def getlength(self): + return self.shape[0] * self.getitemsize() + + def getbytes(self, start, size): + offset = self.start * self.parent.getstrides()[0] + return self.parent.getbytes(offset + start, size) + + def setbytes(self, start, string): + if len(string) == 0: + return # otherwise, adding self.offset might make 'start' + # out of bounds + offset = self.start * self.parent.getstrides()[0] + self.parent.setbytes(offset + start, string) + + def get_raw_address(self): + from rpython.rtyper.lltypesystem import rffi + offset = self.start * self.parent.getstrides()[0] + return rffi.ptradd(self.parent.get_raw_address(), offset) + + def getformat(self): + return self.parent.getformat() + + def getitemsize(self): + return self.parent.getitemsize() + + def getndim(self): + return self.parent.getndim() + + def getshape(self): + return self.shape + + def getstrides(self): + return self.strides + + def parent_index(self, idx): + return self.start + self.step * idx + + def w_getitem(self, space, idx): + return self.parent.w_getitem(space, self.parent_index(idx)) + + def new_slice(self, start, step, slicelength): + real_start = start + self.start + real_step = self.step * step + return BufferSlice(self.parent, real_start, real_step, slicelength) + + def setitem_w(self, space, idx, w_obj): + return self.parent.setitem_w(space, self.parent_index(idx), w_obj) diff --git a/pypy/objspace/std/bufferobject.py b/pypy/objspace/std/bufferobject.py --- a/pypy/objspace/std/bufferobject.py +++ b/pypy/objspace/std/bufferobject.py @@ -5,6 +5,7 @@ from rpython.rlib.objectmodel import compute_hash from pypy.interpreter.baseobjspace import W_Root +from pypy.interpreter.buffer import SimpleView from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec from pypy.interpreter.typedef import TypeDef @@ -19,7 +20,7 @@ def buffer_w(self, space, flags): space.check_buf_flags(flags, self.buf.readonly) - return self.buf + return SimpleView(self.buf) def readbuf_w(self, space): return self.buf diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -8,6 +8,7 @@ from rpython.rlib.rstring import StringBuilder, replace from pypy.interpreter.baseobjspace import W_Root +from pypy.interpreter.buffer import SimpleView from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import ( WrappedDefault, interp2app, interpindirect2app, unwrap_spec) @@ -455,7 +456,7 @@ def buffer_w(self, space, flags): space.check_buf_flags(flags, True) - return StringBuffer(self._value) + return SimpleView(StringBuffer(self._value)) def readbuf_w(self, space): return StringBuffer(self._value) diff --git a/pypy/objspace/std/memoryobject.py b/pypy/objspace/std/memoryobject.py --- a/pypy/objspace/std/memoryobject.py +++ b/pypy/objspace/std/memoryobject.py @@ -3,48 +3,78 @@ """ import operator -from rpython.rlib.buffer import Buffer, SubBuffer +from rpython.rlib.buffer import SubBuffer from pypy.interpreter.baseobjspace import W_Root +from pypy.interpreter.buffer import BufferView from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app from pypy.interpreter.typedef import TypeDef, GetSetProperty +MEMORYVIEW_MAX_DIM = 64 +MEMORYVIEW_SCALAR = 0x0001 +MEMORYVIEW_C = 0x0002 +MEMORYVIEW_FORTRAN = 0x0004 +MEMORYVIEW_SCALAR = 0x0008 +MEMORYVIEW_PIL = 0x0010 + class W_MemoryView(W_Root): """Implement the built-in 'memoryview' type as a wrapper around an interp-level buffer. """ - _attrs_ = ['buf'] - def __init__(self, buf): - assert isinstance(buf, Buffer) - self.buf = buf + def __init__(self, view): + assert isinstance(view, BufferView) + self.view = view + self._hash = -1 + self.flags = 0 + self._init_flags() + + def getndim(self): + return self.view.getndim() + + def getshape(self): + return self.view.getshape() + + def getstrides(self): + return self.view.getstrides() + + def getitemsize(self): + return self.view.getitemsize() + + def getformat(self): + return self.view.getformat() def buffer_w(self, space, flags): - space.check_buf_flags(flags, self.buf.readonly) - return self.buf + self._check_released(space) + space.check_buf_flags(flags, self.view.readonly) + return self.view @staticmethod def descr_new_memoryview(space, w_subtype, w_object): - return W_MemoryView(space.buffer_w(w_object, space.BUF_FULL_RO)) + if isinstance(w_object, W_MemoryView): + w_object._check_released(space) + return W_MemoryView.copy(w_object) + view = space.buffer_w(w_object, space.BUF_FULL_RO) + return view.wrap(space) def _make_descr__cmp(name): def descr__cmp(self, space, w_other): if isinstance(w_other, W_MemoryView): # xxx not the most efficient implementation - str1 = self.as_str() - str2 = w_other.as_str() + str1 = self.view.as_str() + str2 = w_other.view.as_str() return space.newbool(getattr(operator, name)(str1, str2)) try: - buf = space.buffer_w(w_other, space.BUF_CONTIG_RO) + view = space.buffer_w(w_other, space.BUF_CONTIG_RO) except OperationError as e: if not e.match(space, space.w_TypeError): raise return space.w_NotImplemented else: - str1 = self.as_str() - str2 = buf.as_str() + str1 = self.view.as_str() + str2 = view.as_str() return space.newbool(getattr(operator, name)(str1, str2)) descr__cmp.func_name = name return descr__cmp @@ -60,101 +90,149 @@ return self.buf.as_str() def getlength(self): - return self.buf.getlength() + return self.view.getlength() def descr_tobytes(self, space): - return space.newbytes(self.as_str()) + self._check_released(space) + return space.newbytes(self.view.as_str()) def descr_tolist(self, space): - buf = self.buf - result = [] - for i in range(buf.getlength()): - result.append(space.newint(ord(buf.getitem(i)))) - return space.newlist(result) + self._check_released(space) + return self.view.w_tolist(space) + + def _decode_index(self, space, w_index, is_slice): + shape = self.getshape() + if len(shape) == 0: + count = 1 + else: + count = shape[0] + return space.decode_index4(w_index, count) def descr_getitem(self, space, w_index): - start, stop, step, size = space.decode_index4(w_index, self.getlength()) - itemsize = self.buf.getitemsize() - if itemsize > 1: - start *= itemsize - size *= itemsize - stop = start + size - if step == 0: - step = 1 - if stop > self.getlength(): - raise oefmt(space.w_IndexError, 'index out of range') + is_slice = space.isinstance_w(w_index, space.w_slice) + start, stop, step, slicelength = self._decode_index(space, w_index, is_slice) + # ^^^ for a non-slice index, this returns (index, 0, 0, 1) if step not in (0, 1): raise oefmt(space.w_NotImplementedError, "") if step == 0: # index only - return space.newbytes(self.buf.getitem(start)) + dim = self.getndim() + if dim == 0: + raise oefmt(space.w_TypeError, "invalid indexing of 0-dim memory") + elif dim == 1: + return self.view.w_getitem(space, start) + else: + raise oefmt(space.w_NotImplementedError, "multi-dimensional sub-views are not implemented") + elif is_slice: + return self.view.new_slice(start, step, slicelength).wrap(space) + # multi index is handled at the top of this function else: - buf = SubBuffer(self.buf, start, size) - return W_MemoryView(buf) + raise TypeError("memoryview: invalid slice key") def descr_setitem(self, space, w_index, w_obj): - if self.buf.readonly: + self._check_released(space) + if self.view.readonly: raise oefmt(space.w_TypeError, "cannot modify read-only memory") start, stop, step, size = space.decode_index4(w_index, self.getlength()) - itemsize = self.buf.getitemsize() - if itemsize > 1: - start *= itemsize - size *= itemsize - stop = start + size - if step == 0: - step = 1 - if stop > self.getlength(): - raise oefmt(space.w_IndexError, 'index out of range') if step not in (0, 1): raise oefmt(space.w_NotImplementedError, "") - value = space.buffer_w(w_obj, space.BUF_CONTIG_RO) - if value.getlength() != size: - raise oefmt(space.w_ValueError, - "cannot modify size of memoryview object") + is_slice = space.isinstance_w(w_index, space.w_slice) + start, stop, step, slicelength = self._decode_index(space, w_index, is_slice) + itemsize = self.getitemsize() if step == 0: # index only - self.buf.setitem(start, value.getitem(0)) + self.view.setitem_w(space, start, w_obj) elif step == 1: - self.buf.setslice(start, value.as_str()) + value = space.buffer_w(w_obj, space.BUF_CONTIG_RO) + if value.getlength() != slicelength * itemsize: + raise oefmt(space.w_ValueError, + "cannot modify size of memoryview object") + self.view.setbytes(start * itemsize, value.as_str()) def descr_len(self, space): - return space.newint(self.buf.getlength() / self.buf.getitemsize()) + self._check_released(space) + dim = self.getndim() + if dim == 0: + return space.newint(1) + shape = self.getshape() + return space.newint(shape[0]) def w_get_format(self, space): - return space.newtext(self.buf.getformat()) + self._check_released(space) + return space.newtext(self.getformat()) def w_get_itemsize(self, space): - return space.newint(self.buf.getitemsize()) + self._check_released(space) + return space.newint(self.getitemsize()) def w_get_ndim(self, space): - return space.newint(self.buf.getndim()) + self._check_released(space) + return space.newint(self.getndim()) def w_is_readonly(self, space): - return space.newbool(bool(self.buf.readonly)) + self._check_released(space) + return space.newbool(bool(self.view.readonly)) def w_get_shape(self, space): - if self.buf.getndim() == 0: + self._check_released(space) + if self.view.getndim() == 0: return space.w_None - return space.newtuple([space.newint(x) for x in self.buf.getshape()]) + return space.newtuple([space.newint(x) for x in self.getshape()]) def w_get_strides(self, space): - if self.buf.getndim() == 0: + self._check_released(space) + if self.view.getndim() == 0: return space.w_None - return space.newtuple([space.newint(x) for x in self.buf.getstrides()]) + return space.newtuple([space.newint(x) for x in self.getstrides()]) def w_get_suboffsets(self, space): + self._check_released(space) # I've never seen anyone filling this field return space.w_None + def _check_released(self, space): + if self.view is None: + raise oefmt(space.w_ValueError, + "operation forbidden on released memoryview object") + def descr_pypy_raw_address(self, space): from rpython.rtyper.lltypesystem import lltype, rffi try: - ptr = self.buf.get_raw_address() + ptr = self.view.get_raw_address() except ValueError: - # report the error using the RPython-level internal repr of self.buf + # report the error using the RPython-level internal repr of + # self.view msg = ("cannot find the underlying address of buffer that " - "is internally %r" % (self.buf,)) + "is internally %r" % (self.view,)) raise OperationError(space.w_ValueError, space.newtext(msg)) return space.newint(rffi.cast(lltype.Signed, ptr)) + def _init_flags(self): + ndim = self.getndim() + flags = 0 + if ndim == 0: + flags |= MEMORYVIEW_SCALAR | MEMORYVIEW_C | MEMORYVIEW_FORTRAN + elif ndim == 1: + shape = self.getshape() + strides = self.getstrides() + if shape[0] == 1 or strides[0] == self.getitemsize(): + flags |= MEMORYVIEW_C | MEMORYVIEW_FORTRAN + else: + ndim = self.getndim() + shape = self.getshape() + strides = self.getstrides() + itemsize = self.getitemsize() + if PyBuffer_isContiguous(None, ndim, shape, strides, + itemsize, 'C'): + flags |= MEMORYVIEW_C + if PyBuffer_isContiguous(None, ndim, shape, strides, + itemsize, 'F'): + flags |= MEMORYVIEW_FORTRAN + + if False: # TODO missing suboffsets + flags |= MEMORYVIEW_PIL + flags &= ~(MEMORYVIEW_C|MEMORYVIEW_FORTRAN) + + self.flags = flags + W_MemoryView.typedef = TypeDef( "memoryview", __doc__ = """\ @@ -182,3 +260,49 @@ _pypy_raw_address = interp2app(W_MemoryView.descr_pypy_raw_address), ) W_MemoryView.typedef.acceptable_as_base_class = False + +def _IsFortranContiguous(ndim, shape, strides, itemsize): + if ndim == 0: + return 1 + if not strides: + return ndim == 1 + sd = itemsize + if ndim == 1: + return shape[0] == 1 or sd == strides[0] + for i in range(ndim): + dim = shape[i] + if dim == 0: + return 1 + if strides[i] != sd: + return 0 + sd *= dim + return 1 + +def _IsCContiguous(ndim, shape, strides, itemsize): + if ndim == 0: + return 1 + if not strides: + return ndim == 1 + sd = itemsize + if ndim == 1: + return shape[0] == 1 or sd == strides[0] + for i in range(ndim - 1, -1, -1): + dim = shape[i] + if dim == 0: + return 1 + if strides[i] != sd: + return 0 + sd *= dim + return 1 + +def PyBuffer_isContiguous(suboffsets, ndim, shape, strides, itemsize, fort): + if suboffsets: + return 0 + if (fort == 'C'): + return _IsCContiguous(ndim, shape, strides, itemsize) + elif (fort == 'F'): + return _IsFortranContiguous(ndim, shape, strides, itemsize) + elif (fort == 'A'): + return (_IsCContiguous(ndim, shape, strides, itemsize) or + _IsFortranContiguous(ndim, shape, strides, itemsize)) + return 0 \ No newline at end of file diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -341,6 +341,9 @@ ret = W_Buffer(obj) return ret + def newmemoryview(self, view): + return W_MemoryView(view) + def newbytes(self, s): assert isinstance(s, str) return W_BytesObject(s) diff --git a/pypy/objspace/std/test/test_memoryobject.py b/pypy/objspace/std/test/test_memoryobject.py --- a/pypy/objspace/std/test/test_memoryobject.py +++ b/pypy/objspace/std/test/test_memoryobject.py @@ -1,13 +1,13 @@ class AppTestMemoryView: def test_basic(self): - v = memoryview("abc") - assert v.tobytes() == "abc" + v = memoryview(b"abc") + assert v.tobytes() == b"abc" assert len(v) == 3 assert list(v) == ['a', 'b', 'c'] assert v.tolist() == [97, 98, 99] assert v[1] == "b" assert v[-1] == "c" - exc = raises(TypeError, "v[1] = 'x'") + exc = raises(TypeError, "v[1] = b'x'") assert str(exc.value) == "cannot modify read-only memory" assert v.readonly is True w = v[1:234] @@ -17,7 +17,7 @@ assert str(exc.value) == "" def test_rw(self): - data = bytearray('abcefg') + data = bytearray(b'abcefg') v = memoryview(data) assert v.readonly is False v[0] = 'z' @@ -32,7 +32,7 @@ assert str(exc.value) == "" def test_memoryview_attrs(self): - v = memoryview("a"*100) + v = memoryview(b"a"*100) assert v.format == "B" assert v.itemsize == 1 assert v.shape == (100,) @@ -40,22 +40,25 @@ assert v.strides == (1,) def test_suboffsets(self): - v = memoryview("a"*100) + v = memoryview(b"a"*100) assert v.suboffsets == None v = memoryview(buffer("a"*100, 2)) assert v.shape == (98,) assert v.suboffsets == None def test_compare(self): - assert memoryview("abc") == "abc" - assert memoryview("abc") == bytearray("abc") - assert memoryview("abc") != 3 + assert memoryview(b"abc") == b"abc" + assert memoryview(b"abc") == bytearray(b"abc") + assert memoryview(b"abc") != 3 assert not memoryview("abc") == u"abc" assert memoryview("abc") != u"abc" assert not u"abc" == memoryview("abc") assert u"abc" != memoryview("abc") def test_pypy_raw_address_base(self): + import sys + if '__pypy__' not in sys.modules: + skip('PyPy-only test') a = memoryview(b"foobar")._pypy_raw_address() assert a != 0 b = memoryview(bytearray(b"foobar"))._pypy_raw_address() diff --git a/rpython/rlib/buffer.py b/rpython/rlib/buffer.py --- a/rpython/rlib/buffer.py +++ b/rpython/rlib/buffer.py @@ -12,7 +12,7 @@ _immutable_ = True def getlength(self): - """Returns the size in bytes (even if getitemsize() > 1).""" + """Return the size in bytes.""" raise NotImplementedError def __len__(self): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit