Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3.5
Changeset: r91085:77139bab65b2
Date: 2017-04-18 17:49 +0200
http://bitbucket.org/pypy/pypy/changeset/77139bab65b2/

Log:    CPython Issue #21057: TextIOWrapper now allows the underlying binary
        stream's read() or read1() method to return an arbitrary bytes-like
        object (such as a memoryview).

diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -1,6 +1,6 @@
 import sys
 
-from pypy.interpreter.baseobjspace import W_Root
+from pypy.interpreter.baseobjspace import W_Root, BufferInterfaceNotFound
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import WrappedDefault, interp2app, unwrap_spec
 from pypy.interpreter.typedef import (
@@ -584,6 +584,10 @@
             # Given this, we know there was a valid snapshot point
             # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
             w_dec_buffer, w_dec_flags = space.unpackiterable(w_state, 2)
+            if not space.isinstance_w(w_dec_buffer, space.w_bytes):
+                msg = "decoder getstate() should have returned a bytes " \
+                      "object not '%T'"
+                raise oefmt(space.w_TypeError, msg, w_dec_buffer)
             dec_buffer = space.bytes_w(w_dec_buffer)
             dec_flags = space.int_w(w_dec_flags)
         else:
@@ -591,16 +595,18 @@
             dec_flags = 0
 
         # Read a chunk, decode it, and put the result in self._decoded_chars
-        w_input = space.call_method(self.w_buffer,
-                                    "read1" if self.has_read1 else "read",
+        func_name = "read1" if self.has_read1 else "read"
+        w_input = space.call_method(self.w_buffer, func_name,
                                     space.newint(self.chunk_size))
 
-        if not space.isinstance_w(w_input, space.w_bytes):
-            msg = "decoder getstate() should have returned a bytes " \
-                  "object not '%T'"
-            raise oefmt(space.w_TypeError, msg, w_input)
+        try:
+            input_buf = w_input.buffer_w(space, space.BUF_SIMPLE)
+        except BufferInterfaceNotFound:
+            msg = ("underlying %s() should have returned a bytes-like "
+                   "object, not '%T'")
+            raise oefmt(space.w_TypeError, msg, func_name, w_input)
 
-        eof = space.len_w(w_input) == 0
+        eof = input_buf.getlength() == 0
         w_decoded = space.call_method(self.w_decoder, "decode",
                                       w_input, space.newbool(eof))
         check_decoded(space, w_decoded)
@@ -611,7 +617,7 @@
         if self.telling:
             # At the snapshot point, len(dec_buffer) bytes before the read,
             # the next input to be decoded is dec_buffer + input_chunk.
-            next_input = dec_buffer + space.bytes_w(w_input)
+            next_input = dec_buffer + input_buf.as_str()
             self.snapshot = PositionSnapshot(dec_flags, next_input)
 
         return not eof
diff --git a/pypy/module/_io/test/test_textio.py b/pypy/module/_io/test/test_textio.py
--- a/pypy/module/_io/test/test_textio.py
+++ b/pypy/module/_io/test/test_textio.py
@@ -1,5 +1,5 @@
 class AppTestTextIO:
-    spaceconfig = dict(usemodules=['_io', '_locale'])
+    spaceconfig = dict(usemodules=['_io', '_locale', 'array'])
 
     def setup_class(cls):
         from rpython.rlib.rarithmetic import INT_MAX, UINT_MAX
@@ -381,6 +381,38 @@
         t = _io.TextIOWrapper(NonbytesStream(u'a'))
         raises(TypeError, t.read)
 
+    def test_read_byteslike(self):
+        import _io as io
+        import array
+
+        class MemviewBytesIO(io.BytesIO):
+            '''A BytesIO object whose read method returns memoryviews
+               rather than bytes'''
+
+            def read1(self, len_):
+                return _to_memoryview(super().read1(len_))
+
+            def read(self, len_):
+                return _to_memoryview(super().read(len_))
+
+        def _to_memoryview(buf):
+            '''Convert bytes-object *buf* to a non-trivial memoryview'''
+
+            arr = array.array('i')
+            idx = len(buf) - len(buf) % arr.itemsize
+            arr.frombytes(buf[:idx])
+            return memoryview(arr)
+
+        r = MemviewBytesIO(b'Just some random string\n')
+        t = io.TextIOWrapper(r, 'utf-8')
+
+        # TextIOWrapper will not read the full string, because
+        # we truncate it to a multiple of the native int size
+        # so that we can construct a more complex memoryview.
+        bytes_val =  _to_memoryview(r.getvalue()).tobytes()
+
+        assert t.read(200) == bytes_val.decode('utf-8')
+
     def test_device_encoding(self):
         import os
         import sys
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to