Author: Armin Rigo <[email protected]>
Branch: py3.5
Changeset: r86740:004521738f2e
Date: 2016-08-30 15:39 +0200
http://bitbucket.org/pypy/pypy/changeset/004521738f2e/

Log:    Refactor the code for subx() a bit. Add a shortcut, like CPython's,
        for buffers passed as the 2nd argument (e.g. bytearray). Fix the
        general case, which CPython also gets wrong.

diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -140,53 +140,60 @@
             return False
         return space.isinstance_w(self.w_pattern, space.w_unicode)
 
+    def getstring(self, w_string):
+        """Accepts a string-like object (str, bytes, bytearray, buffer...)
+        and returns a tuple (len, rpython_unicode, rpython_str, rpython_buf),
+        where only one of the rpython_xxx is non-None.
+        """
+        unicodestr = None
+        string = None
+        buf = None
+        space = self.space
+        if space.isinstance_w(w_string, space.w_unicode):
+            unicodestr = space.unicode_w(w_string)
+            length = len(unicodestr)
+        elif space.isinstance_w(w_string, space.w_str):
+            string = space.str_w(w_string)
+            length = len(string)
+        else:
+            buf = space.readbuf_w(w_string)
+            length = buf.getlength()
+            assert length >= 0
+        return (length, unicodestr, string, buf)
+
     def make_ctx(self, w_string, pos=0, endpos=sys.maxint, flags=0):
         """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for
         searching in the given w_string object."""
         space = self.space
+        length, unicodestr, string, buf = self.getstring(w_string)
         if pos < 0:
             pos = 0
+        elif pos > length:
+            pos = length
         if endpos < pos:
             endpos = pos
+        elif endpos > length:
+            endpos = length
         flags = self.flags | flags
-        if space.isinstance_w(w_string, space.w_unicode):
-            unicodestr = space.unicode_w(w_string)
+        #
+        if unicodestr is not None:
             if self.is_known_bytes():
                 raise oefmt(space.w_TypeError,
                             "can't use a bytes pattern on a string-like "
                             "object")
-            if pos > len(unicodestr):
-                pos = len(unicodestr)
-            if endpos > len(unicodestr):
-                endpos = len(unicodestr)
             return rsre_core.UnicodeMatchContext(self.code, unicodestr,
                                                  pos, endpos, flags)
-        elif space.isinstance_w(w_string, space.w_str):
+        else:
             if self.is_known_unicode():
                 raise oefmt(space.w_TypeError,
                             "can't use a string pattern on a bytes-like "
                             "object")
-            str = space.str_w(w_string)
-            if pos > len(str):
-                pos = len(str)
-            if endpos > len(str):
-                endpos = len(str)
-            return rsre_core.StrMatchContext(self.code, str,
-                                             pos, endpos, flags)
-        else:
-            buf = space.readbuf_w(w_string)
-            if self.is_known_unicode():
-                raise oefmt(space.w_TypeError,
-                            "can't use a string pattern on a bytes-like "
-                            "object")
-            size = buf.getlength()
-            assert size >= 0
-            if pos > size:
-                pos = size
-            if endpos > size:
-                endpos = size
-            return rsre_core.BufMatchContext(self.code, buf,
-                                             pos, endpos, flags)
+            if string is not None:
+                return rsre_core.StrMatchContext(self.code, string,
+                                                 pos, endpos, flags)
+            else:
+                return rsre_core.BufMatchContext(self.code, buf,
+                                                 pos, endpos, flags)
 
     def getmatch(self, ctx, found):
         if found:
@@ -295,27 +302,27 @@
             w_filter = w_ptemplate
             filter_is_callable = True
         else:
-            if space.isinstance_w(w_ptemplate, space.w_unicode):
-                filter_as_unicode = space.unicode_w(w_ptemplate)
+            length, filter_as_unicode, filter_as_string, buf = (
+                self.getstring(w_ptemplate))
+            if filter_as_unicode is not None:
                 literal = u'\\' not in filter_as_unicode
                 use_builder = (
                     space.isinstance_w(w_string, space.w_unicode) and literal)
             else:
-                try:
-                    filter_as_string = space.bytes_w(w_ptemplate)
-                except OperationError as e:
-                    if e.async(space):
-                        raise
-                    literal = False
-                else:
-                    literal = '\\' not in filter_as_string
-                    use_builder = (
-                        space.isinstance_w(w_string, space.w_str) and literal)
+                if buf is not None:
+                    filter_as_string = buf.as_str()
+                literal = '\\' not in filter_as_string
+                use_builder = (
+                    space.isinstance_w(w_string, space.w_str) and literal)
             if literal:
                 w_filter = w_ptemplate
                 filter_is_callable = False
             else:
                 # not a literal; hand it over to the template compiler
+                # FIX for a CPython 3.5 bug: if w_ptemplate is a buffer
+                # (e.g. a bytearray), convert it to a byte string here.
+                if buf is not None:
+                    w_ptemplate = space.newbytes(filter_as_string)
                 w_re = import_re(space)
                 w_filter = space.call_method(w_re, '_subx',
                                              space.wrap(self), w_ptemplate)
diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py
--- a/pypy/module/_sre/test/test_app_sre.py
+++ b/pypy/module/_sre/test/test_app_sre.py
@@ -285,6 +285,12 @@
         import re
         assert re.sub('=\w{2}', 'x', '=CA') == 'x'
 
+    def test_sub_bytearray(self):
+        import re
+        assert re.sub(b'a', bytearray(b'A'), b'axa') == b'AxA'
+        # this fails on CPython 3.5:
+        assert re.sub(b'a', bytearray(b'\\n'), b'axa') == b'\nx\n'
+
     def test_match_array(self):
         import re, array
         a = array.array('b', b'hello')
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to