Author: Philip Jenvey <pjen...@underboss.org>
Branch: py3k
Changeset: r84440:7d1402694892
Date: 2016-05-14 13:31 -0700
http://bitbucket.org/pypy/pypy/changeset/7d1402694892/

Log:    cpython issue2382: adjust SyntaxError offset w/ multibyte chars

diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -13,17 +13,23 @@
 
     def wrap_info(self, space):
         w_text = w_filename = space.w_None
+        offset = self.offset
         if self.text is not None:
             from rpython.rlib.runicode import str_decode_utf_8
-            # self.text may not be UTF-8 in case of decoding errors
-            w_text = space.wrap(str_decode_utf_8(self.text, len(self.text),
-                                                 'replace')[0])
+            # self.text may not be UTF-8 in case of decoding errors.
+            # adjust the encoded text offset to a decoded offset
+            text, _ = str_decode_utf_8(self.text, offset, 'replace')
+            offset = len(text)
+            if len(self.text) != offset:
+                text, _ = str_decode_utf_8(self.text, len(self.text),
+                                           'replace')
+            w_text = space.wrap(text)
         if self.filename is not None:
             w_filename = space.fsdecode(space.wrapbytes(self.filename))
         return space.newtuple([space.wrap(self.msg),
                                space.newtuple([w_filename,
                                                space.wrap(self.lineno),
-                                               space.wrap(self.offset),
+                                               space.wrap(offset),
                                                w_text,
                                                space.wrap(self.lastlineno)])])
 
diff --git a/pypy/interpreter/test/test_syntax.py b/pypy/interpreter/test/test_syntax.py
--- a/pypy/interpreter/test/test_syntax.py
+++ b/pypy/interpreter/test/test_syntax.py
@@ -720,6 +720,11 @@
             print_error()
             # implicit "del e" here
 
+    def test_cpython_issue2382(self):
+        code = 'Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +'
+        exc = raises(SyntaxError, compile, code, 'foo', 'exec')
+        assert exc.value.offset in (19, 20) # pypy, cpython
+
 
 if __name__ == '__main__':
     # only to check on top of CPython (you need 2.4)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to