Author: Philip Jenvey <pjen...@underboss.org>
Branch: py3k
Changeset: r84440:7d1402694892
Date: 2016-05-14 13:31 -0700
http://bitbucket.org/pypy/pypy/changeset/7d1402694892/
Log: cpython issue2382: adjust SyntaxError offset w/ multibyte chars diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -13,17 +13,23 @@ def wrap_info(self, space): w_text = w_filename = space.w_None + offset = self.offset if self.text is not None: from rpython.rlib.runicode import str_decode_utf_8 - # self.text may not be UTF-8 in case of decoding errors - w_text = space.wrap(str_decode_utf_8(self.text, len(self.text), - 'replace')[0]) + # self.text may not be UTF-8 in case of decoding errors. + # adjust the encoded text offset to a decoded offset + text, _ = str_decode_utf_8(self.text, offset, 'replace') + offset = len(text) + if len(self.text) != offset: + text, _ = str_decode_utf_8(self.text, len(self.text), + 'replace') + w_text = space.wrap(text) if self.filename is not None: w_filename = space.fsdecode(space.wrapbytes(self.filename)) return space.newtuple([space.wrap(self.msg), space.newtuple([w_filename, space.wrap(self.lineno), - space.wrap(self.offset), + space.wrap(offset), w_text, space.wrap(self.lastlineno)])]) diff --git a/pypy/interpreter/test/test_syntax.py b/pypy/interpreter/test/test_syntax.py --- a/pypy/interpreter/test/test_syntax.py +++ b/pypy/interpreter/test/test_syntax.py @@ -720,6 +720,11 @@ print_error() # implicit "del e" here + def test_cpython_issue2382(self): + code = 'Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +' + exc = raises(SyntaxError, compile, code, 'foo', 'exec') + assert exc.value.offset in (19, 20) # pypy, cpython + if __name__ == '__main__': # only to check on top of CPython (you need 2.4) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit