Author: Armin Rigo <[email protected]>
Branch: py3.6
Changeset: r97862:34cc698bbcd1
Date: 2019-10-25 15:34 +0200
http://bitbucket.org/pypy/pypy/changeset/34cc698bbcd1/
Log: hg merge default
This includes the changes to
unicodehelper._str_decode_utf8_slowpath(). If these changes were not
meant to be merged, just revert that part.
diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -3,10 +3,11 @@
License
=======
-Except when otherwise stated (look for LICENSE files in directories or
-information at the beginning of each file) all software and documentation in
-the 'rpython', 'pypy', 'ctype_configure', 'dotviewer', 'demo', 'lib_pypy',
-'py', and '_pytest' directories is licensed as follows:
+Except when otherwise stated (look for LICENSE files in directories
+or information at the beginning of each file) all software and
+documentation in the 'rpython', 'pypy', 'ctype_configure', 'dotviewer',
+'demo', 'extra_tests', 'include', 'lib_pypy', 'py', and '_pytest'
+directories is licensed as follows:
The MIT License
diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -9,15 +9,15 @@
The home page for the interpreter is:
- http://pypy.org/
+ https://pypy.org/
If you want to help developing PyPy, this documentation might help you:
- http://doc.pypy.org/
+ https://doc.pypy.org/
More documentation about the RPython framework can be found here:
- http://rpython.readthedocs.io/
+ https://rpython.readthedocs.io/
The source for the documentation is in the pypy/doc directory.
@@ -25,7 +25,7 @@
Using PyPy instead of CPython
-----------------------------
-Please read the information at http://pypy.org/ to find the correct way to
+Please read the information at https://pypy.org/ to find the correct way to
download and use PyPy as an alternative to CPython.
@@ -36,7 +36,7 @@
interpreter. It is time-consuming and requires significant computing resources.
More information can be found here:
- http://doc.pypy.org/en/latest/build.html
+ https://doc.pypy.org/en/latest/build.html
Enjoy and send us feedback!
diff --git a/lib-python/2.7/ensurepip/_bundled/pip-19.2.3-py2.py3-none-any.whl b/lib-python/2.7/ensurepip/_bundled/pip-19.2.3-py2.py3-none-any.whl
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8118df8ac1940f8c6cb410fbc18e5fae59872b95
GIT binary patch
[cut]
diff --git a/lib-python/2.7/ensurepip/_bundled/setuptools-41.2.0-py2.py3-none-any.whl b/lib-python/2.7/ensurepip/_bundled/setuptools-41.2.0-py2.py3-none-any.whl
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..82df6f63f4ee97380af0a29d8825ae775333b86d
GIT binary patch
[cut]
diff --git a/lib_pypy/_cffi_ssl/_stdssl/__init__.py b/lib_pypy/_cffi_ssl/_stdssl/__init__.py
--- a/lib_pypy/_cffi_ssl/_stdssl/__init__.py
+++ b/lib_pypy/_cffi_ssl/_stdssl/__init__.py
@@ -2,8 +2,18 @@
import time
import _thread
import weakref
-from _pypy_openssl import ffi
-from _pypy_openssl import lib
+
+try:
+ from _pypy_openssl import ffi
+ from _pypy_openssl import lib
+except ImportError as e:
+ import os
+ msg = "\n\nThe _ssl cffi module either doesn't exist or is incompatible
with your machine's shared libraries.\n" + \
+ "If you have a compiler installed, you can try to rebuild it by
running:\n" + \
+ "cd %s\n" %
os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + \
+ "%s _ssl_build.py\n" % sys.executable
+ raise ImportError(str(e) + msg)
+
from _cffi_ssl._stdssl.certificate import (_test_decode_cert,
_decode_certificate, _certificate_to_der)
from _cffi_ssl._stdssl.utility import (_str_with_len, _bytes_with_len,
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -41,7 +41,7 @@
"_multibytecodec", "_continuation", "_cffi_backend",
"_csv", "_pypyjson", "_posixsubprocess", "_cppyy", # "micronumpy",
"_jitlog",
- #" _ssl", "_hashlib", "crypt"
+ # "_hashlib", "crypt"
])
import rpython.rlib.rvmprof.cintf
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -9,3 +9,10 @@
Fix segfault when calling descr-methods with no arguments
+.. branch: https-readme
+
+Convert http -> https in README.rst
+
+.. branch: license-update
+
+Update the list of directories in LICENSE
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -7,6 +7,7 @@
from rpython.rlib.rarithmetic import r_uint, intmask
from rpython.rtyper.lltypesystem import rffi
from pypy.module.unicodedata.interp_ucd import unicodedb
+from rpython.rlib import runicode
@specialize.memo()
def decode_error_handler(space):
@@ -56,7 +57,6 @@
def fsdecode(space, w_string):
from pypy.module._codecs import interp_codecs
- from rpython.rlib import runicode
state = space.fromcache(interp_codecs.CodecState)
errorhandler=state.decode_error_handler
if _WIN32:
@@ -368,7 +368,6 @@
def str_decode_mbcs(s, errors, final, errorhandler, force_ignore=True):
slen = len(s)
- from rpython.rlib import runicode
res, size = runicode.str_decode_mbcs(s, slen, errors, final=final,
errorhandler=errorhandler,
force_ignore=force_ignore)
res_utf8 = runicode.unicode_encode_utf_8(res, size, 'strict')
@@ -389,139 +388,150 @@
"""
if errors is None:
errors = 'strict'
- slen = len(s)
- res = StringBuilder(slen)
+ size = len(s)
+ result = StringBuilder(size)
pos = 0
- end = len(s)
- while pos < end:
+ while pos < size:
ordch1 = ord(s[pos])
# fast path for ASCII
+ # XXX maybe use a while loop here
if ordch1 <= 0x7F:
pos += 1
- res.append(chr(ordch1))
+ result.append(chr(ordch1))
continue
- if ordch1 <= 0xC1:
- r, pos, rettype = errorhandler(errors, "utf-8", "invalid start
byte",
- s, pos, pos + 1)
- res.append(r)
- continue
+ n = ord(runicode._utf8_code_length[ordch1 - 0x80])
+ if pos + n > size:
+ if not final:
+ break
+ # argh, this obscure block of code is mostly a copy of
+ # what follows :-(
+ charsleft = size - pos - 1 # either 0, 1, 2
+ # note: when we get the 'unexpected end of data' we need
+ # to care about the pos returned; it can be lower than size,
+ # in case we need to continue running this loop
+ if not charsleft:
+ # there's only the start byte and nothing else
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'unexpected end of data',
+ s, pos, pos+1)
+ result.append(r)
+ continue
+ ordch2 = ord(s[pos+1])
+ if n == 3:
+ # 3-bytes seq with only a continuation byte
+ if rutf8._invalid_byte_2_of_3(ordch1, ordch2,
allow_surrogates):
+ # second byte invalid, take the first and continue
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+1)
+ result.append(r)
+ continue
+ else:
+ # second byte valid, but third byte missing
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'unexpected end of data',
+ s, pos, pos+2)
+ result.append(r)
+ continue
+ elif n == 4:
+ # 4-bytes seq with 1 or 2 continuation bytes
+ if rutf8._invalid_byte_2_of_4(ordch1, ordch2):
+ # second byte invalid, take the first and continue
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+1)
+ result.append(r)
+ continue
+ elif charsleft == 2 and
rutf8._invalid_byte_3_of_4(ord(s[pos+2])):
+ # third byte invalid, take the first two and continue
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+2)
+ result.append(r)
+ continue
+ else:
+ # there are only 1 or 2 valid continuation bytes; the remaining ones are missing
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'unexpected end of data',
+ s, pos, pos+charsleft+1)
+ result.append(r)
+ continue
+ raise AssertionError("unreachable")
- pos += 1
+ if n == 0:
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'invalid start byte',
+ s, pos, pos+1)
+ result.append(r)
- if ordch1 <= 0xDF:
- if pos >= end:
- if not final:
- pos -= 1
- break
- r, pos, rettype = errorhandler(errors, "utf-8", "unexpected
end of data",
- s, pos - 1, pos)
- res.append(r)
- continue
- ordch2 = ord(s[pos])
+ elif n == 1:
+ assert 0, "ascii should have gone through the fast path"
+ elif n == 2:
+ ordch2 = ord(s[pos+1])
if rutf8._invalid_byte_2_of_2(ordch2):
- r, pos, rettype = errorhandler(errors, "utf-8", "invalid
continuation byte",
- s, pos - 1, pos)
- res.append(r)
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+1)
+ result.append(r)
continue
# 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz
- pos += 1
- res.append(chr(ordch1))
- res.append(chr(ordch2))
- continue
+ result.append(chr(ordch1))
+ result.append(chr(ordch2))
+ pos += 2
- if ordch1 <= 0xEF:
- if (pos + 2) > end:
- if not final:
- pos -= 1
- break
- if (pos) < end and rutf8._invalid_byte_2_of_3(ordch1,
- ord(s[pos]), allow_surrogates):
- msg = "invalid continuation byte"
- r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
- pos - 1, pos)
- else:
- msg = "unexpected end of data"
- r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
- pos - 1, pos)
- pos = end
- res.append(r)
- continue
- ordch2 = ord(s[pos])
- ordch3 = ord(s[pos + 1])
-
+ elif n == 3:
+ ordch2 = ord(s[pos+1])
+ ordch3 = ord(s[pos+2])
if rutf8._invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates):
- r, pos, rettype = errorhandler(errors, "utf-8", "invalid
continuation byte",
- s, pos - 1, pos)
- res.append(r)
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+1)
+ result.append(r)
continue
elif rutf8._invalid_byte_3_of_3(ordch3):
- r, pos, rettype = errorhandler(errors, "utf-8", "invalid
continuation byte",
- s, pos - 1, pos + 1)
- res.append(r)
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+2)
+ result.append(r)
continue
- pos += 2
+ # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz
+ result.append(chr(ordch1))
+ result.append(chr(ordch2))
+ result.append(chr(ordch3))
+ pos += 3
- # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz
- res.append(chr(ordch1))
- res.append(chr(ordch2))
- res.append(chr(ordch3))
- continue
-
- if ordch1 <= 0xF4:
- if (pos + 3) > end:
- if not final:
- pos -= 1
- break
- if pos < end and rutf8._invalid_byte_2_of_4(ordch1,
ord(s[pos])):
- msg = "invalid continuation byte"
- r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
- pos - 1, pos)
- elif pos + 1 < end and rutf8._invalid_byte_3_of_4(ord(s[pos +
1])):
- msg = "invalid continuation byte"
- pos += 1
- r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
- pos - 2, pos)
- else:
- msg = "unexpected end of data"
- r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
- pos - 1, pos)
- pos = end
- res.append(r)
- continue
- ordch2 = ord(s[pos])
- ordch3 = ord(s[pos + 1])
- ordch4 = ord(s[pos + 2])
+ elif n == 4:
+ ordch2 = ord(s[pos+1])
+ ordch3 = ord(s[pos+2])
+ ordch4 = ord(s[pos+3])
if rutf8._invalid_byte_2_of_4(ordch1, ordch2):
- r, pos, rettype = errorhandler(errors, "utf-8", "invalid
continuation byte",
- s, pos - 1, pos)
- res.append(r)
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+1)
+ result.append(r)
continue
elif rutf8._invalid_byte_3_of_4(ordch3):
- r, pos, rettype = errorhandler(errors, "utf-8", "invalid
continuation byte",
- s, pos - 1, pos + 1)
- res.append(r)
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+2)
+ result.append(r)
continue
elif rutf8._invalid_byte_4_of_4(ordch4):
- r, pos, rettype = errorhandler(errors, "utf-8", "invalid
continuation byte",
- s, pos - 1, pos + 2)
- res.append(r)
+ r, pos, rettype = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+3)
+ result.append(r)
continue
+ # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz
+ result.append(chr(ordch1))
+ result.append(chr(ordch2))
+ result.append(chr(ordch3))
+ result.append(chr(ordch4))
+ pos += 4
- pos += 3
- # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz
- res.append(chr(ordch1))
- res.append(chr(ordch2))
- res.append(chr(ordch3))
- res.append(chr(ordch4))
- continue
-
- r, pos, rettype = errorhandler(errors, "utf-8", "invalid start byte",
- s, pos - 1, pos)
- res.append(r)
-
- r = res.build()
+ r = result.build()
return r, rutf8.check_utf8(r, True), pos
hexdigits = "0123456789ABCDEFabcdef"
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -1447,3 +1447,17 @@
assert res == 52
raises(TypeError, u"abc".encode, "test.mynontextenc")
raises(TypeError, b"abc".decode, "test.mynontextenc")
+
+ def test_last_byte_handler(self):
+ # issue bb-2389
+ import _codecs
+ _codecs.register_error('custom_replace', lambda exc: (u'\ufffd',
exc.start+1))
+ for s, res in ((b"WORD\xe3\xab",
+ (u'WORD\ufffd\ufffd', u'WORD\ufffd')),
+ (b"\xef\xbb\xbfWORD\xe3\xabWORD2",
+ (u'\ufeffWORD\ufffd\ufffdWORD2',
+ u'\ufeffWORD\ufffdWORD2'))):
+ r = s.decode('utf8', 'replace')
+ assert r == res[1]
+ r = s.decode('utf8', 'custom_replace')
+ assert r == res[0]
diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py
--- a/pypy/module/sys/vm.py
+++ b/pypy/module/sys/vm.py
@@ -66,6 +66,12 @@
from rpython.rlib.rgc import increase_root_stack_depth
if new_limit <= 0:
raise oefmt(space.w_ValueError, "recursion limit must be positive")
+ # Some programs use very large values to mean "don't check, I want to
+ # use as much as possible and then segfault". Add a silent upper bound
+ # of 10**6 here, because huge values cause huge shadowstacks to be
+ # allocated (or MemoryErrors).
+ if new_limit > 1000000:
+ new_limit = 1000000
try:
_stack_set_length_fraction(new_limit * 0.001)
_stack_check_noinline()
diff --git a/pypy/objspace/std/test/test_newformat.py b/pypy/objspace/std/test/test_newformat.py
--- a/pypy/objspace/std/test/test_newformat.py
+++ b/pypy/objspace/std/test/test_newformat.py
@@ -210,6 +210,13 @@
fmtstr = self.s("{:[XYZ}")
assert fmtstr.format(Foo()) == "<<%r>>" % (self.s("[XYZ"),)
+ def test_issue3100(self):
+ class Foo:
+ def __format__(self, f):
+ return '<<%r>>' % (f,)
+ fmtstr = self.s("{:[XYZ}")
+ assert fmtstr.format(Foo()) == "<<%r>>" % (self.s("[XYZ"),)
+
class AppTestUnicodeFormat(BaseStringFormatTests):
def setup_class(cls):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit