Author: Armin Rigo <[email protected]>
Branch:
Changeset: r96552:6efbf166cc90
Date: 2019-04-29 10:14 +0200
http://bitbucket.org/pypy/pypy/changeset/6efbf166cc90/
Log: Consolidate code, avoids obscure checking of individual characters
to know if the numeric value will be in range
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -365,12 +365,14 @@
hexdigits = self.getslice(start, i)
try:
val = int(hexdigits, 16)
- if sys.maxunicode > 65535 and 0xd800 <= val <= 0xdbff:
- # surrogate pair
- if (self.ll_chars[i] == '\\' and self.ll_chars[i+1] == 'u' and
- self.ll_chars[i+2] in 'dD' and
- self.ll_chars[i+3] in 'cdefCDEF'):
- val = self.decode_surrogate_pair(i, val)
+ if (0xd800 <= val <= 0xdbff and
+ self.ll_chars[i] == '\\' and self.ll_chars[i+1] == 'u'):
+ hexdigits = self.getslice(i+2, i+6)
+ lowsurr = int(hexdigits, 16)
+ if 0xdc00 <= lowsurr <= 0xdfff:
+ # decode surrogate pair
+ val = 0x10000 + (((val - 0xd800) << 10) |
+ (lowsurr - 0xdc00))
i += 6
except ValueError:
self._raise("Invalid \uXXXX escape (char %d)", i-1)
@@ -381,15 +383,6 @@
builder.append(utf8_ch)
return i
- def decode_surrogate_pair(self, i, highsurr):
- """ uppon enter the following must hold:
- chars[i] == "\\" and chars[i+1] == "u"
- """
- i += 2
- hexdigits = self.getslice(i, i+4)
- lowsurr = int(hexdigits, 16) # the possible ValueError is caugth by the caller
- return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00))
-
def decode_key(self, i):
""" returns a wrapped unicode """
from rpython.rlib.rarithmetic import intmask
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit