Author: Maciej Fijalkowski <[email protected]>
Branch:
Changeset: r60240:291b1440912f
Date: 2013-01-20 17:00 +0200
http://bitbucket.org/pypy/pypy/changeset/291b1440912f/
Log: err, remove unnecessary changes
diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -9,7 +9,7 @@
from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.runicode import MAXUNICODE
from rpython.rlib.unicodedata import unicodedb_5_2_0, unicodedb_3_2_0
-from rpython.rlib.unicodedata.ucd import code_to_unichr, ORD
+from rpython.rlib.runicode import code_to_unichr, ORD
import sys
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -3,9 +3,91 @@
from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
from rpython.rlib.rarithmetic import r_uint, intmask
from rpython.rlib.unicodedata import unicodedb
-from rpython.rlib.unicodedata.ucd import MAXUNICODE, UNICHR, BYTEORDER
+from rpython.rtyper.lltypesystem import lltype, rffi
+if rffi.sizeof(lltype.UniChar) == 4:
+ MAXUNICODE = 0x10ffff
+else:
+ MAXUNICODE = 0xffff
+
+BYTEORDER = sys.byteorder
+
+if MAXUNICODE > sys.maxunicode:
+ # A version of unichr which allows codes outside the BMP
+ # even on narrow unicode builds.
+ # It will be used when interpreting code on top of a UCS2 CPython,
+ # when sizeof(wchar_t) == 4.
+ # Note that Python3 uses a similar implementation.
+ def UNICHR(c):
+ assert not we_are_translated()
+ if c <= sys.maxunicode or c > MAXUNICODE:
+ return unichr(c)
+ else:
+ c -= 0x10000
+ return (unichr(0xD800 + (c >> 10)) +
+ unichr(0xDC00 + (c & 0x03FF)))
+ UNICHR._flowspace_rewrite_directly_as_ = unichr
+ # ^^^ NB.: for translation, it's essential to use this hack instead
+ # of calling unichr() from UNICHR(), because unichr() detects if there
+ # is a "try:except ValueError" immediately around it.
+
+ def ORD(u):
+ assert not we_are_translated()
+ if isinstance(u, unicode) and len(u) == 2:
+ ch1 = ord(u[0])
+ ch2 = ord(u[1])
+ if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
+ return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
+ return ord(u)
+ ORD._flowspace_rewrite_directly_as_ = ord
+
+else:
+ UNICHR = unichr
+ ORD = ord
+
+if MAXUNICODE > 0xFFFF:
+ def code_to_unichr(code):
+ if not we_are_translated() and sys.maxunicode == 0xFFFF:
+ # Host CPython is narrow build, generate surrogates
+ return UNICHR(code)
+ else:
+ return unichr(code)
+else:
+ def code_to_unichr(code):
+ # generate surrogates for large codes
+ return UNICHR(code)
+
+
+def UNICHR(c):
+ if c <= sys.maxunicode and c <= MAXUNICODE:
+ return unichr(c)
+ else:
+ c -= 0x10000
+ return (unichr(0xD800 + (c >> 10)) +
+ unichr(0xDC00 + (c & 0x03FF)))
+
+def ORD(u):
+ assert isinstance(u, unicode)
+ if len(u) == 1:
+ return ord(u[0])
+ elif len(u) == 2:
+ ch1 = ord(u[0])
+ ch2 = ord(u[1])
+ if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
+ return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
+ raise ValueError
+
+def _STORECHAR(result, CH, byteorder):
+ hi = chr(((CH) >> 8) & 0xff)
+ lo = chr((CH) & 0xff)
+ if byteorder == 'little':
+ result.append(lo)
+ result.append(hi)
+ else:
+ result.append(hi)
+ result.append(lo)
+
def default_unicode_error_decode(errors, encoding, msg, s,
startingpos, endingpos):
if errors == 'replace':
diff --git a/rpython/rlib/unicodedata/ucd.py b/rpython/rlib/unicodedata/ucd.py
deleted file mode 100644
--- a/rpython/rlib/unicodedata/ucd.py
+++ /dev/null
@@ -1,87 +0,0 @@
-
-import sys
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib.objectmodel import we_are_translated
-
-
-if rffi.sizeof(lltype.UniChar) == 4:
- MAXUNICODE = 0x10ffff
-else:
- MAXUNICODE = 0xffff
-
-BYTEORDER = sys.byteorder
-
-if MAXUNICODE > sys.maxunicode:
- # A version of unichr which allows codes outside the BMP
- # even on narrow unicode builds.
- # It will be used when interpreting code on top of a UCS2 CPython,
- # when sizeof(wchar_t) == 4.
- # Note that Python3 uses a similar implementation.
- def UNICHR(c):
- assert not we_are_translated()
- if c <= sys.maxunicode or c > MAXUNICODE:
- return unichr(c)
- else:
- c -= 0x10000
- return (unichr(0xD800 + (c >> 10)) +
- unichr(0xDC00 + (c & 0x03FF)))
- UNICHR._flowspace_rewrite_directly_as_ = unichr
- # ^^^ NB.: for translation, it's essential to use this hack instead
- # of calling unichr() from UNICHR(), because unichr() detects if there
- # is a "try:except ValueError" immediately around it.
-
- def ORD(u):
- assert not we_are_translated()
- if isinstance(u, unicode) and len(u) == 2:
- ch1 = ord(u[0])
- ch2 = ord(u[1])
- if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
- return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
- return ord(u)
- ORD._flowspace_rewrite_directly_as_ = ord
-
-else:
- UNICHR = unichr
- ORD = ord
-
-if MAXUNICODE > 0xFFFF:
- def code_to_unichr(code):
- if not we_are_translated() and sys.maxunicode == 0xFFFF:
- # Host CPython is narrow build, generate surrogates
- return UNICHR(code)
- else:
- return unichr(code)
-else:
- def code_to_unichr(code):
- # generate surrogates for large codes
- return UNICHR(code)
-
-
-def UNICHR(c):
- if c <= sys.maxunicode and c <= MAXUNICODE:
- return unichr(c)
- else:
- c -= 0x10000
- return (unichr(0xD800 + (c >> 10)) +
- unichr(0xDC00 + (c & 0x03FF)))
-
-def ORD(u):
- assert isinstance(u, unicode)
- if len(u) == 1:
- return ord(u[0])
- elif len(u) == 2:
- ch1 = ord(u[0])
- ch2 = ord(u[1])
- if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
- return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
- raise ValueError
-
-def _STORECHAR(result, CH, byteorder):
- hi = chr(((CH) >> 8) & 0xff)
- lo = chr((CH) & 0xff)
- if byteorder == 'little':
- result.append(lo)
- result.append(hi)
- else:
- result.append(hi)
- result.append(lo)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit