Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3k
Changeset: r54862:1fb96540cdbc
Date: 2012-05-01 18:33 +0200
http://bitbucket.org/pypy/pypy/changeset/1fb96540cdbc/
Log: hg merge default
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -1947,35 +1947,6 @@
changes in your code for properly supporting 64-bit systems."""
raise NotImplementedError
-@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
-def PyUnicode_DecodeUTF32(space, s, size, errors, byteorder):
- """Decode length bytes from a UTF-32 encoded buffer string and return the
- corresponding Unicode object. errors (if non-NULL) defines the error
- handling. It defaults to "strict".
-
- If byteorder is non-NULL, the decoder starts decoding using the given byte
- order:
-
- *byteorder == -1: little endian
- *byteorder == 0: native order
- *byteorder == 1: big endian
-
- If *byteorder is zero, and the first four bytes of the input data are a
- byte order mark (BOM), the decoder switches to this byte order and the BOM is
- not copied into the resulting Unicode string. If *byteorder is -1 or
- 1, any byte order mark is copied to the output.
-
- After completion, *byteorder is set to the current byte order at the end
- of input data.
-
- In a narrow build codepoints outside the BMP will be decoded as surrogate pairs.
-
- If byteorder is NULL, the codec starts in native order mode.
-
- Return NULL if an exception was raised by the codec.
- """
- raise NotImplementedError
-
@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP, Py_ssize_t],
PyObject)
def PyUnicode_DecodeUTF32Stateful(space, s, size, errors, byteorder, consumed):
"""If consumed is NULL, behave like PyUnicode_DecodeUTF32(). If
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -391,6 +391,42 @@
test("\xFE\xFF\x00\x61\x00\x62\x00\x63\x00\x64", 0, 1)
test("\xFF\xFE\x61\x00\x62\x00\x63\x00\x64\x00", 0, -1)
+ def test_decode_utf32(self, space, api):
+ def test(encoded, endian, realendian=None):
+ encoded_charp = rffi.str2charp(encoded)
+ strict_charp = rffi.str2charp("strict")
+ if endian is not None:
+ if endian < 0:
+ value = -1
+ elif endian > 0:
+ value = 1
+ else:
+ value = 0
+ pendian = lltype.malloc(rffi.INTP.TO, 1, flavor='raw')
+ pendian[0] = rffi.cast(rffi.INT, value)
+ else:
+ pendian = None
+
+ w_ustr = api.PyUnicode_DecodeUTF32(encoded_charp, len(encoded),
strict_charp, pendian)
+ assert space.eq_w(space.call_method(w_ustr, 'encode',
space.wrap('ascii')),
+ space.wrap("ab"))
+
+ rffi.free_charp(encoded_charp)
+ rffi.free_charp(strict_charp)
+ if pendian:
+ if realendian is not None:
+ assert rffi.cast(rffi.INT, realendian) == pendian[0]
+ lltype.free(pendian, flavor='raw')
+
+ test("\x61\x00\x00\x00\x62\x00\x00\x00", -1)
+
+ test("\x61\x00\x00\x00\x62\x00\x00\x00", None)
+
+ test("\x00\x00\x00\x61\x00\x00\x00\x62", 1)
+
+ test("\x00\x00\xFE\xFF\x00\x00\x00\x61\x00\x00\x00\x62", 0, 1)
+ test("\xFF\xFE\x00\x00\x61\x00\x00\x00\x62\x00\x00\x00", 0, -1)
+
def test_compare(self, space, api):
assert api.PyUnicode_Compare(space.wrap('a'), space.wrap('b')) == -1
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -529,9 +529,8 @@
string = rffi.charpsize2str(s, size)
- #FIXME: I don't like these prefixes
- if pbyteorder is not None: # correct NULL check?
- llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0]) # compatible with int?
+ if pbyteorder is not None:
+ llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0])
if llbyteorder < 0:
byteorder = "little"
elif llbyteorder > 0:
@@ -546,11 +545,67 @@
else:
errors = None
- result, length, byteorder = runicode.str_decode_utf_16_helper(string, size,
- errors,
- True, # final ? false for multiple passes?
- None, # errorhandler
- byteorder)
+ result, length, byteorder = runicode.str_decode_utf_16_helper(
+ string, size, errors,
+ True, # final ? false for multiple passes?
+ None, # errorhandler
+ byteorder)
+ if pbyteorder is not None:
+ pbyteorder[0] = rffi.cast(rffi.INT, byteorder)
+
+ return space.wrap(result)
+
+@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
+def PyUnicode_DecodeUTF32(space, s, size, llerrors, pbyteorder):
+ """Decode length bytes from a UTF-32 encoded buffer string and
+ return the corresponding Unicode object. errors (if non-NULL)
+ defines the error handling. It defaults to "strict".
+
+ If byteorder is non-NULL, the decoder starts decoding using the
+ given byte order:
+ *byteorder == -1: little endian
+ *byteorder == 0: native order
+ *byteorder == 1: big endian
+
+ If *byteorder is zero, and the first four bytes of the input data
+ are a byte order mark (BOM), the decoder switches to this byte
+ order and the BOM is not copied into the resulting Unicode string.
+ If *byteorder is -1 or 1, any byte order mark is copied to the
+ output.
+
+ After completion, *byteorder is set to the current byte order at
+ the end of input data.
+
+ In a narrow build codepoints outside the BMP will be decoded as
+ surrogate pairs.
+
+ If byteorder is NULL, the codec starts in native order mode.
+
+ Return NULL if an exception was raised by the codec.
+ """
+ string = rffi.charpsize2str(s, size)
+
+ if pbyteorder:
+ llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0])
+ if llbyteorder < 0:
+ byteorder = "little"
+ elif llbyteorder > 0:
+ byteorder = "big"
+ else:
+ byteorder = "native"
+ else:
+ byteorder = "native"
+
+ if llerrors:
+ errors = rffi.charp2str(llerrors)
+ else:
+ errors = None
+
+ result, length, byteorder = runicode.str_decode_utf_32_helper(
+ string, size, errors,
+ True, # final ? false for multiple passes?
+ None, # errorhandler
+ byteorder)
if pbyteorder is not None:
pbyteorder[0] = rffi.cast(rffi.INT, byteorder)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit