Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r91518:6a4af0b6b51c Date: 2017-06-05 08:24 +0200 http://bitbucket.org/pypy/pypy/changeset/6a4af0b6b51c/
Log: hg merge cffi-char16-char32 Support the char16_t and char32_t types in cffi. This means reintroducing some surrogate handling in one of the two directions, depending on the size of unichar. diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -159,9 +159,9 @@ #define _cffi_from_c_struct \ ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[18]) #define _cffi_to_c_wchar_t \ - ((wchar_t(*)(PyObject *))_cffi_exports[19]) + ((_cffi_wchar_t(*)(PyObject *))_cffi_exports[19]) #define _cffi_from_c_wchar_t \ - ((PyObject *(*)(wchar_t))_cffi_exports[20]) + ((PyObject *(*)(_cffi_wchar_t))_cffi_exports[20]) #define _cffi_to_c_long_double \ ((long double(*)(PyObject *))_cffi_exports[21]) #define _cffi_to_c__Bool \ @@ -174,7 +174,11 @@ #define _CFFI_CPIDX 25 #define _cffi_call_python \ ((void(*)(struct _cffi_externpy_s *, char *))_cffi_exports[_CFFI_CPIDX]) -#define _CFFI_NUM_EXPORTS 26 +#define _cffi_to_c_wchar3216_t \ + ((int(*)(PyObject *))_cffi_exports[26]) +#define _cffi_from_c_wchar3216_t \ + ((PyObject *(*)(int))_cffi_exports[27]) +#define _CFFI_NUM_EXPORTS 28 struct _cffi_ctypedescr; @@ -215,6 +219,46 @@ return NULL; } + +#ifdef HAVE_WCHAR_H +typedef wchar_t _cffi_wchar_t; +#else +typedef uint16_t _cffi_wchar_t; /* same random pick as _cffi_backend.c */ +#endif + +_CFFI_UNUSED_FN static uint16_t _cffi_to_c_char16_t(PyObject *o) +{ + if (sizeof(_cffi_wchar_t) == 2) + return (uint16_t)_cffi_to_c_wchar_t(o); + else + return (uint16_t)_cffi_to_c_wchar3216_t(o); +} + +_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x) +{ + if (sizeof(_cffi_wchar_t) == 2) + return _cffi_from_c_wchar_t(x); + else + return _cffi_from_c_wchar3216_t(x); +} + +_CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o) +{ + if (sizeof(_cffi_wchar_t) == 4) + return (int)_cffi_to_c_wchar_t(o); + else + return (int)_cffi_to_c_wchar3216_t(o); +} + +_CFFI_UNUSED_FN static 
PyObject *_cffi_from_c_char32_t(int x) +{ + if (sizeof(_cffi_wchar_t) == 4) + return _cffi_from_c_wchar_t(x); + else + return _cffi_from_c_wchar3216_t(x); +} + + /********** end CPython-specific section **********/ #else _CFFI_UNUSED_FN diff --git a/lib_pypy/cffi/cffi_opcode.py b/lib_pypy/cffi/cffi_opcode.py --- a/lib_pypy/cffi/cffi_opcode.py +++ b/lib_pypy/cffi/cffi_opcode.py @@ -107,9 +107,10 @@ PRIM_UINTMAX = 47 PRIM_FLOATCOMPLEX = 48 PRIM_DOUBLECOMPLEX = 49 +PRIM_CHAR16 = 50 +PRIM_CHAR32 = 51 - -_NUM_PRIM = 50 +_NUM_PRIM = 52 _UNKNOWN_PRIM = -1 _UNKNOWN_FLOAT_PRIM = -2 _UNKNOWN_LONG_DOUBLE = -3 @@ -135,6 +136,8 @@ 'double _Complex': PRIM_DOUBLECOMPLEX, '_Bool': PRIM_BOOL, 'wchar_t': PRIM_WCHAR, + 'char16_t': PRIM_CHAR16, + 'char32_t': PRIM_CHAR32, 'int8_t': PRIM_INT8, 'uint8_t': PRIM_UINT8, 'int16_t': PRIM_INT16, diff --git a/lib_pypy/cffi/model.py b/lib_pypy/cffi/model.py --- a/lib_pypy/cffi/model.py +++ b/lib_pypy/cffi/model.py @@ -122,6 +122,8 @@ '_Bool': 'i', # the following types are not primitive in the C sense 'wchar_t': 'c', + 'char16_t': 'c', + 'char32_t': 'c', 'int8_t': 'i', 'uint8_t': 'i', 'int16_t': 'i', diff --git a/lib_pypy/cffi/parse_c_type.h b/lib_pypy/cffi/parse_c_type.h --- a/lib_pypy/cffi/parse_c_type.h +++ b/lib_pypy/cffi/parse_c_type.h @@ -81,8 +81,10 @@ #define _CFFI_PRIM_UINTMAX 47 #define _CFFI_PRIM_FLOATCOMPLEX 48 #define _CFFI_PRIM_DOUBLECOMPLEX 49 +#define _CFFI_PRIM_CHAR16 50 +#define _CFFI_PRIM_CHAR32 51 -#define _CFFI__NUM_PRIM 50 +#define _CFFI__NUM_PRIM 52 #define _CFFI__UNKNOWN_PRIM (-1) #define _CFFI__UNKNOWN_FLOAT_PRIM (-2) #define _CFFI__UNKNOWN_LONG_DOUBLE (-3) diff --git a/lib_pypy/cffi/recompiler.py b/lib_pypy/cffi/recompiler.py --- a/lib_pypy/cffi/recompiler.py +++ b/lib_pypy/cffi/recompiler.py @@ -3,8 +3,9 @@ from .error import VerificationError from .cffi_opcode import * -VERSION = "0x2601" -VERSION_EMBEDDED = "0x2701" +VERSION_BASE = 0x2601 +VERSION_EMBEDDED = 0x2701 +VERSION_CHAR16CHAR32 = 0x2801 class GlobalExpr: @@ 
-126,6 +127,10 @@ self.ffi = ffi self.module_name = module_name self.target_is_python = target_is_python + self._version = VERSION_BASE + + def needs_version(self, ver): + self._version = max(self._version, ver) def collect_type_table(self): self._typesdict = {} @@ -304,9 +309,7 @@ prnt('#endif') lines = self._rel_readlines('_embedding.h') prnt(''.join(lines)) - version = VERSION_EMBEDDED - else: - version = VERSION + self.needs_version(VERSION_EMBEDDED) # # then paste the C source given by the user, verbatim. prnt('/************************************************************/') @@ -405,7 +408,7 @@ prnt(' _cffi_call_python_org = ' '(void(*)(struct _cffi_externpy_s *, char *))p[1];') prnt(' }') - prnt(' p[0] = (const void *)%s;' % version) + prnt(' p[0] = (const void *)0x%x;' % self._version) prnt(' p[1] = &_cffi_type_context;') prnt('}') # on Windows, distutils insists on putting init_cffi_xyz in @@ -423,21 +426,22 @@ prnt('PyMODINIT_FUNC') prnt('PyInit_%s(void)' % (base_module_name,)) prnt('{') - prnt(' return _cffi_init("%s", %s, &_cffi_type_context);' % ( - self.module_name, version)) + prnt(' return _cffi_init("%s", 0x%x, &_cffi_type_context);' % ( + self.module_name, self._version)) prnt('}') prnt('#else') prnt('PyMODINIT_FUNC') prnt('init%s(void)' % (base_module_name,)) prnt('{') - prnt(' _cffi_init("%s", %s, &_cffi_type_context);' % ( - self.module_name, version)) + prnt(' _cffi_init("%s", 0x%x, &_cffi_type_context);' % ( + self.module_name, self._version)) prnt('}') prnt('#endif') prnt() prnt('#ifdef __GNUC__') prnt('# pragma GCC visibility pop') prnt('#endif') + self._version = None def _to_py(self, x): if isinstance(x, str): @@ -476,7 +480,8 @@ prnt('from %s import ffi as _ffi%d' % (included_module_name, i)) prnt() prnt("ffi = _cffi_backend.FFI('%s'," % (self.module_name,)) - prnt(" _version = %s," % (VERSION,)) + prnt(" _version = 0x%x," % (self._version,)) + self._version = None # # the '_types' keyword argument self.cffi_types = tuple(self.cffi_types) 
# don't change any more @@ -515,8 +520,11 @@ # double' here, and _cffi_to_c_double would loose precision converter = '(%s)_cffi_to_c_double' % (tp.get_c_name(''),) else: - converter = '(%s)_cffi_to_c_%s' % (tp.get_c_name(''), + cname = tp.get_c_name('') + converter = '(%s)_cffi_to_c_%s' % (cname, tp.name.replace(' ', '_')) + if cname in ('char16_t', 'char32_t'): + self.needs_version(VERSION_CHAR16CHAR32) errvalue = '-1' # elif isinstance(tp, model.PointerType): @@ -573,7 +581,10 @@ elif isinstance(tp, model.UnknownFloatType): return '_cffi_from_c_double(%s)' % (var,) elif tp.name != 'long double' and not tp.is_complex_type(): - return '_cffi_from_c_%s(%s)' % (tp.name.replace(' ', '_'), var) + cname = tp.name.replace(' ', '_') + if cname in ('char16_t', 'char32_t'): + self.needs_version(VERSION_CHAR16CHAR32) + return '_cffi_from_c_%s(%s)' % (cname, var) else: return '_cffi_from_c_deref((char *)&%s, _cffi_type(%d))' % ( var, self._gettypenum(tp)) diff --git a/lib_pypy/cffi/vengine_cpy.py b/lib_pypy/cffi/vengine_cpy.py --- a/lib_pypy/cffi/vengine_cpy.py +++ b/lib_pypy/cffi/vengine_cpy.py @@ -808,7 +808,8 @@ #include <stddef.h> /* this block of #ifs should be kept exactly identical between - c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py + and cffi/_cffi_include.h */ #if defined(_MSC_VER) # include <malloc.h> /* for alloca() */ # if _MSC_VER < 1600 /* MSVC < 2010 */ @@ -842,11 +843,13 @@ # include <stdint.h> # endif # if _MSC_VER < 1800 /* MSVC < 2013 */ - typedef unsigned char _Bool; +# ifndef __cplusplus + typedef unsigned char _Bool; +# endif # endif #else # include <stdint.h> -# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux) # include <alloca.h> # endif #endif diff --git a/lib_pypy/cffi/vengine_gen.py b/lib_pypy/cffi/vengine_gen.py --- a/lib_pypy/cffi/vengine_gen.py +++ 
b/lib_pypy/cffi/vengine_gen.py @@ -627,7 +627,8 @@ #include <sys/types.h> /* XXX for ssize_t on some platforms */ /* this block of #ifs should be kept exactly identical between - c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */ + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py + and cffi/_cffi_include.h */ #if defined(_MSC_VER) # include <malloc.h> /* for alloca() */ # if _MSC_VER < 1600 /* MSVC < 2010 */ @@ -661,11 +662,13 @@ # include <stdint.h> # endif # if _MSC_VER < 1800 /* MSVC < 2013 */ - typedef unsigned char _Bool; +# ifndef __cplusplus + typedef unsigned char _Bool; +# endif # endif #else # include <stdint.h> -# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux) # include <alloca.h> # endif #endif diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -6,5 +6,6 @@ .. startrev: 558bd00b3dd8 .. branch: cffi-complex +.. branch: cffi-char16-char32 -Part of the upgrade to cffi 1.11 +The two ``cffi-*`` branches are part of the upgrade to cffi 1.11. 
diff --git a/pypy/module/_cffi_backend/cffi1_module.py b/pypy/module/_cffi_backend/cffi1_module.py --- a/pypy/module/_cffi_backend/cffi1_module.py +++ b/pypy/module/_cffi_backend/cffi1_module.py @@ -9,7 +9,7 @@ VERSION_MIN = 0x2601 -VERSION_MAX = 0x27FF +VERSION_MAX = 0x28FF VERSION_EXPORT = 0x0A03 diff --git a/pypy/module/_cffi_backend/cffi_opcode.py b/pypy/module/_cffi_backend/cffi_opcode.py --- a/pypy/module/_cffi_backend/cffi_opcode.py +++ b/pypy/module/_cffi_backend/cffi_opcode.py @@ -107,8 +107,10 @@ PRIM_UINTMAX = 47 PRIM_FLOATCOMPLEX = 48 PRIM_DOUBLECOMPLEX = 49 +PRIM_CHAR16 = 50 +PRIM_CHAR32 = 51 -_NUM_PRIM = 50 +_NUM_PRIM = 52 _UNKNOWN_PRIM = -1 _UNKNOWN_FLOAT_PRIM = -2 _UNKNOWN_LONG_DOUBLE = -3 @@ -131,8 +133,12 @@ 'float': PRIM_FLOAT, 'double': PRIM_DOUBLE, 'long double': PRIM_LONGDOUBLE, + 'float _Complex': PRIM_FLOATCOMPLEX, + 'double _Complex': PRIM_DOUBLECOMPLEX, '_Bool': PRIM_BOOL, 'wchar_t': PRIM_WCHAR, + 'char16_t': PRIM_CHAR16, + 'char32_t': PRIM_CHAR32, 'int8_t': PRIM_INT8, 'uint8_t': PRIM_UINT8, 'int16_t': PRIM_INT16, diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -36,8 +36,7 @@ datasize = self.size # if datasize < 0: - from pypy.module._cffi_backend import misc - w_init, length = misc.get_new_array_length(space, w_init) + w_init, length = self.get_new_array_length(w_init) try: datasize = ovfcheck(length * self.ctitem.size) except OverflowError: @@ -53,6 +52,29 @@ self.convert_from_object(ptr, w_init) return cdata + def get_new_array_length(self, w_value): + space = self.space + if (space.isinstance_w(w_value, space.w_list) or + space.isinstance_w(w_value, space.w_tuple)): + return (w_value, space.int_w(space.len(w_value))) + elif space.isinstance_w(w_value, space.w_bytes): + # from a string, we add the null terminator + s = space.bytes_w(w_value) + return (w_value, len(s) + 1) + elif 
space.isinstance_w(w_value, space.w_unicode): + from pypy.module._cffi_backend import wchar_helper + u = space.unicode_w(w_value) + if self.ctitem.size == 2: + length = wchar_helper.unicode_size_as_char16(u) + else: + length = wchar_helper.unicode_size_as_char32(u) + return (w_value, length + 1) + else: + explicitlength = space.getindex_w(w_value, space.w_OverflowError) + if explicitlength < 0: + raise oefmt(space.w_ValueError, "negative array length") + return (space.w_None, explicitlength) + def _check_subscript_index(self, w_cdata, i): space = self.space if i < 0: diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -10,7 +10,7 @@ from rpython.rtyper.tool import rfficache from pypy.interpreter.error import oefmt -from pypy.module._cffi_backend import cdataobj, misc +from pypy.module._cffi_backend import cdataobj, misc, wchar_helper from pypy.module._cffi_backend.ctypeobj import W_CType @@ -42,11 +42,13 @@ def cast_unicode(self, w_ob): space = self.space s = space.unicode_w(w_ob) - if len(s) != 1: + try: + ordinal = wchar_helper.unicode_to_ordinal(s) + except ValueError: raise oefmt(space.w_TypeError, "cannot cast unicode string of length %d to ctype '%s'", len(s), self.name) - return ord(s[0]) + return intmask(ordinal) def cast(self, w_ob): from pypy.module._cffi_backend import ctypeptr @@ -148,53 +150,83 @@ return self.space.newbytes(s) -# XXX explicitly use an integer type instead of lltype.UniChar here, -# because for now the latter is defined as unsigned by RPython (even -# though it may be signed when 'wchar_t' is written to C). 
-WCHAR_INT = {(2, False): rffi.USHORT, - (4, False): rffi.UINT, - (4, True): rffi.INT}[rffi.sizeof(lltype.UniChar), - rfficache.signof_c_type('wchar_t')] -WCHAR_INTP = rffi.CArrayPtr(WCHAR_INT) +class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar): + _attrs_ = ['is_signed_wchar'] + _immutable_fields_ = ['is_signed_wchar'] -class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar): - _attrs_ = [] + _wchar_is_signed = rfficache.signof_c_type('wchar_t') - if rffi.r_wchar_t.SIGN: - def write_raw_integer_data(self, w_cdata, value): - w_cdata.write_raw_signed_data(value) + def __init__(self, space, size, name, name_position, align): + W_CTypePrimitiveCharOrUniChar.__init__(self, space, size, name, + name_position, align) + self.is_signed_wchar = self._wchar_is_signed and (name == "wchar_t") + # "char16_t" and "char32_t" are always unsigned def cast_to_int(self, cdata): - unichardata = rffi.cast(WCHAR_INTP, cdata) - return self.space.newint(unichardata[0]) + if self.is_signed_wchar: + value = misc.read_raw_long_data(cdata, self.size) + return self.space.newint(value) + else: + value = misc.read_raw_ulong_data(cdata, self.size) + if self.size < rffi.sizeof(lltype.Signed): + return self.space.newint(intmask(value)) + else: + return self.space.newint(value) # r_uint => 'long' object def convert_to_object(self, cdata): - unichardata = rffi.cast(rffi.CWCHARP, cdata) - return self.space.newunicode(unichardata[0]) + if self.is_signed_wchar: + unichardata = rffi.cast(rffi.CWCHARP, cdata) + return self.space.newunicode(unichardata[0]) + else: + value = misc.read_raw_ulong_data(cdata, self.size) # r_uint + try: + u = wchar_helper.ordinal_to_unicode(value) + except wchar_helper.OutOfRange as e: + raise oefmt(self.space.w_ValueError, + "char32_t out of range for " + "conversion to unicode: %s", hex(e.ordinal)) + return self.space.newunicode(u) def string(self, cdataobj, maxlen): with cdataobj as ptr: w_res = self.convert_to_object(ptr) return w_res - def 
_convert_to_unichar(self, w_ob): + def _convert_to_charN_t(self, w_ob): + # returns a r_uint. If self.size == 2, it is smaller than 0x10000 space = self.space if space.isinstance_w(w_ob, space.w_unicode): - s = space.unicode_w(w_ob) - if len(s) == 1: - return s[0] - if (isinstance(w_ob, cdataobj.W_CData) and - isinstance(w_ob.ctype, W_CTypePrimitiveUniChar)): + u = space.unicode_w(w_ob) + try: + ordinal = wchar_helper.unicode_to_ordinal(u) + except ValueError: + pass + else: + if self.size == 2 and ordinal > 0xffff: + raise self._convert_error("single character <= 0xFFFF", + w_ob) + return ordinal + elif (isinstance(w_ob, cdataobj.W_CData) and + isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and + w_ob.ctype.size == self.size): with w_ob as ptr: - return rffi.cast(rffi.CWCHARP, ptr)[0] + return misc.read_raw_ulong_data(ptr, self.size) raise self._convert_error("unicode string of length 1", w_ob) def convert_from_object(self, cdata, w_ob): - value = self._convert_to_unichar(w_ob) - rffi.cast(rffi.CWCHARP, cdata)[0] = value + ordinal = self._convert_to_charN_t(w_ob) + misc.write_raw_unsigned_data(cdata, ordinal, self.size) def unpack_ptr(self, w_ctypeptr, ptr, length): - u = rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length) + if self.size == 2: + u = wchar_helper.unicode_from_char16(ptr, length) + else: + try: + u = wchar_helper.unicode_from_char32(ptr, length) + except wchar_helper.OutOfRange as e: + raise oefmt(self.space.w_ValueError, + "char32_t out of range for " + "conversion to unicode: %s", hex(e.ordinal)) return self.space.newunicode(u) diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -4,9 +4,9 @@ from rpython.rlib import rposix from rpython.rlib.rarithmetic import ovfcheck -from rpython.rtyper.annlowlevel import llstr, llunicode +from rpython.rtyper.annlowlevel import llstr from rpython.rtyper.lltypesystem import 
lltype, rffi -from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw, copy_unicode_to_raw +from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw from pypy.interpreter.error import OperationError, oefmt, wrap_oserror from pypy.module._cffi_backend import cdataobj, misc, ctypeprim, ctypevoid @@ -88,18 +88,28 @@ if n != self.length: cdata[n] = '\x00' elif isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveUniChar): + from pypy.module._cffi_backend import wchar_helper if not space.isinstance_w(w_ob, space.w_unicode): raise self._convert_error("unicode or list or tuple", w_ob) s = space.unicode_w(w_ob) - n = len(s) + if self.ctitem.size == 2: + n = wchar_helper.unicode_size_as_char16(s) + else: + n = wchar_helper.unicode_size_as_char32(s) if self.length >= 0 and n > self.length: raise oefmt(space.w_IndexError, "initializer unicode string is too long for '%s' " "(got %d characters)", self.name, n) - unichardata = rffi.cast(rffi.CWCHARP, cdata) - copy_unicode_to_raw(llunicode(s), unichardata, 0, n) - if n != self.length: - unichardata[n] = u'\x00' + add_final_zero = (n != self.length) + if self.ctitem.size == 2: + try: + wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero) + except wchar_helper.OutOfRange as e: + raise oefmt(self.space.w_ValueError, + "unicode character ouf of range for " + "conversion to char16_t: %s", hex(e.ordinal)) + else: + wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero) else: raise self._convert_error("list or tuple", w_ob) @@ -134,12 +144,12 @@ # # pointer to a wchar_t: builds and returns a unicode if self.is_unichar_ptr_or_array(): - cdata = rffi.cast(rffi.CWCHARP, ptr) - if length < 0: - u = rffi.wcharp2unicode(cdata) + from pypy.module._cffi_backend import wchar_helper + if self.ctitem.size == 2: + length = wchar_helper.measure_length_16(ptr, length) else: - u = rffi.wcharp2unicoden(cdata, length) - return space.newunicode(u) + length = wchar_helper.measure_length_32(ptr, length) + return 
self.ctitem.unpack_ptr(self, ptr, length) # return W_CType.string(self, cdataobj, maxlen) @@ -302,9 +312,18 @@ if (space.isinstance_w(w_init, space.w_list) or space.isinstance_w(w_init, space.w_tuple)): length = space.int_w(space.len(w_init)) - elif space.isinstance_w(w_init, space.w_basestring): + elif space.isinstance_w(w_init, space.w_bytes): # from a string, we add the null terminator - length = space.int_w(space.len(w_init)) + 1 + s = space.bytes_w(w_init) + length = len(s) + 1 + elif space.isinstance_w(w_init, space.w_unicode): + from pypy.module._cffi_backend import wchar_helper + u = space.unicode_w(w_init) + if self.ctitem.size == 2: + length = wchar_helper.unicode_size_as_char16(u) + else: + length = wchar_helper.unicode_size_as_char32(u) + length += 1 elif self.is_file: result = self.prepare_file(w_init) if result: diff --git a/pypy/module/_cffi_backend/ctypestruct.py b/pypy/module/_cffi_backend/ctypestruct.py --- a/pypy/module/_cffi_backend/ctypestruct.py +++ b/pypy/module/_cffi_backend/ctypestruct.py @@ -244,7 +244,7 @@ ct = self.ctype if isinstance(ct, ctypearray.W_CTypeArray) and ct.length < 0: space = ct.space - w_ob, varsizelength = misc.get_new_array_length(space, w_ob) + w_ob, varsizelength = ct.get_new_array_length(w_ob) if optvarsize != -1: # in this mode, the only purpose of this function is to compute # the real size of the structure from a var-sized C99 array diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -290,21 +290,6 @@ # ____________________________________________________________ -def get_new_array_length(space, w_value): - if (space.isinstance_w(w_value, space.w_list) or - space.isinstance_w(w_value, space.w_tuple)): - return (w_value, space.int_w(space.len(w_value))) - elif space.isinstance_w(w_value, space.w_basestring): - # from a string, we add the null terminator - return (w_value, space.int_w(space.len(w_value)) 
+ 1) - else: - explicitlength = space.getindex_w(w_value, space.w_OverflowError) - if explicitlength < 0: - raise oefmt(space.w_ValueError, "negative array length") - return (space.w_None, explicitlength) - -# ____________________________________________________________ - @specialize.arg(0) def _raw_memcopy_tp(TPP, source, dest): # in its own function: LONGLONG may make the whole function jit-opaque diff --git a/pypy/module/_cffi_backend/newtype.py b/pypy/module/_cffi_backend/newtype.py --- a/pypy/module/_cffi_backend/newtype.py +++ b/pypy/module/_cffi_backend/newtype.py @@ -111,6 +111,9 @@ eptype("size_t", rffi.SIZE_T, ctypeprim.W_CTypePrimitiveUnsigned) eptype("ssize_t", rffi.SSIZE_T, ctypeprim.W_CTypePrimitiveSigned) +eptypesize("char16_t", 2, ctypeprim.W_CTypePrimitiveUniChar) +eptypesize("char32_t", 4, ctypeprim.W_CTypePrimitiveUniChar) + _WCTSigned = ctypeprim.W_CTypePrimitiveSigned _WCTUnsign = ctypeprim.W_CTypePrimitiveUnsigned diff --git a/pypy/module/_cffi_backend/realize_c_type.py b/pypy/module/_cffi_backend/realize_c_type.py --- a/pypy/module/_cffi_backend/realize_c_type.py +++ b/pypy/module/_cffi_backend/realize_c_type.py @@ -73,6 +73,8 @@ "uintmax_t", "float _Complex", "double _Complex", + "char16_t", + "char32_t", ] assert len(NAMES) == cffi_opcode._NUM_PRIM diff --git a/pypy/module/_cffi_backend/src/parse_c_type.c b/pypy/module/_cffi_backend/src/parse_c_type.c --- a/pypy/module/_cffi_backend/src/parse_c_type.c +++ b/pypy/module/_cffi_backend/src/parse_c_type.c @@ -505,6 +505,7 @@ case '1': if (size == 8 && !memcmp(p, "uint16", 6)) return _CFFI_PRIM_UINT16; + if (size == 8 && !memcmp(p, "char16", 6)) return _CFFI_PRIM_CHAR16; break; case '2': @@ -513,6 +514,7 @@ case '3': if (size == 8 && !memcmp(p, "uint32", 6)) return _CFFI_PRIM_UINT32; + if (size == 8 && !memcmp(p, "char32", 6)) return _CFFI_PRIM_CHAR32; break; case '4': diff --git a/pypy/module/_cffi_backend/src/parse_c_type.h b/pypy/module/_cffi_backend/src/parse_c_type.h --- 
a/pypy/module/_cffi_backend/src/parse_c_type.h +++ b/pypy/module/_cffi_backend/src/parse_c_type.h @@ -80,8 +80,10 @@ #define _CFFI_PRIM_UINTMAX 47 #define _CFFI_PRIM_FLOATCOMPLEX 48 #define _CFFI_PRIM_DOUBLECOMPLEX 49 +#define _CFFI_PRIM_CHAR16 50 +#define _CFFI_PRIM_CHAR32 51 -#define _CFFI__NUM_PRIM 50 +#define _CFFI__NUM_PRIM 52 #define _CFFI__UNKNOWN_PRIM (-1) #define _CFFI__UNKNOWN_FLOAT_PRIM (-2) #define _CFFI__UNKNOWN_LONG_DOUBLE (-3) diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -1925,7 +1925,11 @@ assert string(a, 8).startswith(b'ABC') # may contain additional garbage def test_string_wchar(): - BWChar = new_primitive_type("wchar_t") + for typename in ["wchar_t", "char16_t", "char32_t"]: + _test_string_wchar_variant(typename) + +def _test_string_wchar_variant(typename): + BWChar = new_primitive_type(typename) assert string(cast(BWChar, 42)) == u+'*' assert string(cast(BWChar, 0x4253)) == u+'\u4253' assert string(cast(BWChar, 0)) == u+'\x00' @@ -2087,22 +2091,44 @@ py.test.raises(TypeError, newp, BStructPtr, [cast(BFunc2, 0)]) def test_wchar(): - BWChar = new_primitive_type("wchar_t") + _test_wchar_variant("wchar_t") + if sys.platform.startswith("linux"): + BWChar = new_primitive_type("wchar_t") + assert sizeof(BWChar) == 4 + assert int(cast(BWChar, -1)) == -1 # signed, on linux + +def test_char16(): + BChar16 = new_primitive_type("char16_t") + assert sizeof(BChar16) == 2 + _test_wchar_variant("char16_t") + assert int(cast(BChar16, -1)) == 0xffff # always unsigned + +def test_char32(): + BChar32 = new_primitive_type("char32_t") + assert sizeof(BChar32) == 4 + _test_wchar_variant("char32_t") + assert int(cast(BChar32, -1)) == 0xffffffff # always unsigned + +def _test_wchar_variant(typename): + BWChar = new_primitive_type(typename) BInt = new_primitive_type("int") pyuni4 = {1: 
True, 2: False}[len(u+'\U00012345')] wchar4 = {2: False, 4: True}[sizeof(BWChar)] - assert str(cast(BWChar, 0x45)) == "<cdata 'wchar_t' %s'E'>" % ( - mandatory_u_prefix,) - assert str(cast(BWChar, 0x1234)) == "<cdata 'wchar_t' %s'\u1234'>" % ( - mandatory_u_prefix,) - if wchar4: - if not _hacked_pypy_uni4(): + assert str(cast(BWChar, 0x45)) == "<cdata '%s' %s'E'>" % ( + typename, mandatory_u_prefix) + assert str(cast(BWChar, 0x1234)) == "<cdata '%s' %s'\u1234'>" % ( + typename, mandatory_u_prefix) + if not _hacked_pypy_uni4(): + if wchar4: x = cast(BWChar, 0x12345) - assert str(x) == "<cdata 'wchar_t' %s'\U00012345'>" % ( - mandatory_u_prefix,) + assert str(x) == "<cdata '%s' %s'\U00012345'>" % ( + typename, mandatory_u_prefix) assert int(x) == 0x12345 - else: - assert not pyuni4 + else: + x = cast(BWChar, 0x18345) + assert str(x) == "<cdata '%s' %s'\u8345'>" % ( + typename, mandatory_u_prefix) + assert int(x) == 0x8345 # BWCharP = new_pointer_type(BWChar) BStruct = new_struct_type("struct foo_s") @@ -2117,9 +2143,9 @@ s.a1 = u+'\u1234' assert s.a1 == u+'\u1234' if pyuni4: - assert wchar4 - s.a1 = u+'\U00012345' - assert s.a1 == u+'\U00012345' + if wchar4: + s.a1 = u+'\U00012345' + assert s.a1 == u+'\U00012345' elif wchar4: if not _hacked_pypy_uni4(): s.a1 = cast(BWChar, 0x12345) @@ -2154,17 +2180,17 @@ py.test.raises(IndexError, 'a[4]') # w = cast(BWChar, 'a') - assert repr(w) == "<cdata 'wchar_t' %s'a'>" % mandatory_u_prefix + assert repr(w) == "<cdata '%s' %s'a'>" % (typename, mandatory_u_prefix) assert str(w) == repr(w) assert string(w) == u+'a' assert int(w) == ord('a') w = cast(BWChar, 0x1234) - assert repr(w) == "<cdata 'wchar_t' %s'\u1234'>" % mandatory_u_prefix + assert repr(w) == "<cdata '%s' %s'\u1234'>" % (typename, mandatory_u_prefix) assert str(w) == repr(w) assert string(w) == u+'\u1234' assert int(w) == 0x1234 w = cast(BWChar, u+'\u8234') - assert repr(w) == "<cdata 'wchar_t' %s'\u8234'>" % mandatory_u_prefix + assert repr(w) == "<cdata '%s' 
%s'\u8234'>" % (typename, mandatory_u_prefix) assert str(w) == repr(w) assert string(w) == u+'\u8234' assert int(w) == 0x8234 @@ -2172,8 +2198,8 @@ assert repr(w) == "<cdata 'int' 4660>" if wchar4 and not _hacked_pypy_uni4(): w = cast(BWChar, u+'\U00012345') - assert repr(w) == "<cdata 'wchar_t' %s'\U00012345'>" % ( - mandatory_u_prefix,) + assert repr(w) == "<cdata '%s' %s'\U00012345'>" % ( + typename, mandatory_u_prefix) assert str(w) == repr(w) assert string(w) == u+'\U00012345' assert int(w) == 0x12345 @@ -2200,7 +2226,7 @@ py.test.raises(RuntimeError, string, q) # def cb(p): - assert repr(p).startswith("<cdata 'wchar_t *' 0x") + assert repr(p).startswith("<cdata '%s *' 0x" % typename) return len(string(p)) BFunc = new_function_type((BWCharP,), BInt, False) f = callback(BFunc, cb, -42) @@ -2213,6 +2239,27 @@ x = cast(BWChar, -1) py.test.raises(ValueError, string, x) +def test_wchar_variants_mix(): + BWChar = new_primitive_type("wchar_t") + BChar16 = new_primitive_type("char16_t") + BChar32 = new_primitive_type("char32_t") + assert int(cast(BChar32, cast(BChar16, -2))) == 0xfffe + assert int(cast(BWChar, cast(BChar16, -2))) == 0xfffe + assert int(cast(BChar16, cast(BChar32, 0x0001f345))) == 0xf345 + assert int(cast(BChar16, cast(BWChar, 0x0001f345))) == 0xf345 + # + BChar16A = new_array_type(new_pointer_type(BChar16), None) + BChar32A = new_array_type(new_pointer_type(BChar32), None) + x = cast(BChar32, 'A') + py.test.raises(TypeError, newp, BChar16A, [x]) + x = cast(BChar16, 'A') + py.test.raises(TypeError, newp, BChar32A, [x]) + # + a = newp(BChar16A, u+'\U00012345') + assert len(a) == 3 + a = newp(BChar32A, u+'\U00012345') + assert len(a) == 2 # even if the Python unicode string above is 2 chars + def test_keepalive_struct(): # exception to the no-keepalive rule: p=newp(BStructPtr) returns a # pointer owning the memory, and p[0] returns a pointer to the @@ -3439,14 +3486,15 @@ py.test.raises(TypeError, "p[1:5] = u+'XYZT'") py.test.raises(TypeError, "p[1:5] = 
[1, 2, 3, 4]") # - BUniChar = new_primitive_type("wchar_t") - BArray = new_array_type(new_pointer_type(BUniChar), None) - p = newp(BArray, u+"foobar") - p[2:5] = [u+"*", u+"Z", u+"T"] - p[1:3] = u+"XY" - assert list(p) == [u+"f", u+"X", u+"Y", u+"Z", u+"T", u+"r", u+"\x00"] - py.test.raises(TypeError, "p[1:5] = b'XYZT'") - py.test.raises(TypeError, "p[1:5] = [1, 2, 3, 4]") + for typename in ["wchar_t", "char16_t", "char32_t"]: + BUniChar = new_primitive_type(typename) + BArray = new_array_type(new_pointer_type(BUniChar), None) + p = newp(BArray, u+"foobar") + p[2:5] = [u+"*", u+"Z", u+"T"] + p[1:3] = u+"XY" + assert list(p) == [u+"f", u+"X", u+"Y", u+"Z", u+"T", u+"r", u+"\x00"] + py.test.raises(TypeError, "p[1:5] = b'XYZT'") + py.test.raises(TypeError, "p[1:5] = [1, 2, 3, 4]") def test_void_p_arithmetic(): BVoid = new_void_type() @@ -3759,10 +3807,12 @@ p0 = p assert unpack(p, 10) == b"abc\x00def\x00\x00\x00" assert unpack(p+1, 5) == b"bc\x00de" - BWChar = new_primitive_type("wchar_t") - BArray = new_array_type(new_pointer_type(BWChar), 10) # wchar_t[10] - p = newp(BArray, u"abc\x00def") - assert unpack(p, 10) == u"abc\x00def\x00\x00\x00" + + for typename in ["wchar_t", "char16_t", "char32_t"]: + BWChar = new_primitive_type(typename) + BArray = new_array_type(new_pointer_type(BWChar), 10) # wchar_t[10] + p = newp(BArray, u"abc\x00def") + assert unpack(p, 10) == u"abc\x00def\x00\x00\x00" for typename, samples in [ ("uint8_t", [0, 2**8-1]), diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -555,3 +555,11 @@ import _cffi_backend as _cffi1_backend ffi = _cffi1_backend.FFI() raises(ffi.error, ffi.cast, "int[-5]", 0) + + def test_char32_t(self): + import _cffi_backend as _cffi1_backend + ffi = _cffi1_backend.FFI() + z = ffi.new("char32_t[]", u'\U00012345') + assert len(z) == 2 + assert ffi.cast("int *", 
z)[0] == 0x12345 + assert list(z) == [u'\U00012345', u'\x00'] # maybe a 2-unichars str diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py new file mode 100644 --- /dev/null +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -0,0 +1,192 @@ +from rpython.rlib.objectmodel import specialize +from rpython.rlib.rarithmetic import r_uint, r_ulonglong, intmask +from rpython.rtyper.annlowlevel import llunicode +from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rtyper.lltypesystem.rstr import copy_unicode_to_raw + +SIZE_UNICODE = rffi.sizeof(lltype.UniChar) + + +if SIZE_UNICODE == 4: + def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint + return unichr(intmask(ordinal)) +else: + def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint + if ordinal <= 0xffff: + return unichr(intmask(ordinal)) + elif ordinal <= 0x10ffff: + ordinal = intmask(ordinal - 0x10000) + return (unichr(0xD800 | (ordinal >> 10)) + + unichr(0xDC00 | (ordinal & 0x3FF))) + else: + raise OutOfRange(ordinal) + +def is_surrogate(u, index): + return (unichr(0xD800) <= u[index + 0] <= unichr(0xDBFF) and + unichr(0xDC00) <= u[index + 1] <= unichr(0xDFFF)) + +def as_surrogate(u, index): + ordinal = (ord(u[index + 0]) - 0xD800) << 10 + ordinal |= (ord(u[index + 1]) - 0xDC00) + return r_uint(ordinal + 0x10000) + +def unicode_to_ordinal(u): + if len(u) == 1: + u = ord(u[0]) + return r_uint(u) + elif SIZE_UNICODE == 2: + if len(u) == 2 and is_surrogate(u, 0): + return r_uint(as_surrogate(u, 0)) + raise ValueError + + +class OutOfRange(Exception): + ordinal = 0 + + def __init__(self, ordinal): + ordinal = intmask(rffi.cast(rffi.INT, ordinal)) + self.ordinal = ordinal + +def _unicode_from_wchar(ptr, length): + return rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length) + + +if SIZE_UNICODE == 2: + def unicode_from_char32(ptr, length): + # 'ptr' is a pointer to 'length' 32-bit integers + ptr = rffi.cast(rffi.UINTP, ptr) + alloc = length + for 
i in range(length): + if rffi.cast(lltype.Unsigned, ptr[i]) > 0xFFFF: + alloc += 1 + + u = [u'\x00'] * alloc + j = 0 + for i in range(length): + ordinal = rffi.cast(lltype.Unsigned, ptr[i]) + if ordinal > 0xFFFF: + if ordinal > 0x10FFFF: + raise OutOfRange(ordinal) + ordinal = intmask(ordinal - 0x10000) + u[j] = unichr(0xD800 | (ordinal >> 10)) + j += 1 + u[j] = unichr(0xDC00 | (ordinal & 0x3FF)) + j += 1 + else: + u[j] = unichr(intmask(ordinal)) + j += 1 + assert j == len(u) + return u''.join(u) + + unicode_from_char16 = _unicode_from_wchar + +else: + unicode_from_char32 = _unicode_from_wchar + + def unicode_from_char16(ptr, length): + # 'ptr' is a pointer to 'length' 16-bit integers + ptr = rffi.cast(rffi.USHORTP, ptr) + u = [u'\x00'] * length + i = 0 + j = 0 + while j < length: + ch = intmask(ptr[j]) + j += 1 + if 0xD800 <= ch <= 0xDBFF and j < length: + ch2 = intmask(ptr[j]) + if 0xDC00 <= ch2 <= 0xDFFF: + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 + j += 1 + u[i] = unichr(ch) + i += 1 + del u[i:] + return u''.join(u) + + +@specialize.ll() +def _measure_length(ptr, maxlen): + result = 0 + if maxlen < 0: + while intmask(ptr[result]) != 0: + result += 1 + else: + while result < maxlen and intmask(ptr[result]) != 0: + result += 1 + return result + +def measure_length_16(ptr, maxlen=-1): + return _measure_length(rffi.cast(rffi.USHORTP, ptr), maxlen) + +def measure_length_32(ptr, maxlen=-1): + return _measure_length(rffi.cast(rffi.UINTP, ptr), maxlen) + + +def unicode_size_as_char16(u): + result = len(u) + if SIZE_UNICODE == 4: + for i in range(result): + if ord(u[i]) > 0xFFFF: + result += 1 + return result + +def unicode_size_as_char32(u): + result = len(u) + if SIZE_UNICODE == 2 and result > 1: + for i in range(result - 1): + if is_surrogate(u, i): + result -= 1 + return result + + +def _unicode_to_wchar(u, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' wchars; + # we assume here that target_length == 
len(u). + unichardata = rffi.cast(rffi.CWCHARP, target_ptr) + copy_unicode_to_raw(llunicode(u), unichardata, 0, target_length) + if add_final_zero: + unichardata[target_length] = u'\x00' + + +if SIZE_UNICODE == 2: + def unicode_to_char32(u, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers; + # we assume here that target_length == unicode_size_as_char32(u). + ptr = rffi.cast(rffi.UINTP, target_ptr) + src_index = 0 + last_surrogate_pos = len(u) - 2 + for i in range(target_length): + if src_index <= last_surrogate_pos and is_surrogate(u, src_index): + ordinal = as_surrogate(u, src_index) + src_index += 2 + else: + ordinal = r_uint(ord(u[src_index])) + src_index += 1 + ptr[i] = rffi.cast(rffi.UINT, ordinal) + if add_final_zero: + ptr[target_length] = rffi.cast(rffi.UINT, 0) + + unicode_to_char16 = _unicode_to_wchar + +else: + unicode_to_char32 = _unicode_to_wchar + + def unicode_to_char16(u, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers; + # we assume here that target_length == unicode_size_as_char16(u). 
+ ptr = rffi.cast(rffi.USHORTP, target_ptr) + for uc in u: + ordinal = ord(uc) + if ordinal > 0xFFFF: + if ordinal > 0x10FFFF: + raise OutOfRange(ordinal) + ordinal -= 0x10000 + ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10)) + ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF)) + ptr = rffi.ptradd(ptr, 2) + else: + ptr[0] = rffi.cast(rffi.USHORT, ordinal) + ptr = rffi.ptradd(ptr, 1) + assert ptr == ( + rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length)) + if add_final_zero: + ptr[0] = rffi.cast(rffi.USHORT, 0) diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py @@ -2,6 +2,7 @@ import py, sys, platform import pytest from pypy.module.test_lib_pypy.cffi_tests.cffi0 import backend_tests, test_function, test_ownlib +from pypy.module.test_lib_pypy.cffi_tests.support import u from cffi import FFI import _cffi_backend @@ -398,6 +399,8 @@ "double", "long double", "wchar_t", + "char16_t", + "char32_t", "_Bool", "int8_t", "uint8_t", @@ -509,3 +512,43 @@ py.test.raises(TypeError, cd) py.test.raises(TypeError, cd, ffi.NULL) py.test.raises(TypeError, cd, ffi.typeof("void *")) + + def test_explicitly_defined_char16_t(self): + ffi = FFI() + ffi.cdef("typedef uint16_t char16_t;") + x = ffi.cast("char16_t", 1234) + assert ffi.typeof(x) is ffi.typeof("uint16_t") + + def test_char16_t(self): + ffi = FFI() + x = ffi.new("char16_t[]", 5) + assert len(x) == 5 and ffi.sizeof(x) == 10 + x[2] = u+'\u1324' + assert x[2] == u+'\u1324' + y = ffi.new("char16_t[]", u+'\u1234\u5678') + assert len(y) == 3 + assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00'] + assert ffi.string(y) == u+'\u1234\u5678' + z = ffi.new("char16_t[]", u+'\U00012345') + assert len(z) == 3 + assert list(z) == [u+'\ud808', u+'\udf45', u+'\x00'] + assert ffi.string(z) 
== u+'\U00012345' + + def test_char32_t(self): + ffi = FFI() + x = ffi.new("char32_t[]", 5) + assert len(x) == 5 and ffi.sizeof(x) == 20 + x[3] = u+'\U00013245' + assert x[3] == u+'\U00013245' + y = ffi.new("char32_t[]", u+'\u1234\u5678') + assert len(y) == 3 + assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00'] + py_uni = u+'\U00012345' + z = ffi.new("char32_t[]", py_uni) + assert len(z) == 2 + assert list(z) == [py_uni, u+'\x00'] # maybe a 2-unichars string + assert ffi.string(z) == py_uni + if len(py_uni) == 1: # 4-bytes unicodes in Python + s = ffi.new("char32_t[]", u+'\ud808\udf00') + assert len(s) == 3 + assert list(s) == [u+'\ud808', u+'\udf00', u+'\x00'] diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ownlib.py @@ -3,6 +3,7 @@ import subprocess, weakref from cffi import FFI from cffi.backend_ctypes import CTypesBackend +from pypy.module.test_lib_pypy.cffi_tests.support import u SOURCE = """\ @@ -93,6 +94,15 @@ } EXPORT int my_array[7] = {0, 1, 2, 3, 4, 5, 6}; + +EXPORT unsigned short foo_2bytes(unsigned short a) +{ + return (unsigned short)(a + 42); +} +EXPORT unsigned int foo_4bytes(unsigned int a) +{ + return (unsigned int)(a + 42); +} """ class TestOwnLib(object): @@ -301,3 +311,18 @@ pfn = ffi.addressof(lib, "test_getting_errno") assert ffi.typeof(pfn) == ffi.typeof("int(*)(void)") assert pfn == lib.test_getting_errno + + def test_char16_char32_t(self): + if self.module is None: + py.test.skip("fix the auto-generation of the tiny test lib") + if self.Backend is CTypesBackend: + py.test.skip("not implemented with the ctypes backend") + ffi = FFI(backend=self.Backend()) + ffi.cdef(""" + char16_t foo_2bytes(char16_t); + char32_t foo_4bytes(char32_t); + """) + lib = ffi.dlopen(self.module) + assert lib.foo_2bytes(u+'\u1234') == u+'\u125e' + assert 
lib.foo_4bytes(u+'\u1234') == u+'\u125e' + assert lib.foo_4bytes(u+'\U00012345') == u+'\U0001236f' diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_verify.py @@ -242,7 +242,7 @@ F = tp.is_float_type() X = tp.is_complex_type() I = tp.is_integer_type() - assert C == (typename in ('char', 'wchar_t')) + assert C == (typename in ('char', 'wchar_t', 'char16_t', 'char32_t')) assert F == (typename in ('float', 'double', 'long double')) assert X == (typename in ('float _Complex', 'double _Complex')) assert I + F + C + X == 1 # one and only one of them is true @@ -385,6 +385,10 @@ lib = ffi.verify("wchar_t foo(wchar_t x) { return x+1; }") assert lib.foo(uniexample1) == uniexample2 +def test_char16_char32_type(): + py.test.skip("XXX test or fully prevent char16_t and char32_t from " + "working in ffi.verify() mode") + def test_no_argument(): ffi = FFI() ffi.cdef("int foo(void);") diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_new_ffi_1.py @@ -1673,6 +1673,8 @@ "double", "long double", "wchar_t", + "char16_t", + "char32_t", "_Bool", "int8_t", "uint8_t", @@ -1743,3 +1745,30 @@ exec("from _test_import_from_lib import *", d) assert (sorted([x for x in d.keys() if not x.startswith('__')]) == ['ffi', 'lib']) + + def test_char16_t(self): + x = ffi.new("char16_t[]", 5) + assert len(x) == 5 and ffi.sizeof(x) == 10 + x[2] = u+'\u1324' + assert x[2] == u+'\u1324' + y = ffi.new("char16_t[]", u+'\u1234\u5678') + assert len(y) == 3 + assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00'] + assert ffi.string(y) == u+'\u1234\u5678' + z = ffi.new("char16_t[]", u+'\U00012345') + assert len(z) 
== 3 + assert list(z) == [u+'\ud808', u+'\udf45', u+'\x00'] + assert ffi.string(z) == u+'\U00012345' + + def test_char32_t(self): + x = ffi.new("char32_t[]", 5) + assert len(x) == 5 and ffi.sizeof(x) == 20 + x[3] = u+'\U00013245' + assert x[3] == u+'\U00013245' + y = ffi.new("char32_t[]", u+'\u1234\u5678') + assert len(y) == 3 + assert list(y) == [u+'\u1234', u+'\u5678', u+'\x00'] + z = ffi.new("char32_t[]", u+'\U00012345') + assert len(z) == 2 + assert list(z) == [u+'\U00012345', u+'\x00'] # maybe a 2-unichars string + assert ffi.string(z) == u+'\U00012345' diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py @@ -25,13 +25,14 @@ assert ''.join(map(str, recomp.cffi_types)) == expected_output def verify(ffi, module_name, source, *args, **kwds): + no_cpp = kwds.pop('no_cpp', False) kwds.setdefault('undef_macros', ['NDEBUG']) module_name = '_CFFI_' + module_name ffi.set_source(module_name, source) - if not os.environ.get('NO_CPP'): # test the .cpp mode too + if not os.environ.get('NO_CPP') and not no_cpp: # test the .cpp mode too kwds.setdefault('source_extension', '.cpp') source = 'extern "C" {\n%s\n}' % (source,) - else: + elif sys.platform != 'win32': # add '-Werror' to the existing 'extra_compile_args' flags kwds['extra_compile_args'] = (kwds.get('extra_compile_args', []) + ['-Werror']) @@ -2010,7 +2011,7 @@ lib = verify(ffi, "test_function_returns_float_complex", """ #include <complex.h> static float _Complex f1(float a, float b) { return a + I*2.0*b; } - """) + """, no_cpp=True) # <complex.h> fails on some systems with C++ result = lib.f1(1.25, 5.1) assert type(result) == complex assert result.real == 1.25 # exact @@ -2024,7 +2025,7 @@ lib = verify(ffi, "test_function_returns_double_complex", """ #include <complex.h> static double _Complex 
f1(double a, double b) { return a + I*2.0*b; } - """) + """, no_cpp=True) # <complex.h> fails on some systems with C++ result = lib.f1(1.25, 5.1) assert type(result) == complex assert result.real == 1.25 # exact @@ -2038,7 +2039,7 @@ lib = verify(ffi, "test_function_argument_float_complex", """ #include <complex.h> static float f1(float _Complex x) { return cabsf(x); } - """) + """, no_cpp=True) # <complex.h> fails on some systems with C++ x = complex(12.34, 56.78) result = lib.f1(x) assert abs(result - abs(x)) < 1e-5 @@ -2051,7 +2052,7 @@ lib = verify(ffi, "test_function_argument_double_complex", """ #include <complex.h> static double f1(double _Complex x) { return cabs(x); } - """) + """, no_cpp=True) # <complex.h> fails on some systems with C++ x = complex(12.34, 56.78) result = lib.f1(x) assert abs(result - abs(x)) < 1e-11 @@ -2251,3 +2252,34 @@ int f(int a) { return a + 40; } """, extra_compile_args=['-fvisibility=hidden']) assert lib.f(2) == 42 + +def test_override_default_definition(): + ffi = FFI() + ffi.cdef("typedef long int16_t, char16_t;") + lib = verify(ffi, "test_override_default_definition", "") + assert ffi.typeof("int16_t") is ffi.typeof("char16_t") is ffi.typeof("long") + +def test_char16_char32_type(no_cpp=False): + ffi = FFI() + ffi.cdef(""" + char16_t foo_2bytes(char16_t); + char32_t foo_4bytes(char32_t); + """) + lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """ + #if !defined(__cplusplus) || __cplusplus < 201103L + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; + #endif + + char16_t foo_2bytes(char16_t a) { return (char16_t)(a + 42); } + char32_t foo_4bytes(char32_t a) { return (char32_t)(a + 42); } + """, no_cpp=no_cpp) + assert lib.foo_2bytes(u+'\u1234') == u+'\u125e' + assert lib.foo_4bytes(u+'\u1234') == u+'\u125e' + assert lib.foo_4bytes(u+'\U00012345') == u+'\U0001236f' + py.test.raises(TypeError, lib.foo_2bytes, u+'\U00012345') + py.test.raises(TypeError, lib.foo_2bytes, 1234) + 
py.test.raises(TypeError, lib.foo_4bytes, 1234) + +def test_char16_char32_plain_c(): + test_char16_char32_type(no_cpp=True) diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_verify1.py @@ -222,7 +222,7 @@ F = tp.is_float_type() X = tp.is_complex_type() I = tp.is_integer_type() - assert C == (typename in ('char', 'wchar_t')) + assert C == (typename in ('char', 'wchar_t', 'char16_t', 'char32_t')) assert F == (typename in ('float', 'double', 'long double')) assert X == (typename in ('float _Complex', 'double _Complex')) assert I + F + C + X == 1 # one and only one of them is true _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit