Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r79878:47c87779c73a
Date: 2015-09-28 10:35 +0200
http://bitbucket.org/pypy/pypy/changeset/47c87779c73a/

Log: Tweak the RPython and PyPy ord() to behave like CPython's when given
strange inputs: never return negative numbers on 64-bit. Also fix the
repr() of unicodes containing such a character. (Tested in the array
module because it's hard to make invalid unichars otherwise.)

diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -844,6 +844,18 @@
         b.byteswap()
         assert a != b
 
+    def test_unicode_ord_positive(self):
+        import sys
+        if sys.maxunicode == 0xffff:
+            skip("test for 32-bit unicodes")
+        a = self.array('u', '\xff\xff\xff\xff')
+        assert len(a) == 1
+        assert repr(a[0]) == "u'\Uffffffff'"
+        if sys.maxint == 2147483647:
+            assert ord(a[0]) == -1
+        else:
+            assert ord(a[0]) == 4294967295
+
     def test_weakref(self):
         import weakref
         a = self.array('c', 'Hi!')
diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py
--- a/rpython/annotator/unaryop.py
+++ b/rpython/annotator/unaryop.py
@@ -652,11 +652,11 @@
     def len(self):
         return immutablevalue(1)
 
+class __extend__(SomeChar):
+
     def ord(self):
         return SomeInteger(nonneg=True)
 
-class __extend__(SomeChar):
-
     def method_isspace(self):
         return s_Bool
 
@@ -675,6 +675,13 @@
     def method_upper(self):
         return self
 
+class __extend__(SomeUnicodeCodePoint):
+
+    def ord(self):
+        # warning, on 32-bit with 32-bit unichars, this might return
+        # negative numbers
+        return SomeInteger()
+
 class __extend__(SomeIterator):
 
     def iter(self):
diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py
--- a/rpython/jit/metainterp/test/test_ajit.py
+++ b/rpython/jit/metainterp/test/test_ajit.py
@@ -4320,14 +4320,14 @@
         self.meta_interp(allfuncs, [9, 2000])
 
 
-    def test_unichar_might_be_signed(self):
-        py.test.skip("wchar_t is sometimes a signed 32-bit integer type, "
-                     "but RPython inteprets it as unsigned (but still "
-                     "translates to wchar_t, so can create confusion)")
+    def test_unichar_ord_is_never_signed_on_64bit(self):
+        import sys
+        if sys.maxunicode == 0xffff:
+            py.test.skip("test for 32-bit unicodes")
         def f(x):
-            return rffi.cast(lltype.Signed, rffi.cast(lltype.UniChar, x))
+            return ord(rffi.cast(lltype.UniChar, x))
         res = self.interp_operations(f, [-1])
-        if rffi.r_wchar_t.SIGN:
+        if sys.maxint == 2147483647:
             assert res == -1
         else:
-            assert res == 2 ** 16 - 1 or res == 2 ** 32 - 1
+            assert res == 4294967295
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1403,11 +1403,10 @@
         result.append(CHR(quote))
         return result.build()
 
+    TABLE = STR('0123456789abcdef')
+
     def char_escape_helper(result, char):
-        num = hex(char)
-        if STR is unicode:
-            num = num.decode('ascii')
-        if char >= 0x10000:
+        if char >= 0x10000 or char < 0:
             result.append(STR("\\U"))
             zeros = 8
         elif char >= 0x100:
@@ -1416,11 +1415,8 @@
         else:
             result.append(STR("\\x"))
             zeros = 2
-        lnum = len(num)
-        nb = zeros + 2 - lnum # num starts with '0x'
-        if nb > 0:
-            result.append_multiple_char(STR('0'), nb)
-        result.append_slice(num, 2, lnum)
+        for i in range(zeros-1, -1, -1):
+            result.append(TABLE[(char >> (4 * i)) & 0x0f])
 
     return unicode_escape, char_escape_helper
 
diff --git a/rpython/translator/c/src/int.h b/rpython/translator/c/src/int.h
--- a/rpython/translator/c/src/int.h
+++ b/rpython/translator/c/src/int.h
@@ -231,8 +231,12 @@
 #define OP_TRUNCATE_LONGLONG_TO_INT(x,r) r = (Signed)(x)
 #define OP_TRUNCATE_LONGLONGLONG_TO_INT(x,r) r = (Signed)(x)
 
-#define OP_CAST_UNICHAR_TO_INT(x,r) r = (Signed)((Unsigned)(x)) /*?*/
-#define OP_CAST_INT_TO_UNICHAR(x,r) r = (unsigned int)(x)
+/* Casting from UniChar to int goes first via "unsigned int".
+   On 64-bit platforms, this forces a signed 32-bit wchar_t
+   to an unsigned integer, which is also what CPython's ord()
+   does. */
+#define OP_CAST_UNICHAR_TO_INT(x,r) r = ((unsigned int)(x))
+#define OP_CAST_INT_TO_UNICHAR(x,r) r = (x)
 
 /* bool operations */
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit