Author: Philip Jenvey <[email protected]> Branch: py3k Changeset: r65297:f23adcde38f1 Date: 2013-07-08 15:50 -0700 http://bitbucket.org/pypy/pypy/changeset/f23adcde38f1/
Log: merge default diff --git a/lib-python/2.7/json/__init__.py b/lib-python/2.7/json/__init__.py --- a/lib-python/2.7/json/__init__.py +++ b/lib-python/2.7/json/__init__.py @@ -105,6 +105,12 @@ __author__ = 'Bob Ippolito <[email protected]>' +try: + # PyPy speedup, the interface is different than CPython's _json + import _pypyjson +except ImportError: + _pypyjson = None + from .decoder import JSONDecoder from .encoder import JSONEncoder @@ -241,7 +247,6 @@ _default_decoder = JSONDecoder(encoding=None, object_hook=None, object_pairs_hook=None) - def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing @@ -323,7 +328,10 @@ if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and parse_constant is None and object_pairs_hook is None and not kw): - return _default_decoder.decode(s) + if _pypyjson and not isinstance(s, unicode): + return _pypyjson.loads(s) + else: + return _default_decoder.decode(s) if cls is None: cls = JSONDecoder if object_hook is not None: diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -36,7 +36,7 @@ "binascii", "_multiprocessing", '_warnings', "_collections", "_multibytecodec", "_ffi", "_continuation", "_csv", "_cffi_backend", - "_posixsubprocess", # "cppyy", "micronumpy", + "_posixsubprocess", "_pypyjson", # "cppyy", "micronumpy", ] )) diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst --- a/pypy/doc/coding-guide.rst +++ b/pypy/doc/coding-guide.rst @@ -907,7 +907,7 @@ runs at application level. If you need to use modules you have to import them within the test function. -Another possibility to pass in data into the AppTest is to use +Data can be passed into the AppTest using the ``setup_class`` method of the AppTest. 
All wrapped objects that are attached to the class there and start with ``w_`` can be accessed via self (but without the ``w_``) in the actual test method. An example:: @@ -922,6 +922,46 @@ .. _`run the tests as usual`: +Another possibility is to use cls.space.appexec, for example:: + + class AppTestSomething(object): + def setup_class(cls): + arg = 2 + cls.w_result = cls.space.appexec([cls.space.wrap(arg)], """(arg): + return arg ** 6 + """) + + def test_power(self): + assert self.result == 2 ** 6 + +which executes the code string function with the given arguments at app level. +Note the use of ``w_result`` in ``setup_class`` but ``self.result`` in the test. +Here is how to define an app level class in ``setup_class`` that can be used +in subsequent tests:: + + class AppTestSet(object): + def setup_class(cls): + w_fakeint = cls.space.appexec([], """(): + class FakeInt(object): + def __init__(self, value): + self.value = value + def __hash__(self): + return hash(self.value) + + def __eq__(self, other): + if other == self.value: + return True + return False + return FakeInt + """) + cls.w_FakeInt = w_fakeint + + def test_fakeint(self): + f1 = self.FakeInt(4) + assert f1 == 4 + assert hash(f1) == hash(4) + + Command line tool test_all -------------------------- diff --git a/pypy/doc/config/objspace.usemodules._pypyjson.txt b/pypy/doc/config/objspace.usemodules._pypyjson.txt new file mode 100644 --- /dev/null +++ b/pypy/doc/config/objspace.usemodules._pypyjson.txt @@ -0,0 +1,1 @@ +RPython speedups for the stdlib json module diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -96,8 +96,21 @@ Does PyPy have a GIL? Why? ------------------------------------------------- -Yes, PyPy has a GIL. Removing the GIL is very hard. The first problem -is that our garbage collectors are not re-entrant. +Yes, PyPy has a GIL. Removing the GIL is very hard. 
The problems are +essentially the same as with CPython (including the fact that our +garbage collectors are not thread-safe so far). Fixing it is possible, +as shown by Jython and IronPython, but difficult. It would require +adapting the whole source code of PyPy, including subtle decisions about +whether some effects are ok or not for the user (i.e. the Python +programmer). + +Instead, since 2012, there is work going on on a still very experimental +Software Transactional Memory (STM) version of PyPy. This should give +an alternative PyPy which internally has no GIL, while at the same time +continuing to give the Python programmer the complete illusion of having +one. It would in fact push forward *more* GIL-ish behavior, like +declaring that some sections of the code should run without releasing +the GIL in the middle (these are called *atomic sections* in STM). ------------------------------------------ How do I write extension modules for PyPy? @@ -306,7 +319,7 @@ No, and you shouldn't try. First and foremost, RPython is a language designed for writing interpreters. It is a restricted subset of -Python. If you program is not an interpreter but tries to do "real +Python. If your program is not an interpreter but tries to do "real things", like use *any* part of the standard Python library or *any* 3rd-party library, then it is not RPython to start with. You should only look at RPython if you try to `write your own interpreter`__. @@ -322,8 +335,35 @@ Yes, it is possible with enough effort to compile small self-contained pieces of RPython code doing a few performance-sensitive things. But this case is not interesting for us. If you needed to rewrite the code -in RPython, you could as well have rewritten it in C for example. The -latter is a much more supported, much more documented language `:-)` +in RPython, you could as well have rewritten it in C or C++ or Java for +example. 
These are much more supported, much more documented languages +`:-)` + + *The above paragraphs are not the whole truth. It* is *true that there + are cases where writing a program as RPython gives you substantially + better speed than running it on top of PyPy. However, the attitude of + the core group of people behind PyPy is to answer: "then report it as a + performance bug against PyPy!".* + + *Here is a more diluted way to put it. The "No, don't!" above is a + general warning we give to new people. They are likely to need a lot + of help from* some *source, because RPython is not so simple nor + extensively documented; but at the same time, we, the pypy core group + of people, are not willing to invest time in supporting 3rd-party + projects that do very different things than interpreters for dynamic + languages --- just because we have other interests and there are only + so many hours a day. So as a summary I believe it is only fair to + attempt to point newcomers at existing alternatives, which are more + mainstream and where they will get help from many people.* + + *If anybody seriously wants to promote RPython anyway, he is welcome + to: we won't actively resist such a plan. There are a lot of things + that could be done to make RPython a better Java-ish language for + example, starting with supporting non-GIL-based multithreading, but we + don't implement them because they have little relevance to us. This + is open source, which means that anybody is free to promote and + develop anything; but it also means that you must let us choose* not + *to go into that direction ourselves.* --------------------------------------------------- Which backends are there for the RPython toolchain? diff --git a/pypy/doc/whatsnew-2.1.rst b/pypy/doc/whatsnew-2.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/whatsnew-2.1.rst @@ -0,0 +1,78 @@ +====================== +What's new in PyPy 2.1 +====================== + +.. 
this is a revision shortly after release-2.0 +.. startrev: a13c07067613 + +.. branch: ndarray-ptp +put and array.put + +.. branch: numpy-pickle +Pickling of numpy arrays and dtypes (including record dtypes) + +.. branch: remove-array-smm +Remove multimethods in the arraymodule + +.. branch: callback-stacklet +Fixed bug when switching stacklets from a C callback + +.. branch: remove-set-smm +Remove multi-methods on sets + +.. branch: numpy-subarrays +Implement subarrays for numpy + +.. branch: remove-dict-smm +Remove multi-methods on dict + +.. branch: remove-list-smm-2 +Remove remaining multi-methods on list + +.. branch: arm-stacklet +Stacklet support for ARM, enables _continuation support + +.. branch: remove-tuple-smm +Remove multi-methods on tuple + +.. branch: remove-iter-smm +Remove multi-methods on iterators + +.. branch: emit-call-x86 +.. branch: emit-call-arm + +.. branch: on-abort-resops +Added list of resops to the pypyjit on_abort hook. + +.. branch: logging-perf +Speeds up the stdlib logging module + +.. branch: operrfmt-NT +Adds a couple convenient format specifiers to operationerrfmt + +.. branch: win32-fixes3 +Skip and fix some non-translated (own) tests for win32 builds + +.. branch: ctypes-byref +Add the '_obj' attribute on ctypes pointer() and byref() objects + +.. branch: argsort-segfault +Fix a segfault in argsort when sorting by chunks on multidim numpypy arrays (mikefc) + +.. branch: dtype-isnative +.. branch: ndarray-round + +.. branch: faster-str-of-bigint +Improve performance of str(long). + +.. branch: ndarray-view +Add view to ndarray and zeroD arrays, not on dtype scalars yet + +.. branch: numpypy-segfault +fix segfault caused by iterating over empty ndarrays + +.. branch: identity-set +Faster sets for objects + +.. branch: inline-identityhash +Inline the fast path of id() and hash() diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,6 +5,9 @@ .. 
this is a revision shortly after release-2.0 .. startrev: a13c07067613 +.. branch: ndarray-ptp +put and array.put + .. branch: numpy-pickle Pickling of numpy arrays and dtypes (including record dtypes) @@ -65,6 +68,9 @@ .. branch: ndarray-view Add view to ndarray and zeroD arrays, not on dtype scalars yet +.. branch: numpypy-segfault +fix segfault caused by iterating over empty ndarrays + .. branch: identity-set Faster sets for objects diff --git a/pypy/module/_pypyjson/__init__.py b/pypy/module/_pypyjson/__init__.py new file mode 100644 --- /dev/null +++ b/pypy/module/_pypyjson/__init__.py @@ -0,0 +1,10 @@ +from pypy.interpreter.mixedmodule import MixedModule + +class Module(MixedModule): + """fast json implementation""" + + appleveldefs = {} + + interpleveldefs = { + 'loads' : 'interp_decoder.loads', + } diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py new file mode 100644 --- /dev/null +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -0,0 +1,404 @@ +import sys +import math +from rpython.rlib.rstring import StringBuilder +from rpython.rlib.objectmodel import specialize +from rpython.rlib import rfloat +from rpython.rtyper.lltypesystem import lltype, rffi +from pypy.interpreter.error import OperationError, operationerrfmt +from pypy.interpreter.gateway import unwrap_spec +from pypy.interpreter import unicodehelper +from rpython.rtyper.annlowlevel import llstr, hlunicode + +OVF_DIGITS = len(str(sys.maxint)) + +def is_whitespace(ch): + return ch == ' ' or ch == '\t' or ch == '\r' or ch == '\n' + +# precomputing negative powers of 10 is MUCH faster than using e.g. math.pow +# at runtime +NEG_POW_10 = [10.0**-i for i in range(16)] +def neg_pow_10(x, exp): + if exp >= len(NEG_POW_10): + return 0.0 + return x * NEG_POW_10[exp] + +def strslice2unicode_latin1(s, start, end): + """ + Convert s[start:end] to unicode. 
s is supposed to be an RPython string + encoded in latin-1, which means that the numeric value of each char is the + same as the corresponding unicode code point. + + Internally it's implemented at the level of low-level helpers, to avoid + the extra copy we would need if we take the actual slice first. + + No bound checking is done, use carefully. + """ + from rpython.rtyper.annlowlevel import llstr, hlunicode + from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE + from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar + length = end-start + ll_s = llstr(s) + ll_res = malloc(UNICODE, length) + ll_res.hash = 0 + for i in range(length): + ch = ll_s.chars[start+i] + ll_res.chars[i] = cast_primitive(UniChar, ch) + return hlunicode(ll_res) + +TYPE_UNKNOWN = 0 +TYPE_STRING = 1 +class JSONDecoder(object): + def __init__(self, space, s): + self.space = space + self.s = s + # we put our string in a raw buffer so: + # 1) we automatically get the '\0' sentinel at the end of the string, + # which means that we never have to check for the "end of string" + # 2) we can pass the buffer directly to strtod + self.ll_chars = rffi.str2charp(s) + self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') + self.pos = 0 + self.last_type = TYPE_UNKNOWN + + def close(self): + rffi.free_charp(self.ll_chars) + lltype.free(self.end_ptr, flavor='raw') + + def getslice(self, start, end): + assert start >= 0 + assert end >= 0 + return self.s[start:end] + + def skip_whitespace(self, i): + while True: + ch = self.ll_chars[i] + if is_whitespace(ch): + i+=1 + else: + break + return i + + @specialize.arg(1) + def _raise(self, msg, *args): + raise operationerrfmt(self.space.w_ValueError, msg, *args) + + def decode_any(self, i): + i = self.skip_whitespace(i) + ch = self.ll_chars[i] + if ch == '"': + return self.decode_string(i+1) + elif ch == '[': + return self.decode_array(i+1) + elif ch == '{': + return self.decode_object(i+1) + elif ch == 'n': + return 
self.decode_null(i+1) + elif ch == 't': + return self.decode_true(i+1) + elif ch == 'f': + return self.decode_false(i+1) + elif ch == 'I': + return self.decode_infinity(i+1) + elif ch == 'N': + return self.decode_nan(i+1) + elif ch == '-': + if self.ll_chars[i+1] == 'I': + return self.decode_infinity(i+2, sign=-1) + return self.decode_numeric(i) + elif ch.isdigit(): + return self.decode_numeric(i) + else: + self._raise("No JSON object could be decoded: unexpected '%s' at char %d", + ch, self.pos) + + def decode_null(self, i): + if (self.ll_chars[i] == 'u' and + self.ll_chars[i+1] == 'l' and + self.ll_chars[i+2] == 'l'): + self.pos = i+3 + return self.space.w_None + self._raise("Error when decoding null at char %d", i) + + def decode_true(self, i): + if (self.ll_chars[i] == 'r' and + self.ll_chars[i+1] == 'u' and + self.ll_chars[i+2] == 'e'): + self.pos = i+3 + return self.space.w_True + self._raise("Error when decoding true at char %d", i) + + def decode_false(self, i): + if (self.ll_chars[i] == 'a' and + self.ll_chars[i+1] == 'l' and + self.ll_chars[i+2] == 's' and + self.ll_chars[i+3] == 'e'): + self.pos = i+4 + return self.space.w_False + self._raise("Error when decoding false at char %d", i) + + def decode_infinity(self, i, sign=1): + if (self.ll_chars[i] == 'n' and + self.ll_chars[i+1] == 'f' and + self.ll_chars[i+2] == 'i' and + self.ll_chars[i+3] == 'n' and + self.ll_chars[i+4] == 'i' and + self.ll_chars[i+5] == 't' and + self.ll_chars[i+6] == 'y'): + self.pos = i+7 + return self.space.wrap(rfloat.INFINITY * sign) + self._raise("Error when decoding Infinity at char %d", i) + + def decode_nan(self, i): + if (self.ll_chars[i] == 'a' and + self.ll_chars[i+1] == 'N'): + self.pos = i+2 + return self.space.wrap(rfloat.NAN) + self._raise("Error when decoding NaN at char %d", i) + + def decode_numeric(self, i): + start = i + i, ovf_maybe, intval = self.parse_integer(i) + # + # check for the optional fractional part + ch = self.ll_chars[i] + if ch == '.': + if not 
self.ll_chars[i+1].isdigit(): + self._raise("Expected digit at char %d", i+1) + return self.decode_float(start) + elif ch == 'e' or ch == 'E': + return self.decode_float(start) + elif ovf_maybe: + return self.decode_int_slow(start) + + self.pos = i + return self.space.wrap(intval) + + def decode_float(self, i): + from rpython.rlib import rdtoa + start = rffi.ptradd(self.ll_chars, i) + floatval = rdtoa.dg_strtod(start, self.end_ptr) + diff = rffi.cast(rffi.LONG, self.end_ptr[0]) - rffi.cast(rffi.LONG, start) + self.pos = i + diff + return self.space.wrap(floatval) + + def decode_int_slow(self, i): + start = i + if self.ll_chars[i] == '-': + i += 1 + while self.ll_chars[i].isdigit(): + i += 1 + s = self.getslice(start, i) + self.pos = i + return self.space.call_function(self.space.w_int, self.space.wrap(s)) + + def parse_integer(self, i): + "Parse a decimal number with an optional minus sign" + sign = 1 + # parse the sign + if self.ll_chars[i] == '-': + sign = -1 + i += 1 + elif self.ll_chars[i] == '+': + i += 1 + # + if self.ll_chars[i] == '0': + i += 1 + return i, False, 0 + + intval = 0 + start = i + while True: + ch = self.ll_chars[i] + if ch.isdigit(): + intval = intval*10 + ord(ch)-ord('0') + i += 1 + else: + break + count = i - start + if count == 0: + self._raise("Expected digit at char %d", i) + # if the number has more digits than OVF_DIGITS, it might have + # overflowed + ovf_maybe = (count >= OVF_DIGITS) + return i, ovf_maybe, sign * intval + parse_integer._always_inline_ = True + + def decode_array(self, i): + w_list = self.space.newlist([]) + start = i + count = 0 + i = self.skip_whitespace(start) + if self.ll_chars[i] == ']': + self.pos = i+1 + return w_list + # + while True: + w_item = self.decode_any(i) + i = self.pos + self.space.call_method(w_list, 'append', w_item) + i = self.skip_whitespace(i) + ch = self.ll_chars[i] + i += 1 + if ch == ']': + self.pos = i + return w_list + elif ch == ',': + pass + elif ch == '\0': + self._raise("Unterminated 
array starting at char %d", start) + else: + self._raise("Unexpected '%s' when decoding array (char %d)", + ch, self.pos) + + def decode_object(self, i): + start = i + w_dict = self.space.newdict() + # + i = self.skip_whitespace(i) + if self.ll_chars[i] == '}': + self.pos = i+1 + return w_dict + # + while True: + # parse a key: value + self.last_type = TYPE_UNKNOWN + w_name = self.decode_any(i) + if self.last_type != TYPE_STRING: + self._raise("Key name must be string for object starting at char %d", start) + i = self.skip_whitespace(self.pos) + ch = self.ll_chars[i] + if ch != ':': + self._raise("No ':' found at char %d", i) + i += 1 + i = self.skip_whitespace(i) + # + w_value = self.decode_any(i) + self.space.setitem(w_dict, w_name, w_value) + i = self.skip_whitespace(self.pos) + ch = self.ll_chars[i] + i += 1 + if ch == '}': + self.pos = i + return w_dict + elif ch == ',': + pass + elif ch == '\0': + self._raise("Unterminated object starting at char %d", start) + else: + self._raise("Unexpected '%s' when decoding object (char %d)", + ch, self.pos) + + + def decode_string(self, i): + start = i + bits = 0 + while True: + # this loop is a fast path for strings which do not contain escape + # characters + ch = self.ll_chars[i] + i += 1 + bits |= ord(ch) + if ch == '"': + if bits & 0x80: + # the 8th bit is set, it's a utf8 string + content_utf8 = self.getslice(start, i-1) + content_unicode = unicodehelper.decode_utf8(self.space, content_utf8) + else: + # ascii only, fast path (ascii is a strict subset of + # latin1, and we already checked that all the chars are < + # 128) + content_unicode = strslice2unicode_latin1(self.s, start, i-1) + self.last_type = TYPE_STRING + self.pos = i + return self.space.wrap(content_unicode) + elif ch == '\\': + content_so_far = self.getslice(start, i-1) + self.pos = i-1 + return self.decode_string_escaped(start, content_so_far) + elif ch == '\0': + self._raise("Unterminated string starting at char %d", start) + + + def 
decode_string_escaped(self, start, content_so_far): + builder = StringBuilder(len(content_so_far)*2) # just an estimate + builder.append(content_so_far) + i = self.pos + while True: + ch = self.ll_chars[i] + i += 1 + if ch == '"': + content_utf8 = builder.build() + content_unicode = unicodehelper.decode_utf8(self.space, content_utf8) + self.last_type = TYPE_STRING + self.pos = i + return self.space.wrap(content_unicode) + elif ch == '\\': + i = self.decode_escape_sequence(i, builder) + elif ch == '\0': + self._raise("Unterminated string starting at char %d", start) + else: + builder.append_multiple_char(ch, 1) # we should implement append_char + + def decode_escape_sequence(self, i, builder): + ch = self.ll_chars[i] + i += 1 + put = builder.append_multiple_char + if ch == '\\': put('\\', 1) + elif ch == '"': put('"' , 1) + elif ch == '/': put('/' , 1) + elif ch == 'b': put('\b', 1) + elif ch == 'f': put('\f', 1) + elif ch == 'n': put('\n', 1) + elif ch == 'r': put('\r', 1) + elif ch == 't': put('\t', 1) + elif ch == 'u': + return self.decode_escape_sequence_unicode(i, builder) + else: + self._raise("Invalid \\escape: %s (char %d)", ch, self.pos-1) + return i + + def decode_escape_sequence_unicode(self, i, builder): + # at this point we are just after the 'u' of the \u1234 sequence. 
+ start = i + i += 4 + hexdigits = self.getslice(start, i) + try: + val = int(hexdigits, 16) + if val & 0xfc00 == 0xd800: + # surrogate pair + val = self.decode_surrogate_pair(i, val) + i += 6 + except ValueError: + self._raise("Invalid \uXXXX escape (char %d)", i-1) + return # help the annotator to know that we'll never go beyond + # this point + # + uchr = unichr(val) + utf8_ch = unicodehelper.encode_utf8(self.space, uchr) + builder.append(utf8_ch) + return i + + def decode_surrogate_pair(self, i, highsurr): + if self.ll_chars[i] != '\\' or self.ll_chars[i+1] != 'u': + self._raise("Unpaired high surrogate at char %d", i) + i += 2 + hexdigits = self.getslice(i, i+4) + lowsurr = int(hexdigits, 16) # the possible ValueError is caught by the caller + return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00)) + +def loads(space, w_s): + if space.isinstance_w(w_s, space.w_unicode): + raise OperationError(space.w_TypeError, + space.wrap("Expected utf8-encoded str, got unicode")) + s = space.str_w(w_s) + decoder = JSONDecoder(space, s) + try: + w_res = decoder.decode_any(0) + i = decoder.skip_whitespace(decoder.pos) + if i < len(s): + start = i + end = len(s) - 1 + raise operationerrfmt(space.w_ValueError, "Extra data: char %d - %d", start, end) + return w_res + finally: + decoder.close() diff --git a/pypy/module/_pypyjson/targetjson.py b/pypy/module/_pypyjson/targetjson.py new file mode 100644 --- /dev/null +++ b/pypy/module/_pypyjson/targetjson.py @@ -0,0 +1,143 @@ +import sys +import py +ROOT = py.path.local(__file__).dirpath('..', '..', '..') +sys.path.insert(0, str(ROOT)) + +import time +from rpython.rlib.streamio import open_file_as_stream +from pypy.interpreter.error import OperationError +from pypy.module._pypyjson.interp_decoder import loads + + + +## MSG = open('msg.json').read() + +class W_Root(object): + pass + +class W_Dict(W_Root): + def __init__(self): + self.dictval = {} + +class W_Unicode(W_Root): + def __init__(self, x): + self.unival = x + 
+class W_String(W_Root): + def __init__(self, x): + self.strval = x + +class W_Int(W_Root): + def __init__(self, x): + self.intval = x + +class W_Float(W_Root): + def __init__(self, x): + self.floatval = x + +class W_List(W_Root): + def __init__(self): + self.listval = [] + +class W_Singleton(W_Root): + def __init__(self, name): + self.name = name + +class FakeSpace(object): + + w_None = W_Singleton('None') + w_True = W_Singleton('True') + w_False = W_Singleton('False') + w_ValueError = W_Singleton('ValueError') + w_UnicodeDecodeError = W_Singleton('UnicodeDecodeError') + w_unicode = W_Unicode + w_int = W_Int + w_float = W_Float + + def newtuple(self, items): + return None + + def newdict(self): + return W_Dict() + + def newlist(self, items): + return W_List() + + def isinstance_w(self, w_x, w_type): + return isinstance(w_x, w_type) + + def str_w(self, w_x): + assert isinstance(w_x, W_String) + return w_x.strval + + def call_method(self, obj, name, arg): + assert name == 'append' + assert isinstance(obj, W_List) + obj.listval.append(arg) + call_method._dont_inline_ = True + + def call_function(self, w_func, *args_w): + return self.w_None # XXX + + def setitem(self, d, key, value): + assert isinstance(d, W_Dict) + assert isinstance(key, W_Unicode) + d.dictval[key.unival] = value + + def wrapunicode(self, x): + return W_Unicode(x) + + def wrapint(self, x): + return W_Int(x) + + def wrapfloat(self, x): + return W_Float(x) + + def wrap(self, x): + if isinstance(x, int): + return W_Int(x) + elif isinstance(x, float): + return W_Float(x) + ## elif isinstance(x, str): + ## assert False + else: + return W_Unicode(unicode(x)) + wrap._annspecialcase_ = "specialize:argtype(1)" + + +fakespace = FakeSpace() + +def myloads(msg): + return loads(fakespace, W_String(msg)) + + +def bench(title, N, fn, arg): + a = time.clock() + for i in range(N): + res = fn(arg) + b = time.clock() + print title, (b-a) / N * 1000 + +def entry_point(argv): + if len(argv) != 3: + print 'Usage: %s FILE 
n' % argv[0] + return 1 + filename = argv[1] + N = int(argv[2]) + f = open_file_as_stream(filename) + msg = f.readall() + + try: + bench('loads ', N, myloads, msg) + except OperationError, e: + print 'Error', e._compute_value(fakespace) + + return 0 + +# _____ Define and setup target ___ + +def target(*args): + return entry_point, None + +if __name__ == '__main__': + entry_point(sys.argv) diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py new file mode 100644 --- /dev/null +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -0,0 +1,188 @@ +# -*- encoding: utf-8 -*- +import py +from pypy.module._pypyjson.interp_decoder import JSONDecoder + +def test_skip_whitespace(): + s = ' hello ' + dec = JSONDecoder('fake space', s) + assert dec.pos == 0 + assert dec.skip_whitespace(0) == 3 + assert dec.skip_whitespace(3) == 3 + assert dec.skip_whitespace(8) == len(s) + dec.close() + + + +class AppTest(object): + spaceconfig = {"objspace.usemodules._pypyjson": True} + + def test_raise_on_unicode(self): + import _pypyjson + raises(TypeError, _pypyjson.loads, u"42") + + + def test_decode_constants(self): + import _pypyjson + assert _pypyjson.loads('null') is None + raises(ValueError, _pypyjson.loads, 'nul') + raises(ValueError, _pypyjson.loads, 'nu') + raises(ValueError, _pypyjson.loads, 'n') + raises(ValueError, _pypyjson.loads, 'nuXX') + # + assert _pypyjson.loads('true') is True + raises(ValueError, _pypyjson.loads, 'tru') + raises(ValueError, _pypyjson.loads, 'tr') + raises(ValueError, _pypyjson.loads, 't') + raises(ValueError, _pypyjson.loads, 'trXX') + # + assert _pypyjson.loads('false') is False + raises(ValueError, _pypyjson.loads, 'fals') + raises(ValueError, _pypyjson.loads, 'fal') + raises(ValueError, _pypyjson.loads, 'fa') + raises(ValueError, _pypyjson.loads, 'f') + raises(ValueError, _pypyjson.loads, 'falXX') + + + def test_decode_string(self): + import _pypyjson + res = _pypyjson.loads('"hello"') + assert res 
== u'hello' + assert type(res) is unicode + + def test_decode_string_utf8(self): + import _pypyjson + s = u'àèìòù' + res = _pypyjson.loads('"%s"' % s.encode('utf-8')) + assert res == s + + def test_skip_whitespace(self): + import _pypyjson + s = ' "hello" ' + assert _pypyjson.loads(s) == u'hello' + s = ' "hello" extra' + raises(ValueError, "_pypyjson.loads(s)") + + def test_unterminated_string(self): + import _pypyjson + s = '"hello' # missing the trailing " + raises(ValueError, "_pypyjson.loads(s)") + + def test_escape_sequence(self): + import _pypyjson + assert _pypyjson.loads(r'"\\"') == u'\\' + assert _pypyjson.loads(r'"\""') == u'"' + assert _pypyjson.loads(r'"\/"') == u'/' + assert _pypyjson.loads(r'"\b"') == u'\b' + assert _pypyjson.loads(r'"\f"') == u'\f' + assert _pypyjson.loads(r'"\n"') == u'\n' + assert _pypyjson.loads(r'"\r"') == u'\r' + assert _pypyjson.loads(r'"\t"') == u'\t' + + def test_escape_sequence_in_the_middle(self): + import _pypyjson + s = r'"hello\nworld"' + assert _pypyjson.loads(s) == "hello\nworld" + + def test_unterminated_string_after_escape_sequence(self): + import _pypyjson + s = r'"hello\nworld' # missing the trailing " + raises(ValueError, "_pypyjson.loads(s)") + + def test_escape_sequence_unicode(self): + import _pypyjson + s = r'"\u1234"' + assert _pypyjson.loads(s) == u'\u1234' + + def test_invalid_utf_8(self): + import _pypyjson + s = '"\xe0"' # this is an invalid UTF8 sequence inside a string + raises(UnicodeDecodeError, "_pypyjson.loads(s)") + + def test_decode_numeric(self): + import sys + import _pypyjson + def check(s, val): + res = _pypyjson.loads(s) + assert type(res) is type(val) + assert res == val + # + check('42', 42) + check('-42', -42) + check('42.123', 42.123) + check('42E0', 42.0) + check('42E3', 42000.0) + check('42E-1', 4.2) + check('42E+1', 420.0) + check('42.123E3', 42123.0) + check('0', 0) + check('-0', 0) + check('0.123', 0.123) + check('0E3', 0.0) + check('5E0001', 50.0) + check(str(1 << 32), 1 << 32) + 
check(str(1 << 64), 1 << 64) + # + x = str(sys.maxint+1) + '.123' + check(x, float(x)) + x = str(sys.maxint+1) + 'E1' + check(x, float(x)) + x = str(sys.maxint+1) + 'E-1' + check(x, float(x)) + # + check('1E400', float('inf')) + ## # these are non-standard but supported by CPython json + check('Infinity', float('inf')) + check('-Infinity', float('-inf')) + + def test_nan(self): + import math + import _pypyjson + res = _pypyjson.loads('NaN') + assert math.isnan(res) + + def test_decode_numeric_invalid(self): + import _pypyjson + def error(s): + raises(ValueError, _pypyjson.loads, s) + # + error(' 42 abc') + error('.123') + error('+123') + error('12.') + error('12.-3') + error('12E') + error('12E-') + error('0123') # numbers can't start with 0 + + def test_decode_object(self): + import _pypyjson + assert _pypyjson.loads('{}') == {} + assert _pypyjson.loads('{ }') == {} + # + s = '{"hello": "world", "aaa": "bbb"}' + assert _pypyjson.loads(s) == {'hello': 'world', + 'aaa': 'bbb'} + raises(ValueError, _pypyjson.loads, '{"key"') + raises(ValueError, _pypyjson.loads, '{"key": 42') + + def test_decode_object_nonstring_key(self): + import _pypyjson + raises(ValueError, "_pypyjson.loads('{42: 43}')") + + def test_decode_array(self): + import _pypyjson + assert _pypyjson.loads('[]') == [] + assert _pypyjson.loads('[ ]') == [] + assert _pypyjson.loads('[1]') == [1] + assert _pypyjson.loads('[1, 2]') == [1, 2] + raises(ValueError, "_pypyjson.loads('[1: 2]')") + raises(ValueError, "_pypyjson.loads('[1, 2')") + raises(ValueError, """_pypyjson.loads('["extra comma",]')""") + + def test_unicode_surrogate_pair(self): + import _pypyjson + expected = u'z\U0001d120x' + res = _pypyjson.loads('"z\\ud834\\udd20x"') + assert res == expected + + diff --git a/pypy/module/micronumpy/interp_arrayops.py b/pypy/module/micronumpy/interp_arrayops.py --- a/pypy/module/micronumpy/interp_arrayops.py +++ b/pypy/module/micronumpy/interp_arrayops.py @@ -65,7 +65,7 @@ [ 3., 4., -1.], [-1., -1., -1.]]) - 
+ NOTE: support for not passing x and y is unsupported """ if space.is_none(w_y): @@ -122,10 +122,10 @@ for f in dtype.fields: if f not in a_dt.fields or \ dtype.fields[f] != a_dt.fields[f]: - raise OperationError(space.w_TypeError, + raise OperationError(space.w_TypeError, space.wrap("record type mismatch")) elif dtype.is_record_type() or a_dt.is_record_type(): - raise OperationError(space.w_TypeError, + raise OperationError(space.w_TypeError, space.wrap("invalid type promotion")) dtype = interp_ufuncs.find_binop_result_dtype(space, dtype, arr.get_dtype()) diff --git a/pypy/module/micronumpy/iter.py b/pypy/module/micronumpy/iter.py --- a/pypy/module/micronumpy/iter.py +++ b/pypy/module/micronumpy/iter.py @@ -46,6 +46,7 @@ calculate_slice_strides from pypy.module.micronumpy.base import W_NDimArray from pypy.module.micronumpy.arrayimpl import base +from pypy.module.micronumpy.support import product from rpython.rlib import jit # structures to describe slicing @@ -225,7 +226,7 @@ self.shape = shape self.offset = start self.shapelen = len(shape) - self._done = False + self._done = self.shapelen == 0 or product(shape) == 0 self.strides = strides self.backstrides = backstrides self.size = array.size @@ -284,7 +285,7 @@ self.backstrides = backstrides[:dim] + [0] + backstrides[dim:] self.first_line = True self.indices = [0] * len(shape) - self._done = False + self._done = array.get_size() == 0 self.offset = array.start self.dim = dim self.array = array diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py --- a/pypy/module/micronumpy/test/test_numarray.py +++ b/pypy/module/micronumpy/test/test_numarray.py @@ -293,6 +293,14 @@ b = array(a, copy=False, ndmin=4) b[0,0,0,0] = 0 assert a[0, 0] == 0 + a = array([[[]]]) + # Simulate tiling an empty array, really tests repeat, reshape + # b = tile(a, (3, 2, 5)) + reps = (3, 4, 5) + c = array(a, copy=False, subok=True, ndmin=len(reps)) + d = c.reshape(3, 4, 0) + e = d.repeat(3, 0) 
+ assert e.shape == (9, 4, 0) def test_type(self): from numpypy import array @@ -2562,6 +2570,9 @@ a = array(range(100) + range(100) + range(100)) b = a.argsort() assert (b[:3] == [0, 100, 200]).all() + a = array([[[]]]).reshape(3,4,0) + b = a.argsort() + assert b.size == 0 def test_argsort_random(self): from numpypy import array diff --git a/pypy/module/pypyjit/test_pypy_c/model.py b/pypy/module/pypyjit/test_pypy_c/model.py --- a/pypy/module/pypyjit/test_pypy_c/model.py +++ b/pypy/module/pypyjit/test_pypy_c/model.py @@ -131,18 +131,19 @@ def has_id(self, id): return id in self.ids - def _ops_for_chunk(self, chunk, include_debug_merge_points): + def _ops_for_chunk(self, chunk, include_guard_not_invalidated): for op in chunk.operations: - if op.name != 'debug_merge_point' or include_debug_merge_points: + if op.name != 'debug_merge_point' and \ + (op.name != 'guard_not_invalidated' or include_guard_not_invalidated): yield op - def _allops(self, include_debug_merge_points=False, opcode=None): + def _allops(self, opcode=None, include_guard_not_invalidated=True): opcode_name = opcode for chunk in self.flatten_chunks(): opcode = chunk.getopcode() if opcode_name is None or \ (opcode and opcode.__class__.__name__ == opcode_name): - for op in self._ops_for_chunk(chunk, include_debug_merge_points): + for op in self._ops_for_chunk(chunk, include_guard_not_invalidated): yield op else: for op in chunk.operations: @@ -162,15 +163,15 @@ def print_ops(self, *args, **kwds): print self.format_ops(*args, **kwds) - def _ops_by_id(self, id, include_debug_merge_points=False, opcode=None): + def _ops_by_id(self, id, include_guard_not_invalidated=True, opcode=None): opcode_name = opcode target_opcodes = self.ids[id] - loop_ops = self.allops(include_debug_merge_points, opcode) + loop_ops = self.allops(opcode) for chunk in self.flatten_chunks(): opcode = chunk.getopcode() if opcode in target_opcodes and (opcode_name is None or opcode.__class__.__name__ == opcode_name): - for op in 
self._ops_for_chunk(chunk, include_debug_merge_points): + for op in self._ops_for_chunk(chunk, include_guard_not_invalidated): if op in loop_ops: yield op diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py --- a/pypy/module/pypyjit/test_pypy_c/test_containers.py +++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py @@ -223,5 +223,5 @@ log = self.run(main, [1000]) assert log.result == main(1000) loop, = log.loops_by_filename(self.filepath) - ops = loop.ops_by_id('getitem') + ops = loop.ops_by_id('getitem', include_guard_not_invalidated=False) assert log.opnames(ops) == [] diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -80,7 +80,7 @@ i23 = strgetitem(p10, i19) p25 = newstr(1) strsetitem(p25, 0, i23) - p93 = call(ConstClass(fromstr), p25, 16, ConstPtr(ptr70), descr=<Callr . rir EF=3>) + p93 = call(ConstClass(fromstr), p25, 16, descr=<Callr . ri EF=3>) guard_no_exception(descr=...) i94 = call(ConstClass(rbigint.toint), p93, descr=<Calli . r EF=3>) guard_no_exception(descr=...) 
diff --git a/pypy/tool/gdb_pypy.py b/pypy/tool/gdb_pypy.py --- a/pypy/tool/gdb_pypy.py +++ b/pypy/tool/gdb_pypy.py @@ -76,18 +76,22 @@ def invoke(self, arg, from_tty): # some magic code to automatically reload the python file while developing - ## from pypy.tool import gdb_pypy - ## reload(gdb_pypy) - ## gdb_pypy.RPyType.prog2typeids = self.prog2typeids # persist the cache - ## self.__class__ = gdb_pypy.RPyType + from pypy.tool import gdb_pypy + reload(gdb_pypy) + gdb_pypy.RPyType.prog2typeids = self.prog2typeids # persist the cache + self.__class__ = gdb_pypy.RPyType print self.do_invoke(arg, from_tty) def do_invoke(self, arg, from_tty): - obj = self.gdb.parse_and_eval(arg) - hdr = lookup(obj, '_gcheader') - tid = hdr['h_tid'] - offset = tid & 0xFFFFFFFF # 64bit only - offset = int(offset) # convert from gdb.Value to python int + try: + offset = int(arg) + except ValueError: + obj = self.gdb.parse_and_eval(arg) + hdr = lookup(obj, '_gcheader') + tid = hdr['h_tid'] + offset = tid & 0xFFFFFFFF # 64bit only + offset = int(offset) # convert from gdb.Value to python int + typeids = self.get_typeids() if offset in typeids: return typeids[offset] diff --git a/rpython/jit/backend/arm/test/conftest.py b/rpython/jit/backend/arm/test/conftest.py --- a/rpython/jit/backend/arm/test/conftest.py +++ b/rpython/jit/backend/arm/test/conftest.py @@ -16,7 +16,5 @@ dest="run_translation_tests", help="run tests that translate code") -def pytest_collect_directory(path, parent): - if not cpu.startswith('arm'): - py.test.skip("ARM(v7) tests skipped: cpu is %r" % (cpu,)) -pytest_collect_file = pytest_collect_directory +def pytest_ignore_collect(path, config): + return not cpu.startswith('arm') diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -119,7 +119,7 @@ # detect version of current compiler returncode, stdout, stderr = _run_subprocess(self.cc, '', 
env=self.c_environ) - r = re.match(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) + r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) if r is not None: self.version = int(''.join(r.groups())) / 10 - 60 else: _______________________________________________ pypy-commit mailing list [email protected] http://mail.python.org/mailman/listinfo/pypy-commit
