Author: Armin Rigo <ar...@tunes.org>
Branch: py3k
Changeset: r87293:f854ee56f616
Date: 2016-09-21 22:33 +0200
http://bitbucket.org/pypy/pypy/changeset/f854ee56f616/
Log:	hg merge default

diff --git a/rpython/doc/jit/backend.rst b/rpython/doc/jit/backend.rst
new file mode 100644
--- /dev/null
+++ b/rpython/doc/jit/backend.rst
@@ -0,0 +1,263 @@
+=========================
+PyPy's assembler backends
+=========================
+
+Draft notes about the organization of assembler backends in the PyPy JIT, in 2016
+=================================================================================
+
+
+Input: a linear sequence of instructions, called a "trace".
+
+A trace is a sequence of instructions in SSA form.  Most instructions
+correspond to one or a few CPU-level instructions.  There are a few
+meta-instructions like `label` and debugging stuff.  All branching is
+done with guards, which are instructions that check that a condition is
+true and exit the trace if not.  A failing guard can have a new trace
+added to it later, called a "bridge".  A patched guard becomes a direct
+`Jcond` instruction going to the bridge, with no indirection and no
+register spilling.
+
+A trace ends with either a `return` or a `jump to label`.  The target
+label is either inside the same trace, or in some older one.  For
+historical reasons we call a "loop" a trace that is not a bridge.  The
+machine code that we generate is organized as a forest of trees; the
+trunk of the tree is a "loop", and the branches are all bridges
+(branching off the trunk or off another branch).
+
+* Every trunk or branch that ends in a `jump to label` can target a
+  label from a different tree, too.
+
+* The whole process of assembling a loop or a branch is basically
+  single-threaded, so there is no synchronization issue there (including
+  when patching older generated instructions).
+
+* The generated assembler has a "frame" in %rbp, which is actually
+  not on the stack at all, but is a GC object (called a "jitframe").
+  Spilling goes there.
+
+* The guards are `Jcond` jumps to a very small piece of generated code,
+  which basically pushes a couple of constants on the stack and then
+  jumps to the general guard-recovery code.  That code saves the
+  registers into the jitframe and then exits the whole generated
+  function.  The caller of that generated function checks how it
+  finished: if it finished by hitting a guard, then the caller is
+  responsible for calling the "blackhole interpreter".  This is the part
+  of the front-end that recovers from failing guards and finishes
+  running the frame (including, possibly, by jumping again into
+  generated assembler).
+
+
+Details about the JITting process:
+
+* front-end and optimization pass
+
+* rewrite (includes GC-related transformations as well as simplifications)
+
+* assembler generation
+
+
+Front-end and optimization pass
+-------------------------------
+
+Not discussed here in detail.  This produces loops and bridges using an
+instruction set that is "high-level" in some sense: it contains
+instructions like "new"/"new_array", and
+"setfield"/"setarrayitem"/"setinteriorfield", which describe the action
+of storing a value in a precise field of a structure or array.  For
+example, the "setfield" action might implicitly require a GC write
+barrier.  This is the high-level trace that we send to the following
+step.
+
+
+Rewrite
+-------
+
+A mostly but not completely CPU-independent phase: it lowers some
+instructions.  For example, the variants of "new" are lowered to
+"malloc" and a few "gc_store": it bumps the pointer of the GC and then
+sets a few fields explicitly in the newly allocated structure.  The
+"setfield" is replaced with a "cond_gc_wb_call" (conditional call to the
+write barrier) if needed, followed by a "gc_store".
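+
+To make this concrete, here is a toy version of the lowering in plain
+Python.  The tuple-based "IR" and the operation names are invented for
+this sketch; the real pass works on ResOperation objects (see
+rpython/jit/backend/llsupport/rewrite.py) and only emits the write
+barrier when the GC actually needs one::
+
+    # Toy sketch: lower high-level allocation/store operations.
+    def rewrite(trace):
+        out = []
+        for op in trace:
+            if op[0] == 'new':              # ('new', result, size)
+                _, res, size = op
+                out.append(('malloc', res, size))         # bump the GC pointer
+                out.append(('gc_store', res, 0, 'typeid'))  # object header
+            elif op[0] == 'setfield':       # ('setfield', obj, ofs, value)
+                _, obj, ofs, val = op
+                out.append(('cond_gc_wb_call', obj))  # conditional write barrier
+                out.append(('gc_store', obj, ofs, val))
+            else:
+                out.append(op)
+        return out
+
+    print(rewrite([('new', 'p0', 16), ('setfield', 'p0', 8, 'i1')]))
+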
+The "gc_store" instruction can be encoded in a single MOV assembler
+instruction, but is not as flexible as a MOV.  The address is always
+specified as "some GC pointer + an offset".  We don't have the notion of
+an interior pointer for GC objects.
+
+A different instruction, "gc_store_indexed", offers additional operands,
+which can be mapped to a single MOV instruction using forms like
+`[rax+8*rcx+24]`.
+
+Some other complex instructions pass through to the backend, which must
+deal with them: for example, "card marking" in the GC.  (Writing an
+object pointer inside an array would require walking the whole array
+later to find "young" references.  Instead of that, we flip a bit for
+every range of 128 entries.  This is a common GC optimization.)  Setting
+the card bit of a GC object requires a sequence of assembler
+instructions that depends too much on the target CPU to be expressed
+explicitly here (moreover, it contains a few branches, which are hard to
+express at this level).
+
+
+Assembly
+--------
+
+No fancy code generation techniques, but a greedy forward pass that
+tries to avoid some pitfalls.
+
+
+Handling instructions
+~~~~~~~~~~~~~~~~~~~~~
+
+* One by one (forward direction).  Each instruction asks the register
+  allocator to ensure that some arguments are in registers (not in the
+  jitframe); asks for a register to put its result into; and asks for
+  additional scratch registers that will be freed at the end of the
+  instruction.  There is a special case for boolean variables: they are
+  stored in the condition code flags instead of being materialized as a
+  0/1 value.  (They are materialized later, except in the common case
+  where they are only used by the next `guard_false` or `guard_true` and
+  then forgotten.)
+
+* Instruction arguments are loaded into a register on demand.  This
+  makes the backend quite easy to write, but leads to some bad
+  decisions.
+
+
+Linear scan register allocation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Although it's always a linear trace that we consider, we don't use
+advanced techniques for register allocation: we do forward, on-demand
+allocation as the backend produces the assembler.  When it asks for a
+register to put some value into, we give it any free register, without
+consideration for what will be done with it later.  We compute the
+longevity of all variables, but only use it when choosing which register
+to spill (we spill the variable with the longest longevity).
+
+This works to some extent because it is well integrated with the earlier
+optimization pass.  Loops are unrolled once by the optimization pass to
+allow more powerful optimizations---the optimization pass itself is the
+place that benefits the most, but it also has benefits here in the
+assembly pass.  These are:
+
+* The first peeling of the loop initializes the register bindings at
+  each variable's first use, so that
+
+* the registers are already allocated when we reach the `jump to label`
+  that closes the loop, and
+
+* likewise, the registers are already allocated when exiting bridges.
+
+[TODO: try to allocate registers so as to better match the ABI (minor
+or no benefit in the current state).]
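+
+A toy model of this policy in plain Python (invented names; the real
+allocator, in rpython/jit/backend/llsupport/regalloc.py, tracks much
+more state, pins the current instruction's arguments, and handles
+reloads): registers are handed out on demand, and when none is free we
+spill the bound variable with the longest longevity::
+
+    def allocate(trace, longevity, regs=('rax', 'rcx', 'rdx')):
+        # 'trace' is a list of (result, args); 'longevity[v]' is the
+        # index of the last instruction that uses v.
+        free = list(regs)
+        binding = {}                     # variable -> register
+        actions = []
+        for pos, (result, args) in enumerate(trace):
+            for v in list(args) + [result]:
+                if v in binding:
+                    continue
+                if not free:
+                    # evict the longest-living variable, but never one
+                    # that the current instruction itself needs
+                    candidates = [u for u in binding
+                                    if u not in args and u != result]
+                    victim = max(candidates, key=lambda u: longevity[u])
+                    free.append(binding.pop(victim))
+                    actions.append('spill %s into the jitframe' % victim)
+                binding[v] = free.pop()
+                actions.append('%s lives in %s' % (v, binding[v]))
+            # registers of variables past their last use become free
+            for v in [u for u in binding if longevity[u] <= pos]:
+                free.append(binding.pop(v))
+        return actions
+
+    trace = [('i2', ('i0', 'i1')),   # i2 = int_add(i0, i1)
+             ('i3', ('i1',)),        # i3 = int_neg(i1)
+             ('i4', ('i0', 'i3')),   # i4 = int_mul(i0, i3)
+             ('i5', ('i2', 'i4'))]   # i5 = int_sub(i2, i4)
+    longevity = {'i0': 2, 'i1': 1, 'i2': 3, 'i3': 2, 'i4': 3, 'i5': 3}
+    print('\n'.join(allocate(trace, longevity)))
+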
+
+
+More complex mappings
+~~~~~~~~~~~~~~~~~~~~~
+
+Some instructions generate more complex code.  These are either or both
+of:
+
+* complex instructions generating some local control flow, like
+  "cond_gc_wb_call" (for write barriers) or "call_assembler" (a call
+  followed by a few checks);
+
+* instructions that invoke custom assembler helpers, like the slow path
+  of write barriers or the slow path of allocations.  These slow paths
+  are typically generated too, so that we are not constrained by the
+  usual calling conventions.
+
+
+GC pointers
+~~~~~~~~~~~
+
+Around most CALL instructions, we need to record a description of where
+the GC pointers are (registers and stack frame).  This is needed in case
+the CALL invokes a garbage collection.  The GC pointers can move; the
+pointers in the registers and stack frame are updated by the GC.  That
+is one reason why we don't have explicit interior pointers.
+
+GC pointers can appear as constants in the trace.  We are busy changing
+that to use a constant table and `MOV REG, (%RIP+offset)`.  The
+"constant" in the table is actually updated by the GC if the object
+moves.
+
+
+Vectorization
+~~~~~~~~~~~~~
+
+An optimization developed to use SIMD instructions for trace loops.  The
+primary idea was to use it as an optimization for micro-numpy.  It makes
+several passes over the already optimized trace.
+
+In short: it builds dependencies for an unrolled trace loop, gathers
+pairs/packs of operations that can be executed in parallel, and finally
+schedules the operations.
+
+What it added to the code base:
+
+* dependencies can be constructed between operations
+
+* code motion of guards to relax dependencies
+
+* a scheduler to reorder the trace
+
+* array bound check removal (especially for unrolled traces)
+
+What it can do:
+
+* transform vector loops (element-wise operations)
+
+* accumulation (`reduce([...], operator, 0)`), provided the operation
+  is associative and commutative
+
+* use SSE 4.1 as the "vector backend"
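+
+A minimal sketch of the pack-building idea, using an invented tuple IR
+(the real implementation, in rpython/jit/metainterp/optimizeopt/vector.py,
+also handles dependencies, guards and scheduling)::
+
+    def build_pairs(ops):
+        # Pair operations of the same kind whose memory offsets are
+        # adjacent, so each pair can become one 2-wide SIMD operation.
+        pairs, used = [], set()
+        for i, (kind_i, ofs_i) in enumerate(ops):
+            if i in used:
+                continue
+            for j in range(i + 1, len(ops)):
+                kind_j, ofs_j = ops[j]
+                if j not in used and kind_j == kind_i and ofs_j == ofs_i + 8:
+                    pairs.append(('vec_' + kind_i, ofs_i))
+                    used.update((i, j))
+                    break
+        return pairs
+
+    # one unrolled iteration: two scalar loads and two scalar adds
+    unrolled = [('load', 0), ('add', 0), ('load', 8), ('add', 8)]
+    print(build_pairs(unrolled))   # [('vec_load', 0), ('vec_add', 0)]
+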
+
+
+We do not
+~~~~~~~~~
+
+* Keep tracing data around to reoptimize the trace tree.  (Once a trace
+  is compiled, minimal data is kept.)  This is one reason (there are
+  others in the front-end) for the following result: JIT-compiling a
+  small loop with two common paths ends up as one "loop" and one bridge
+  assembled, and the bridge-following path is slightly less efficient.
+  This is notably because the bridge is assembled with two constraints:
+  the input registers are fixed (from the guard), and the output
+  registers are fixed (from the jump target); usually these two sets of
+  fixed registers are different, and copying around is needed.
+
+* We don't join trace tails: we only assemble *trees*.
+
+* We don't do any reordering (neither of trace instructions nor of
+  individual assembler instructions).
+
+* We don't do any cross-instruction optimization that makes sense only
+  for the backend and can't easily be expressed at a higher level.  I'm
+  sure there are tons of examples of that, e.g. loading a large
+  constant in a register that will survive for several instructions, or
+  moving *parts* of some instruction, like the address calculation, out
+  of loops.
+
+* Other optimization opportunities I can think about: look at the
+  function prologue/epilogue; look at the overhead (small but not zero)
+  at the start of a bridge.  Also check if the way guards are
+  implemented makes sense.  Also, we generate large-ish sequences of
+  assembler instructions with tons of `Jcond` that are almost never
+  followed; is there any optimization opportunity there?  (They all go
+  forward, if it changes anything.)  In theory we could also replace
+  some of these with a signal handler on segfault
+  (e.g. `guard_nonnull_class`).
+
+
+A GCC or LLVM backend?
+~~~~~~~~~~~~~~~~~~~~~~
+
+At least for comparison we'd like a JIT backend that emits its code
+using GCC or LLVM (irrespective of the time it would take).  But it's
+hard to map the guards reasonably well to the C language or to LLVM IR.
+The problems are: (1) we have many guards, and we would like to avoid
+having many paths that each do a full
+saving-of-all-local-variables-that-are-still-alive; (2) it's hard to
+patch a guard once a bridge is compiled from it; (3) instructions like a
+CALL need to expose the local variables that are GC pointers, and
+CALL_MAY_FORCE needs to expose *all* local variables for optional
+off-line reconstruction of the interpreter state.
+
diff --git a/rpython/doc/jit/index.rst b/rpython/doc/jit/index.rst
--- a/rpython/doc/jit/index.rst
+++ b/rpython/doc/jit/index.rst
@@ -26,6 +26,7 @@
    optimizer
    virtualizable
    vectorization
+   backend
 
 - :doc:`Overview <overview>`: motivating our approach
 
@@ -34,5 +35,8 @@
 - :doc:`Optimizer <optimizer>`: the step between tracing and writing
   machine code
 
-- :doc:`Virtulizable <virtualizable>` how virtualizables work and what they are
-  (in other words how to make frames more efficient).
+- :doc:`Virtualizable <virtualizable>`: how virtualizables work and what
+  they are (in other words how to make frames more efficient).
+
+- :doc:`Assembler backend <backend>`: draft notes about the organization
+  of the assembler backends
diff --git a/rpython/jit/backend/test/test_ll_random.py b/rpython/jit/backend/test/test_ll_random.py
--- a/rpython/jit/backend/test/test_ll_random.py
+++ b/rpython/jit/backend/test/test_ll_random.py
@@ -710,6 +710,12 @@
 
 # 6. a conditional call (for now always with no exception raised)
 class CondCallOperation(BaseCallOperation):
+
+    def filter(self, builder):
+        if not builder.cpu.supports_cond_call_value and \
+           self.opnum == rop.COND_CALL_VALUE_I:
+            raise CannotProduceOperation
+
     def produce_into(self, builder, r):
         fail_subset = builder.subset_of_intvars(r)
         if self.opnum == rop.COND_CALL:
diff --git a/rpython/jit/backend/test/zll_stress.py b/rpython/jit/backend/test/zll_stress.py
--- a/rpython/jit/backend/test/zll_stress.py
+++ b/rpython/jit/backend/test/zll_stress.py
@@ -1,6 +1,7 @@
 from rpython.jit.backend.test.test_random import check_random_function, Random
 from rpython.jit.backend.test.test_ll_random import LLtypeOperationBuilder
 from rpython.jit.backend.detect_cpu import getcpuclass
+from rpython.jit.metainterp.resoperation import rop
 import platform
 
 CPU = getcpuclass()
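
All the remaining hunks below apply one mechanical refactoring: the
old-style annotations attached after a function definition, such as
`f._annspecialcase_ = 'specialize:argtype(0)'` or `f.oopspec = '...'`,
are replaced by the equivalent `@specialize.*` and `@oopspec(...)`
decorators.  A toy re-implementation (for illustration only; the real
`specialize` lives in rpython/rlib/objectmodel.py) shows that both
spellings attach the same attributes:

    class _Specialize(object):
        def argtype(self, *indices):
            def decorator(func):
                ids = ', '.join(str(i) for i in indices)
                func._annspecialcase_ = 'specialize:argtype(%s)' % ids
                return func
            return decorator

    specialize = _Specialize()

    def oopspec(spec):
        def decorator(func):
            func.oopspec = spec
            return func
        return decorator

    @oopspec('jit.assert_green(value)')
    @specialize.argtype(0)
    def assert_green(value):
        pass

    assert assert_green._annspecialcase_ == 'specialize:argtype(0)'
    assert assert_green.oopspec == 'jit.assert_green(value)'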
diff --git a/rpython/rlib/clibffi.py b/rpython/rlib/clibffi.py
--- a/rpython/rlib/clibffi.py
+++ b/rpython/rlib/clibffi.py
@@ -359,12 +359,13 @@
         tpe.members[n] = lltype.nullptr(FFI_TYPE_P.TO)
     return tpe
 
+@specialize.memo()
 def cast_type_to_ffitype(tp):
     """ This function returns ffi representation of rpython type tp """
     return TYPE_MAP[tp]
-cast_type_to_ffitype._annspecialcase_ = 'specialize:memo'
 
+@specialize.argtype(1)
 def push_arg_as_ffiptr(ffitp, arg, ll_buf):
     # This is for primitive types.  Note that the exact type of 'arg' may be
     # different from the expected 'c_size'.  To cope with that, we fall back
@@ -396,7 +397,6 @@
             arg >>= 8
     else:
         raise AssertionError
-push_arg_as_ffiptr._annspecialcase_ = 'specialize:argtype(1)'
 
 # type defs for callback and closure userdata
@@ -470,12 +470,12 @@
 FUNCFLAG_USE_ERRNO = 8
 FUNCFLAG_USE_LASTERROR = 16
 
+@specialize.arg(1)     # hack :-/
 def get_call_conv(flags, from_jit):
     if _WIN32 and not _WIN64 and (flags & FUNCFLAG_CDECL == 0):
         return FFI_STDCALL
     else:
         return FFI_DEFAULT_ABI
-get_call_conv._annspecialcase_ = 'specialize:arg(1)'     # hack :-/
 
 class AbstractFuncPtr(object):
@@ -599,6 +599,7 @@
         else:
             self.restype_size = -1
 
+    @specialize.argtype(1)
     def push_arg(self, value):
         #if self.pushed_args == self.argnum:
         #    raise TypeError("Too many arguments, eats %d, pushed %d" %
@@ -618,7 +619,6 @@
         push_arg_as_ffiptr(self.argtypes[self.pushed_args], value,
                            self.ll_args[self.pushed_args])
         self.pushed_args += 1
-    push_arg._annspecialcase_ = 'specialize:argtype(1)'
 
     def _check_args(self):
         if self.pushed_args < self.argnum:
@@ -627,6 +627,7 @@
     def _clean_args(self):
         self.pushed_args = 0
 
+    @specialize.arg(1)
     def call(self, RES_TP):
         self._check_args()
         ffires = c_ffi_call(self.ll_cif, self.funcsym,
@@ -645,7 +646,6 @@
         self._clean_args()
         check_fficall_result(ffires, self.flags)
         return res
-    call._annspecialcase_ = 'specialize:arg(1)'
 
     def __del__(self):
         if self.ll_args:
diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py
--- a/rpython/rlib/jit.py
+++ b/rpython/rlib/jit.py
@@ -280,6 +280,7 @@
 
 @oopspec("jit.isconstant(value)")
+@specialize.call_location()
 def isconstant(value):
     """
     While tracing, returns whether or not the value is currently known to be
@@ -289,9 +290,9 @@
     This is for advanced usage only.
     """
     return NonConstant(False)
-isconstant._annspecialcase_ = "specialize:call_location"
 
 @oopspec("jit.isvirtual(value)")
+@specialize.call_location()
 def isvirtual(value):
     """
     Returns if this value is virtual, while tracing, it's relatively
@@ -300,7 +301,6 @@
     This is for advanced usage only.
     """
     return NonConstant(False)
-isvirtual._annspecialcase_ = "specialize:call_location"
 
 @specialize.call_location()
 def loop_unrolling_heuristic(lst, size, cutoff=2):
@@ -401,28 +401,27 @@
     hop.exception_cannot_occur()
     return hop.inputconst(lltype.Signed, _we_are_jitted)
 
-
+@oopspec('jit.current_trace_length()')
 def current_trace_length():
     """During JIT tracing, returns the current trace length (as a constant).
     If not tracing, returns -1."""
     if NonConstant(False):
         return 73
     return -1
-current_trace_length.oopspec = 'jit.current_trace_length()'
 
+@oopspec('jit.debug(string, arg1, arg2, arg3, arg4)')
 def jit_debug(string, arg1=-sys.maxint-1, arg2=-sys.maxint-1,
                       arg3=-sys.maxint-1, arg4=-sys.maxint-1):
     """When JITted, cause an extra operation JIT_DEBUG to appear in
    the graphs.  Should not be left after debugging."""
     keepalive_until_here(string) # otherwise the whole function call is removed
-jit_debug.oopspec = 'jit.debug(string, arg1, arg2, arg3, arg4)'
 
+@oopspec('jit.assert_green(value)')
+@specialize.argtype(0)
 def assert_green(value):
     """Very strong assert: checks that 'value' is a green
    (a JIT compile-time constant)."""
     keepalive_until_here(value)
-assert_green._annspecialcase_ = 'specialize:argtype(0)'
-assert_green.oopspec = 'jit.assert_green(value)'
 
 class AssertGreenFailed(Exception):
     pass
@@ -457,6 +456,7 @@
 # ____________________________________________________________
 # VRefs
 
+@oopspec('virtual_ref(x)')
 @specialize.argtype(0)
 def virtual_ref(x):
     """Creates a 'vref' object that contains a reference to 'x'.  Calls
Calls @@ -467,14 +467,13 @@ dereferenced (by the call syntax 'vref()'), it returns 'x', which is then forced.""" return DirectJitVRef(x) -virtual_ref.oopspec = 'virtual_ref(x)' +@oopspec('virtual_ref_finish(x)') @specialize.argtype(1) def virtual_ref_finish(vref, x): """See docstring in virtual_ref(x)""" keepalive_until_here(x) # otherwise the whole function call is removed _virtual_ref_finish(vref, x) -virtual_ref_finish.oopspec = 'virtual_ref_finish(x)' def non_virtual_ref(x): """Creates a 'vref' that just returns x when called; nothing more special. @@ -831,6 +830,7 @@ jit_opencoder_model """ +@specialize.arg(0) def set_user_param(driver, text): """Set the tunable JIT parameters from a user-supplied string following the format 'param=value,param=value', or 'off' to @@ -866,7 +866,6 @@ break else: raise ValueError -set_user_param._annspecialcase_ = 'specialize:arg(0)' # ____________________________________________________________ # diff --git a/rpython/rlib/listsort.py b/rpython/rlib/listsort.py --- a/rpython/rlib/listsort.py +++ b/rpython/rlib/listsort.py @@ -1,4 +1,5 @@ from rpython.rlib.rarithmetic import ovfcheck +from rpython.rlib.objectmodel import specialize ## ------------------------------------------------------------------------ @@ -141,6 +142,12 @@ # or, IOW, the first k elements of a should precede key, and the last # n-k should follow key. + # hint for the annotator: the argument 'rightmost' is always passed in as + # a constant (either True or False), so we can specialize the function for + # the two cases. (This is actually needed for technical reasons: the + # variable 'lower' must contain a known method, which is the case in each + # specialized version but not in the unspecialized one.) + @specialize.arg(4) def gallop(self, key, a, hint, rightmost): assert 0 <= hint < a.len if rightmost: @@ -212,12 +219,6 @@ assert lastofs == ofs # so a[ofs-1] < key <= a[ofs] return ofs - # hint for the annotator: the argument 'rightmost' is always passed in as - # a constant (either True or False), so we can specialize the function for - # the two cases. (This is actually needed for technical reasons: the - # variable 'lower' must contain a known method, which is the case in each - # specialized version but not in the unspecialized one.) - gallop._annspecialcase_ = "specialize:arg(4)" # ____________________________________________________________ diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py --- a/rpython/rlib/rarithmetic.py +++ b/rpython/rlib/rarithmetic.py @@ -37,6 +37,7 @@ from rpython.rlib import objectmodel from rpython.flowspace.model import Constant, const from rpython.flowspace.specialcase import register_flow_sc +from rpython.rlib.objectmodel import specialize """ Long-term target: @@ -135,14 +136,15 @@ # We deal directly with overflow there anyway. 
         return r_longlonglong(n)
 
+@specialize.argtype(0)
 def widen(n):
     from rpython.rtyper.lltypesystem import lltype
     if _should_widen_type(lltype.typeOf(n)):
         return intmask(n)
     else:
         return n
-widen._annspecialcase_ = 'specialize:argtype(0)'
 
+@specialize.memo()
 def _should_widen_type(tp):
     from rpython.rtyper.lltypesystem import lltype, rffi
     if tp is lltype.Bool:
@@ -153,19 +155,18 @@
     assert issubclass(r_class, base_int)
     return r_class.BITS < LONG_BIT or (
         r_class.BITS == LONG_BIT and r_class.SIGNED)
-_should_widen_type._annspecialcase_ = 'specialize:memo'
 
 # the replacement for sys.maxint
 maxint = int(LONG_TEST - 1)
 # for now, it should be equal to sys.maxint on all supported platforms
 assert maxint == sys.maxint
 
+@specialize.argtype(0)
 def is_valid_int(r):
     if objectmodel.we_are_translated():
         return isinstance(r, int)
     return isinstance(r, (base_int, int, long, bool)) and (
         -maxint - 1 <= r <= maxint)
-is_valid_int._annspecialcase_ = 'specialize:argtype(0)'
 
 def ovfcheck(r):
     "NOT_RPYTHON"
@@ -225,12 +226,12 @@
         return build_int(None, self_type.SIGNED, max(self_type.BITS, other_type.BITS))
     raise AssertionError("Merging these types (%s, %s) is not supported" % (self_type, other_type))
 
+@specialize.memo()
 def signedtype(t):
     if t in (bool, int, long):
         return True
     else:
         return t.SIGNED
-signedtype._annspecialcase_ = 'specialize:memo'
 
 def normalizedinttype(t):
     if t is int:
@@ -241,11 +242,12 @@
         assert t.BITS <= r_longlong.BITS
         return build_int(None, t.SIGNED, r_longlong.BITS)
 
+@specialize.argtype(0)
 def most_neg_value_of_same_type(x):
     from rpython.rtyper.lltypesystem import lltype
     return most_neg_value_of(lltype.typeOf(x))
-most_neg_value_of_same_type._annspecialcase_ = 'specialize:argtype(0)'
 
+@specialize.memo()
 def most_neg_value_of(tp):
     from rpython.rtyper.lltypesystem import lltype, rffi
     if tp is lltype.Signed:
@@ -256,13 +258,13 @@
         return r_class(-(r_class.MASK >> 1) - 1)
     else:
         return r_class(0)
-most_neg_value_of._annspecialcase_ = 'specialize:memo'
 
+@specialize.argtype(0)
 def most_pos_value_of_same_type(x):
     from rpython.rtyper.lltypesystem import lltype
     return most_pos_value_of(lltype.typeOf(x))
-most_pos_value_of_same_type._annspecialcase_ = 'specialize:argtype(0)'
 
+@specialize.memo()
 def most_pos_value_of(tp):
     from rpython.rtyper.lltypesystem import lltype, rffi
     if tp is lltype.Signed:
@@ -273,8 +275,8 @@
         return r_class(r_class.MASK >> 1)
     else:
         return r_class(r_class.MASK)
-most_pos_value_of._annspecialcase_ = 'specialize:memo'
 
+@specialize.memo()
 def is_signed_integer_type(tp):
     from rpython.rtyper.lltypesystem import lltype, rffi
     if tp is lltype.Signed:
@@ -284,7 +286,6 @@
         return r_class.SIGNED
     except KeyError:
         return False   # not an integer type
-is_signed_integer_type._annspecialcase_ = 'specialize:memo'
 
 def highest_bit(n):
     """
@@ -676,7 +677,7 @@
     from rpython.rtyper.lltypesystem.lloperation import llop
     return llop.int_mod(lltype.Signed, x, y)
 
-@objectmodel.specialize.ll()
+@specialize.ll()
 def byteswap(arg):
     """ Convert little->big endian and the opposite """
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -90,16 +90,16 @@
 
 FIVEARY_CUTOFF = 8
 
+@specialize.argtype(0)
 def _mask_digit(x):
     return UDIGIT_MASK(x & MASK)
-_mask_digit._annspecialcase_ = 'specialize:argtype(0)'
 
 def _widen_digit(x):
     return rffi.cast(LONG_TYPE, x)
 
+@specialize.argtype(0)
 def _store_digit(x):
     return rffi.cast(STORE_TYPE, x)
-_store_digit._annspecialcase_ = 'specialize:argtype(0)'
 
 def _load_unsigned_digit(x):
     return rffi.cast(UNSIGNED_TYPE, x)
@@ -175,11 +175,11 @@
         return _load_unsigned_digit(self._digits[x])
     udigit._always_inline_ = True
 
+    @specialize.argtype(2)
     def setdigit(self, x, val):
         val = _mask_digit(val)
         assert val >= 0
         self._digits[x] = _store_digit(val)
-    setdigit._annspecialcase_ = 'specialize:argtype(2)'
     setdigit._always_inline_ = True
 
     def numdigits(self):
@@ -1312,6 +1312,7 @@
 
     return res
 
+@specialize.argtype(0)
 def digits_from_nonneg_long(l):
     digits = []
     while True:
@@ -1319,8 +1320,8 @@
         l = l >> SHIFT
         if not l:
             return digits[:]   # to make it non-resizable
-digits_from_nonneg_long._annspecialcase_ = "specialize:argtype(0)"
-
+
+@specialize.argtype(0)
 def digits_for_most_neg_long(l):
     # This helper only works if 'l' is the most negative integer of its
     # type, which in base 2 looks like: 1000000..0000
@@ -1335,8 +1336,8 @@
     assert l & MASK == l
     digits.append(_store_digit(l))
     return digits[:]   # to make it non-resizable
-digits_for_most_neg_long._annspecialcase_ = "specialize:argtype(0)"
-
+
+@specialize.argtype(0)
 def args_from_rarith_int1(x):
     if x > 0:
         return digits_from_nonneg_long(x), 1
@@ -1348,11 +1349,10 @@
     else:
         # the most negative integer! hacks needed...
         return digits_for_most_neg_long(x), -1
-args_from_rarith_int1._annspecialcase_ = "specialize:argtype(0)"
-
+
+@specialize.argtype(0)
 def args_from_rarith_int(x):
     return args_from_rarith_int1(widen(x))
-args_from_rarith_int._annspecialcase_ = "specialize:argtype(0)"
 # ^^^ specialized by the precise type of 'x', which is typically a r_xxx
 #     instance from rlib.rarithmetic
 
@@ -1909,6 +1909,7 @@
         i += 1
     return borrow
 
+@specialize.argtype(2)
 def _muladd1(a, n, extra=0):
     """Multiply by a single digit and add a single digit, ignoring the sign.
     """
@@ -1926,7 +1927,7 @@
     z.setdigit(i, carry)
     z._normalize()
     return z
-_muladd1._annspecialcase_ = "specialize:argtype(2)"
+
 def _v_lshift(z, a, m, d):
     """ Shift digit vector a[0:m] d bits left, with 0 <= d < SHIFT. Put
         * result in z[0:m], and return the d bits shifted out of the top.
@@ -2178,6 +2179,7 @@
         ad = -ad
     return ad
 
+@specialize.arg(0)
 def _loghelper(func, arg):
     """
     A decent logarithm is easy to compute even for huge bigints, but libm can't
@@ -2195,7 +2197,6 @@
     # CAUTION: e*SHIFT may overflow using int arithmetic,
     # so force use of double. */
     return func(x) + (e * float(SHIFT) * func(2.0))
-_loghelper._annspecialcase_ = 'specialize:arg(0)'
 
 # ____________________________________________________________
 
@@ -2519,6 +2520,7 @@
 
     return output.build()
 
+@specialize.arg(1)
 def _bitwise(a, op, b): # '&', '|', '^'
     """ Bitwise and/or/xor operations """
 
@@ -2598,8 +2600,8 @@
         return z
 
     return z.invert()
-_bitwise._annspecialcase_ = "specialize:arg(1)"
-
+
+@specialize.arg(1)
 def _int_bitwise(a, op, b): # '&', '|', '^'
     """ Bitwise and/or/xor operations """
@@ -2682,7 +2684,6 @@
         return z
 
     return z.invert()
-_int_bitwise._annspecialcase_ = "specialize:arg(1)"
 
 ULONGLONG_BOUND = r_ulonglong(1L << (r_longlong.BITS-1))
 LONGLONG_MIN = r_longlong(-(1L << (r_longlong.BITS-1)))
diff --git a/rpython/rlib/rmmap.py b/rpython/rlib/rmmap.py
--- a/rpython/rlib/rmmap.py
+++ b/rpython/rlib/rmmap.py
@@ -10,7 +10,7 @@
 from rpython.rtyper.lltypesystem import rffi, lltype
 from rpython.rlib import rposix
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
-from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.objectmodel import we_are_translated, specialize
 from rpython.rlib.nonconst import NonConstant
 from rpython.rlib.rarithmetic import intmask
@@ -239,12 +239,12 @@
     _, _VirtualProtect_safe = winexternal('VirtualProtect',
                   [rffi.VOIDP, rffi.SIZE_T, DWORD, LPDWORD], BOOL)
 
+    @specialize.ll()
     def VirtualProtect(addr, size, mode, oldmode_ptr):
         return _VirtualProtect_safe(addr,
                                rffi.cast(rffi.SIZE_T, size),
                                rffi.cast(DWORD, mode),
                                oldmode_ptr)
-    VirtualProtect._annspecialcase_ = 'specialize:ll'
 
     VirtualFree, VirtualFree_safe = winexternal('VirtualFree',
         [rffi.VOIDP, rffi.SIZE_T, DWORD], BOOL)
diff --git a/rpython/rlib/rstruct/runpack.py b/rpython/rlib/rstruct/runpack.py
--- a/rpython/rlib/rstruct/runpack.py
+++ b/rpython/rlib/rstruct/runpack.py
@@ -7,6 +7,7 @@
 from struct import unpack
 from rpython.rlib.rstruct.formatiterator import FormatIterator
 from rpython.rlib.rstruct.error import StructError
+from rpython.rlib.objectmodel import specialize
 
 class MasterReader(object):
     def __init__(self, s):
@@ -99,14 +100,14 @@
         self._create_unpacking_func()
         return True
 
+@specialize.memo()
 def create_unpacker(unpack_str):
     fmtiter = FrozenUnpackIterator(unpack_str)
     fmtiter.interpret(unpack_str)
     assert fmtiter._freeze_()
     return fmtiter
-create_unpacker._annspecialcase_ = 'specialize:memo'
 
+@specialize.arg(0)
 def runpack(fmt, input):
     unpacker = create_unpacker(fmt)
     return unpacker.unpack(input)
-runpack._annspecialcase_ = 'specialize:arg(0)'

_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit