Author: Richard Plangger <planri...@gmail.com>
Branch: s390x-backend
Changeset: r81457:992b689427ce
Date: 2015-12-28 14:04 +0100
http://bitbucket.org/pypy/pypy/changeset/992b689427ce/
Log: merged zero_array changes of memop-simplify3 diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py --- a/rpython/jit/backend/llsupport/rewrite.py +++ b/rpython/jit/backend/llsupport/rewrite.py @@ -488,8 +488,8 @@ elif arraydescr.itemsize == 0: total_size = arraydescr.basesize elif (self.gc_ll_descr.can_use_nursery_malloc(1) and - self.gen_malloc_nursery_varsize(arraydescr.itemsize, - v_length, op, arraydescr, kind=kind)): + self.gen_malloc_nursery_varsize(arraydescr.itemsize, v_length, + op, arraydescr, kind=kind)): # note that we cannot initialize tid here, because the array # might end up being allocated by malloc_external or some # stuff that initializes GC header fields differently @@ -525,8 +525,18 @@ # See emit_pending_zeros(). (This optimization is done by # hacking the object 'o' in-place: e.g., o.getarg(1) may be # replaced with another constant greater than 0.) - o = ResOperation(rop.ZERO_ARRAY, [v_arr, self.c_zero, v_length], - descr=arraydescr) + #o = ResOperation(rop.ZERO_ARRAY, [v_arr, self.c_zero, v_length], + # descr=arraydescr) + scale = arraydescr.itemsize + v_length_scaled = v_length + if not isinstance(v_length, ConstInt): + scale, offset, v_length_scaled = \ + self._emit_mul_if_factor_offset_not_supported(v_length, scale, 0) + v_scale = ConstInt(scale) + # there is probably no point in doing _emit_mul_if.. for + # c_zero! + args = [v_arr, self.c_zero, v_length_scaled, ConstInt(scale), v_scale] + o = ResOperation(rop.ZERO_ARRAY, args, descr=arraydescr) self.emit_op(o) if isinstance(v_length, ConstInt): self.last_zero_arrays.append(self._newops[-1]) @@ -644,22 +654,37 @@ # are also already in 'newops', which is the point. 
for op in self.last_zero_arrays: assert op.getopnum() == rop.ZERO_ARRAY + descr = op.getdescr() + scale = descr.itemsize box = op.getarg(0) try: intset = self.setarrayitems_occurred(box) except KeyError: + start_box = op.getarg(1) + length_box = op.getarg(2) + if isinstance(start_box, ConstInt): + start = start_box.getint() + op.setarg(1, ConstInt(start * scale)) + op.setarg(3, ConstInt(1)) + if isinstance(length_box, ConstInt): + stop = length_box.getint() + scaled_len = stop * scale + op.setarg(2, ConstInt(scaled_len)) + op.setarg(4, ConstInt(1)) continue assert op.getarg(1).getint() == 0 # always 'start=0' initially start = 0 while start in intset: start += 1 - op.setarg(1, ConstInt(start)) + op.setarg(1, ConstInt(start * scale)) stop = op.getarg(2).getint() assert start <= stop while stop > start and (stop - 1) in intset: stop -= 1 - op.setarg(2, ConstInt(stop - start)) + op.setarg(2, ConstInt((stop - start) * scale)) # ^^ may be ConstInt(0); then the operation becomes a no-op + op.setarg(3, ConstInt(1)) # set scale to 1 + op.setarg(4, ConstInt(1)) # set scale to 1 del self.last_zero_arrays[:] self._setarrayitems_occurred.clear() # diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py --- a/rpython/jit/backend/llsupport/test/test_rewrite.py +++ b/rpython/jit/backend/llsupport/test/test_rewrite.py @@ -36,6 +36,21 @@ assert not isinstance(descr, (str, int)) return 'gc_store(%s, %d, %s, %d)' % (baseptr, descr.offset, newvalue, descr.field_size) + def zero_array(baseptr, start, length, descr_name, descr): + assert isinstance(baseptr, str) + assert isinstance(start, (str, int)) + assert isinstance(length, (str, int)) + assert isinstance(descr_name, str) + assert not isinstance(descr, (str,int)) + itemsize = descr.itemsize + start = start * itemsize + length_scale = 1 + if isinstance(length, str): + length_scale = itemsize + else: + length = length * itemsize + return 'zero_array(%s, %s, %s, 1, %d, 
descr=%s)' % \ + (baseptr, start, length, length_scale, descr_name) def setarrayitem(baseptr, index, newvalue, descr): assert isinstance(baseptr, str) assert isinstance(index, (str, int)) @@ -681,7 +696,7 @@ %(cdescr.basesize + 129 * cdescr.itemsize)d) gc_store(p1, 0, 8111, %(tiddescr.field_size)s) gc_store(p1, 0, 129, %(clendescr.field_size)s) - zero_array(p1, 0, 129, descr=cdescr) + %(zero_array('p1', 0, 129, 'cdescr', cdescr))s call_n(123456) cond_call_gc_wb(p1, descr=wbdescr) %(setarrayitem('p1', 'i2', 'p3', cdescr))s @@ -703,7 +718,7 @@ %(cdescr.basesize + 130 * cdescr.itemsize)d) gc_store(p1, 0, 8111, %(tiddescr.field_size)s) gc_store(p1, 0, 130, %(clendescr.field_size)s) - zero_array(p1, 0, 130, descr=cdescr) + %(zero_array('p1', 0, 130, 'cdescr', cdescr))s call_n(123456) cond_call_gc_wb_array(p1, i2, descr=wbdescr) %(setarrayitem('p1', 'i2', 'p3', cdescr))s @@ -735,7 +750,7 @@ %(cdescr.basesize + 5 * cdescr.itemsize)d) gc_store(p1, 0, 8111, %(tiddescr.field_size)s) gc_store(p1, 0, 5, %(clendescr.field_size)s) - zero_array(p1, 0, 5, descr=cdescr) + %(zero_array('p1', 0, 5, 'cdescr', cdescr))s label(p1, i2, p3) cond_call_gc_wb_array(p1, i2, descr=wbdescr) %(setarrayitem('p1', 'i2', 'p3', cdescr))s @@ -810,7 +825,7 @@ %(cdescr.basesize + 5 * cdescr.itemsize)d) gc_store(p0, 0, 8111, %(tiddescr.field_size)s) gc_store(p0, 0, 5, %(clendescr.field_size)s) - zero_array(p0, 0, 5, descr=cdescr) + %(zero_array('p0', 0, 5, 'cdescr', cdescr))s %(setarrayitem('p0', 'i2', 'p1', cdescr))s jump() """) @@ -828,7 +843,7 @@ %(cdescr.basesize + 5 * cdescr.itemsize)d) gc_store(p0, 0, 8111, %(tiddescr.field_size)s) gc_store(p0, 0, 5, %(clendescr.field_size)s) - zero_array(p0, 2, 3, descr=cdescr) + %(zero_array('p0', 2, 3, 'cdescr', cdescr))s %(setarrayitem('p0', 1, 'p1', cdescr))s %(setarrayitem('p0', 0, 'p2', cdescr))s jump() @@ -847,7 +862,7 @@ %(cdescr.basesize + 5 * cdescr.itemsize)d) gc_store(p0, 0, 8111, %(tiddescr.field_size)s) gc_store(p0, 0, 5, %(clendescr.field_size)s) - 
zero_array(p0, 0, 3, descr=cdescr) + %(zero_array('p0', 0, 3, 'cdescr', cdescr))s %(setarrayitem('p0', 3, 'p1', cdescr))s %(setarrayitem('p0', 4, 'p2', cdescr))s jump() @@ -867,7 +882,7 @@ %(cdescr.basesize + 5 * cdescr.itemsize)d) gc_store(p0, 0, 8111, %(tiddescr.field_size)s) gc_store(p0, 0, 5, %(clendescr.field_size)s) - zero_array(p0, 0, 5, descr=cdescr) + %(zero_array('p0', 0, 5, 'cdescr', cdescr))s %(setarrayitem('p0', 3, 'p1', cdescr))s %(setarrayitem('p0', 2, 'p2', cdescr))s %(setarrayitem('p0', 1, 'p2', cdescr))s @@ -890,7 +905,7 @@ %(cdescr.basesize + 5 * cdescr.itemsize)d) gc_store(p0, 0, 8111, %(tiddescr.field_size)s) gc_store(p0, 0, 5, %(clendescr.field_size)s) - zero_array(p0, 5, 0, descr=cdescr) + %(zero_array('p0', 5, 0, 'cdescr', cdescr))s %(setarrayitem('p0', 3, 'p1', cdescr))s %(setarrayitem('p0', 4, 'p2', cdescr))s %(setarrayitem('p0', 0, 'p1', cdescr))s @@ -913,7 +928,7 @@ %(cdescr.basesize + 5 * cdescr.itemsize)d) gc_store(p0, 0, 8111, %(tiddescr.field_size)s) gc_store(p0, 0, 5, %(clendescr.field_size)s) - zero_array(p0, 1, 4, descr=cdescr) + %(zero_array('p0', 1, 4, 'cdescr', cdescr))s %(setarrayitem('p0', 0, 'p1', cdescr))s call_n(321321) cond_call_gc_wb(p0, descr=wbdescr) @@ -935,7 +950,7 @@ %(cdescr.basesize + 5 * cdescr.itemsize)d) gc_store(p0, 0, 8111, %(tiddescr.field_size)s) gc_store(p0, 0, 5, %(clendescr.field_size)s) - zero_array(p0, 1, 4, descr=cdescr) + %(zero_array('p0', 1, 4, 'cdescr', cdescr))s %(setarrayitem('p0', 0, 'p1', cdescr))s label(p0, p2) cond_call_gc_wb_array(p0, 1, descr=wbdescr) @@ -952,7 +967,7 @@ [p1, p2, i3] p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr) gc_store(p0, 0, i3, %(blendescr.field_size)s) - zero_array(p0, 0, i3, descr=bdescr) + %(zero_array('p0', 0, 'i3', 'bdescr', bdescr))s jump() """) @@ -966,7 +981,7 @@ [p1, p2, i3] p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr) gc_store(p0, 0, i3, %(blendescr.field_size)s) - zero_array(p0, 0, i3, descr=bdescr) + %(zero_array('p0', 0, 'i3', 
'bdescr', bdescr))s cond_call_gc_wb_array(p0, 0, descr=wbdescr) %(setarrayitem('p0', 0, 'p1', bdescr))s jump() diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py --- a/rpython/jit/backend/test/runner_test.py +++ b/rpython/jit/backend/test/runner_test.py @@ -22,6 +22,7 @@ from rpython.jit.backend.detect_cpu import autodetect from rpython.jit.backend.llsupport import jitframe from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU +from rpython.jit.backend.llsupport.rewrite import GcRewriterAssembler IS_32_BIT = sys.maxint < 2**32 @@ -53,11 +54,15 @@ add_loop_instructions = ['overload for a specific cpu'] bridge_loop_instructions = ['overload for a specific cpu'] + def execute_operation(self, opname, valueboxes, result_type, descr=None): inputargs, operations = self._get_single_operation_list(opname, result_type, valueboxes, descr) + return self.execute_operations(inputargs, operations, result_type) + + def execute_operations(self, inputargs, operations, result_type): looptoken = JitCellToken() self.cpu.compile_loop(inputargs, operations, looptoken) args = [] @@ -86,6 +91,23 @@ else: assert False + def _get_operation_list(self, operations, result_type): + inputargs = [] + blacklist = set() + for op in operations: + for arg in op.getarglist(): + if not isinstance(arg, Const) and arg not in inputargs and \ + arg not in blacklist: + inputargs.append(arg) + if op.type != 'v': + blacklist.add(op) + if result_type == 'void': + op1 = ResOperation(rop.FINISH, [], descr=BasicFinalDescr(0)) + else: + op1 = ResOperation(rop.FINISH, [operations[-1]], descr=BasicFinalDescr(0)) + operations.append(op1) + return inputargs, operations + def _get_single_operation_list(self, opnum, result_type, valueboxes, descr): op0 = ResOperation(opnum, valueboxes) @@ -4983,7 +5005,7 @@ addr = llmemory.cast_ptr_to_adr(a) a_int = heaptracker.adr2int(addr) a_ref = lltype.cast_opaque_ptr(llmemory.GCREF, a) - for (start, length) in [(0, 100), (49, 
49), (1, 98), + for (start, length) in [(0,100), (49, 49), (1, 98), (15, 9), (10, 10), (47, 0), (0, 4)]: for cls1 in [ConstInt, InputArgInt]: @@ -5001,11 +5023,31 @@ lengthbox = cls2(length) if cls1 == cls2 and start == length: lengthbox = startbox # same box! - self.execute_operation(rop.ZERO_ARRAY, - [InputArgRef(a_ref), - startbox, - lengthbox], - 'void', descr=arraydescr) + scale = arraydescr.itemsize + ops = [] + def emit(op): + ops.append(op) + helper = GcRewriterAssembler(None, self.cpu) + helper.emit_op = emit + offset = 0 + scale_start, s_offset, v_start = \ + helper._emit_mul_if_factor_offset_not_supported( + startbox, scale, offset) + if v_start is None: + v_start = ConstInt(s_offset) + scale_len, e_offset, v_len = \ + helper._emit_mul_if_factor_offset_not_supported( + lengthbox, scale, offset) + if v_len is None: + v_len = ConstInt(e_offset) + args = [InputArgRef(a_ref), v_start, v_len, + ConstInt(scale_start), ConstInt(scale_len)] + ops.append(ResOperation(rop.ZERO_ARRAY, args, + descr=arraydescr)) + + scalebox = ConstInt(arraydescr.itemsize) + inputargs, oplist = self._get_operation_list(ops,'void') + self.execute_operations(inputargs, oplist, 'void') assert len(a) == 100 for i in range(100): val = (0 if start <= i < start + length diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -1528,25 +1528,6 @@ # return shift - def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc, - base_loc, ofs_loc): - assert isinstance(itemsize_loc, ImmedLoc) - itemsize = itemsize_loc.value - if isinstance(index_loc, ImmedLoc): - temp_loc = imm(index_loc.value * itemsize) - shift = 0 - elif valid_addressing_size(itemsize): - temp_loc = index_loc - shift = get_scale(itemsize) - else: - assert isinstance(index_loc, RegLoc) - assert isinstance(temp_loc, RegLoc) - assert not temp_loc.is_xmm - shift = self._imul_const_scaled(self.mc, 
temp_loc.value, - index_loc.value, itemsize) - assert isinstance(ofs_loc, ImmedLoc) - return AddressLoc(base_loc, temp_loc, shift, ofs_loc.value) - def genop_discard_increment_debug_counter(self, op, arglocs): # The argument should be an immediate address. This should # generate code equivalent to a GETFIELD_RAW, an ADD(1), and a @@ -2379,6 +2360,7 @@ shift = self._imul_const_scaled(self.mc, edi.value, varsizeloc.value, itemsize) varsizeloc = edi + # now varsizeloc is a register != eax. The size of # the variable part of the array is (varsizeloc << shift) assert arraydescr.basesize >= self.gc_minimal_size_in_nursery @@ -2468,13 +2450,8 @@ assert isinstance(null_loc, RegLoc) and null_loc.is_xmm baseofs = baseofs_loc.value nbytes = bytes_loc.value - if valid_addressing_size(itemsize_loc.value): - scale = get_scale(itemsize_loc.value) - else: - assert isinstance(startindex_loc, ImmedLoc) - baseofs += startindex_loc.value * itemsize_loc.value - startindex_loc = imm0 - scale = 0 + assert valid_addressing_size(itemsize_loc.value) + scale = get_scale(itemsize_loc.value) null_reg_cleared = False i = 0 while i < nbytes: diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -9,7 +9,7 @@ from rpython.jit.backend.llsupport.gcmap import allocate_gcmap from rpython.jit.backend.llsupport.regalloc import (FrameManager, BaseRegalloc, RegisterManager, TempVar, compute_vars_longevity, is_comparison_or_ovf_op, - valid_addressing_size) + valid_addressing_size, get_scale) from rpython.jit.backend.x86 import rx86 from rpython.jit.backend.x86.arch import (WORD, JITFRAME_FIXED_SIZE, IS_X86_32, IS_X86_64, DEFAULT_FRAME_BYTES) @@ -32,6 +32,7 @@ from rpython.rtyper.annlowlevel import cast_instance_to_gcref from rpython.rtyper.lltypesystem import lltype, rffi, rstr from rpython.rtyper.lltypesystem.lloperation import llop +from rpython.jit.backend.x86.regloc import AddressLoc 
class X86RegisterManager(RegisterManager): @@ -1389,21 +1390,39 @@ def consider_keepalive(self, op): pass + def _scaled_addr(self, index_loc, itemsize_loc, + base_loc, ofs_loc): + assert isinstance(itemsize_loc, ImmedLoc) + itemsize = itemsize_loc.value + if isinstance(index_loc, ImmedLoc): + temp_loc = imm(index_loc.value * itemsize) + shift = 0 + else: + assert valid_addressing_size(itemsize), "rewrite did not correctly handle shift/mul!" + temp_loc = index_loc + shift = get_scale(itemsize) + assert isinstance(ofs_loc, ImmedLoc) + return AddressLoc(base_loc, temp_loc, shift, ofs_loc.value) + def consider_zero_array(self, op): - itemsize, baseofs, _ = unpack_arraydescr(op.getdescr()) + _, baseofs, _ = unpack_arraydescr(op.getdescr()) length_box = op.getarg(2) + + scale_box = op.getarg(3) + assert isinstance(scale_box, ConstInt) + start_itemsize = scale_box.value + + len_scale_box = op.getarg(4) + assert isinstance(len_scale_box, ConstInt) + len_itemsize = len_scale_box.value + # rewrite handles the mul of a constant length box + constbytes = -1 if isinstance(length_box, ConstInt): - constbytes = length_box.getint() * itemsize - if constbytes == 0: - return # nothing to do - else: - constbytes = -1 + constbytes = length_box.getint() args = op.getarglist() base_loc = self.rm.make_sure_var_in_reg(args[0], args) startindex_loc = self.rm.make_sure_var_in_reg(args[1], args) - if 0 <= constbytes <= 16 * 8 and ( - valid_addressing_size(itemsize) or - isinstance(startindex_loc, ImmedLoc)): + if 0 <= constbytes <= 16 * 8: if IS_X86_64: null_loc = X86_64_XMM_SCRATCH_REG else: @@ -1411,7 +1430,7 @@ null_loc = self.xrm.force_allocate_reg(null_box) self.xrm.possibly_free_var(null_box) self.perform_discard(op, [base_loc, startindex_loc, - imm(constbytes), imm(itemsize), + imm(constbytes), imm(start_itemsize), imm(baseofs), null_loc]) else: # base_loc and startindex_loc are in two regs here (or they are @@ -1421,10 +1440,9 @@ # args[2], because we're still needing the latter. 
dstaddr_box = TempVar() dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, [args[2]]) - itemsize_loc = imm(itemsize) - dst_addr = self.assembler._get_interiorfield_addr( - dstaddr_loc, startindex_loc, itemsize_loc, - base_loc, imm(baseofs)) + itemsize_loc = imm(start_itemsize) + dst_addr = self._scaled_addr(startindex_loc, itemsize_loc, + base_loc, imm(baseofs)) self.assembler.mc.LEA(dstaddr_loc, dst_addr) # if constbytes >= 0: @@ -1433,15 +1451,15 @@ # load length_loc in a register different than dstaddr_loc length_loc = self.rm.make_sure_var_in_reg(length_box, [dstaddr_box]) - if itemsize > 1: + if len_itemsize > 1: # we need a register that is different from dstaddr_loc, # but which can be identical to length_loc (as usual, # only if the length_box is not used by future operations) bytes_box = TempVar() bytes_loc = self.rm.force_allocate_reg(bytes_box, [dstaddr_box]) - b_adr = self.assembler._get_interiorfield_addr( - bytes_loc, length_loc, itemsize_loc, imm0, imm0) + len_itemsize_loc = imm(len_itemsize) + b_adr = self._scaled_addr(length_loc, len_itemsize_loc, imm0, imm0) self.assembler.mc.LEA(bytes_loc, b_adr) length_box = bytes_box length_loc = bytes_loc diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py --- a/rpython/jit/backend/x86/vector_ext.py +++ b/rpython/jit/backend/x86/vector_ext.py @@ -9,7 +9,7 @@ ebp, r8, r9, r10, r11, r12, r13, r14, r15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG, AddressLoc) -from rpython.jit.backend.llsupport.regalloc import (get_scale, valid_addressing_size) +from rpython.jit.backend.llsupport.regalloc import get_scale from rpython.jit.metainterp.resoperation import (rop, ResOperation, VectorOp, VectorGuardOp) from rpython.rlib.objectmodel import we_are_translated diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- 
a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -1225,7 +1225,7 @@ 'SETINTERIORFIELD_GC/3d/n', 'SETINTERIORFIELD_RAW/3d/n', # right now, only used by tests 'SETFIELD_GC/2d/n', - 'ZERO_ARRAY/3d/n', # only emitted by the rewrite, clears (part of) an array + 'ZERO_ARRAY/4d/n', # only emitted by the rewrite, clears (part of) an array # [arraygcptr, firstindex, length], descr=ArrayDescr 'SETFIELD_RAW/2d/n', 'STRSETITEM/3/n', _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit