Author: Richard Plangger <[email protected]>
Branch: s390x-backend
Changeset: r81457:992b689427ce
Date: 2015-12-28 14:04 +0100
http://bitbucket.org/pypy/pypy/changeset/992b689427ce/
Log: merged zero_array changes of memop-simplify3
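The net effect of the diff below is that ZERO_ARRAY grows from three to five operands: the rewrite step now pre-scales constant start/length values into byte offsets and passes two extra ConstInt scale operands, so the backend no longer multiplies by the item size itself. A rough sketch of the operand layout (illustrative only; the names and the 8-byte itemsize are assumptions, not taken from the changeset):

    # old form, scaling left to the backend:
    #   zero_array(p0, start_index, length_in_items, descr=adescr)
    # new form, scaling resolved by the rewrite:
    #   zero_array(p0, start, length, scale_start, scale_length, descr=adescr)
    #
    # constant start/length, itemsize 8: values arrive pre-multiplied,
    # both scale operands are 1
    #   zero_array(p0, 16, 24, 1, 1, descr=adescr)    # items 2..4
    # variable length i3: the length scale carries the itemsize (or an
    # explicit multiply is emitted and the scale collapses to 1)
    #   zero_array(p0, 0, i3, 1, 8, descr=adescr)
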
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -488,8 +488,8 @@
elif arraydescr.itemsize == 0:
total_size = arraydescr.basesize
elif (self.gc_ll_descr.can_use_nursery_malloc(1) and
- self.gen_malloc_nursery_varsize(arraydescr.itemsize,
- v_length, op, arraydescr, kind=kind)):
+ self.gen_malloc_nursery_varsize(arraydescr.itemsize, v_length,
+ op, arraydescr, kind=kind)):
# note that we cannot initialize tid here, because the array
# might end up being allocated by malloc_external or some
# stuff that initializes GC header fields differently
@@ -525,8 +525,18 @@
# See emit_pending_zeros(). (This optimization is done by
# hacking the object 'o' in-place: e.g., o.getarg(1) may be
# replaced with another constant greater than 0.)
- o = ResOperation(rop.ZERO_ARRAY, [v_arr, self.c_zero, v_length],
- descr=arraydescr)
+ #o = ResOperation(rop.ZERO_ARRAY, [v_arr, self.c_zero, v_length],
+ # descr=arraydescr)
+ scale = arraydescr.itemsize
+ v_length_scaled = v_length
+ if not isinstance(v_length, ConstInt):
+ scale, offset, v_length_scaled = \
+ self._emit_mul_if_factor_offset_not_supported(v_length, scale, 0)
+ v_scale = ConstInt(scale)
+ # there is probably no point in doing _emit_mul_if.. for
+ # c_zero!
+ args = [v_arr, self.c_zero, v_length_scaled, ConstInt(scale), v_scale]
+ o = ResOperation(rop.ZERO_ARRAY, args, descr=arraydescr)
self.emit_op(o)
if isinstance(v_length, ConstInt):
self.last_zero_arrays.append(self._newops[-1])
@@ -644,22 +654,37 @@
# are also already in 'newops', which is the point.
for op in self.last_zero_arrays:
assert op.getopnum() == rop.ZERO_ARRAY
+ descr = op.getdescr()
+ scale = descr.itemsize
box = op.getarg(0)
try:
intset = self.setarrayitems_occurred(box)
except KeyError:
+ start_box = op.getarg(1)
+ length_box = op.getarg(2)
+ if isinstance(start_box, ConstInt):
+ start = start_box.getint()
+ op.setarg(1, ConstInt(start * scale))
+ op.setarg(3, ConstInt(1))
+ if isinstance(length_box, ConstInt):
+ stop = length_box.getint()
+ scaled_len = stop * scale
+ op.setarg(2, ConstInt(scaled_len))
+ op.setarg(4, ConstInt(1))
continue
assert op.getarg(1).getint() == 0 # always 'start=0' initially
start = 0
while start in intset:
start += 1
- op.setarg(1, ConstInt(start))
+ op.setarg(1, ConstInt(start * scale))
stop = op.getarg(2).getint()
assert start <= stop
while stop > start and (stop - 1) in intset:
stop -= 1
- op.setarg(2, ConstInt(stop - start))
+ op.setarg(2, ConstInt((stop - start) * scale))
# ^^ may be ConstInt(0); then the operation becomes a no-op
+ op.setarg(3, ConstInt(1)) # set scale to 1
+ op.setarg(4, ConstInt(1)) # set scale to 1
del self.last_zero_arrays[:]
self._setarrayitems_occurred.clear()
#
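A rough illustration of the constant-folding path in emit_pending_zeros above (not part of the changeset; the itemsize and the already-written indices are assumptions):

    itemsize = 8
    intset = {0, 1}          # indices already covered by setarrayitem_gc
    start, stop = 0, 5
    while start in intset:                         # skip leading initialized items
        start += 1                                 # -> start == 2
    while stop > start and (stop - 1) in intset:   # trim trailing initialized items
        stop -= 1                                  # stays 5 here
    # the op is then patched in-place, now in bytes and with scale 1:
    #   setarg(1, ConstInt(start * itemsize))            # 16
    #   setarg(2, ConstInt((stop - start) * itemsize))   # 24
    #   setarg(3, ConstInt(1)); setarg(4, ConstInt(1))
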
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -36,6 +36,21 @@
assert not isinstance(descr, (str, int))
return 'gc_store(%s, %d, %s, %d)' % (baseptr, descr.offset,
newvalue, descr.field_size)
+ def zero_array(baseptr, start, length, descr_name, descr):
+ assert isinstance(baseptr, str)
+ assert isinstance(start, (str, int))
+ assert isinstance(length, (str, int))
+ assert isinstance(descr_name, str)
+ assert not isinstance(descr, (str,int))
+ itemsize = descr.itemsize
+ start = start * itemsize
+ length_scale = 1
+ if isinstance(length, str):
+ length_scale = itemsize
+ else:
+ length = length * itemsize
+ return 'zero_array(%s, %s, %s, 1, %d, descr=%s)' % \
+ (baseptr, start, length, length_scale, descr_name)
def setarrayitem(baseptr, index, newvalue, descr):
assert isinstance(baseptr, str)
assert isinstance(index, (str, int))
@@ -681,7 +696,7 @@
%(cdescr.basesize + 129 * cdescr.itemsize)d)
gc_store(p1, 0, 8111, %(tiddescr.field_size)s)
gc_store(p1, 0, 129, %(clendescr.field_size)s)
- zero_array(p1, 0, 129, descr=cdescr)
+ %(zero_array('p1', 0, 129, 'cdescr', cdescr))s
call_n(123456)
cond_call_gc_wb(p1, descr=wbdescr)
%(setarrayitem('p1', 'i2', 'p3', cdescr))s
@@ -703,7 +718,7 @@
%(cdescr.basesize + 130 * cdescr.itemsize)d)
gc_store(p1, 0, 8111, %(tiddescr.field_size)s)
gc_store(p1, 0, 130, %(clendescr.field_size)s)
- zero_array(p1, 0, 130, descr=cdescr)
+ %(zero_array('p1', 0, 130, 'cdescr', cdescr))s
call_n(123456)
cond_call_gc_wb_array(p1, i2, descr=wbdescr)
%(setarrayitem('p1', 'i2', 'p3', cdescr))s
@@ -735,7 +750,7 @@
%(cdescr.basesize + 5 * cdescr.itemsize)d)
gc_store(p1, 0, 8111, %(tiddescr.field_size)s)
gc_store(p1, 0, 5, %(clendescr.field_size)s)
- zero_array(p1, 0, 5, descr=cdescr)
+ %(zero_array('p1', 0, 5, 'cdescr', cdescr))s
label(p1, i2, p3)
cond_call_gc_wb_array(p1, i2, descr=wbdescr)
%(setarrayitem('p1', 'i2', 'p3', cdescr))s
@@ -810,7 +825,7 @@
%(cdescr.basesize + 5 * cdescr.itemsize)d)
gc_store(p0, 0, 8111, %(tiddescr.field_size)s)
gc_store(p0, 0, 5, %(clendescr.field_size)s)
- zero_array(p0, 0, 5, descr=cdescr)
+ %(zero_array('p0', 0, 5, 'cdescr', cdescr))s
%(setarrayitem('p0', 'i2', 'p1', cdescr))s
jump()
""")
@@ -828,7 +843,7 @@
%(cdescr.basesize + 5 * cdescr.itemsize)d)
gc_store(p0, 0, 8111, %(tiddescr.field_size)s)
gc_store(p0, 0, 5, %(clendescr.field_size)s)
- zero_array(p0, 2, 3, descr=cdescr)
+ %(zero_array('p0', 2, 3, 'cdescr', cdescr))s
%(setarrayitem('p0', 1, 'p1', cdescr))s
%(setarrayitem('p0', 0, 'p2', cdescr))s
jump()
@@ -847,7 +862,7 @@
%(cdescr.basesize + 5 * cdescr.itemsize)d)
gc_store(p0, 0, 8111, %(tiddescr.field_size)s)
gc_store(p0, 0, 5, %(clendescr.field_size)s)
- zero_array(p0, 0, 3, descr=cdescr)
+ %(zero_array('p0', 0, 3, 'cdescr', cdescr))s
%(setarrayitem('p0', 3, 'p1', cdescr))s
%(setarrayitem('p0', 4, 'p2', cdescr))s
jump()
@@ -867,7 +882,7 @@
%(cdescr.basesize + 5 * cdescr.itemsize)d)
gc_store(p0, 0, 8111, %(tiddescr.field_size)s)
gc_store(p0, 0, 5, %(clendescr.field_size)s)
- zero_array(p0, 0, 5, descr=cdescr)
+ %(zero_array('p0', 0, 5, 'cdescr', cdescr))s
%(setarrayitem('p0', 3, 'p1', cdescr))s
%(setarrayitem('p0', 2, 'p2', cdescr))s
%(setarrayitem('p0', 1, 'p2', cdescr))s
@@ -890,7 +905,7 @@
%(cdescr.basesize + 5 * cdescr.itemsize)d)
gc_store(p0, 0, 8111, %(tiddescr.field_size)s)
gc_store(p0, 0, 5, %(clendescr.field_size)s)
- zero_array(p0, 5, 0, descr=cdescr)
+ %(zero_array('p0', 5, 0, 'cdescr', cdescr))s
%(setarrayitem('p0', 3, 'p1', cdescr))s
%(setarrayitem('p0', 4, 'p2', cdescr))s
%(setarrayitem('p0', 0, 'p1', cdescr))s
@@ -913,7 +928,7 @@
%(cdescr.basesize + 5 * cdescr.itemsize)d)
gc_store(p0, 0, 8111, %(tiddescr.field_size)s)
gc_store(p0, 0, 5, %(clendescr.field_size)s)
- zero_array(p0, 1, 4, descr=cdescr)
+ %(zero_array('p0', 1, 4, 'cdescr', cdescr))s
%(setarrayitem('p0', 0, 'p1', cdescr))s
call_n(321321)
cond_call_gc_wb(p0, descr=wbdescr)
@@ -935,7 +950,7 @@
%(cdescr.basesize + 5 * cdescr.itemsize)d)
gc_store(p0, 0, 8111, %(tiddescr.field_size)s)
gc_store(p0, 0, 5, %(clendescr.field_size)s)
- zero_array(p0, 1, 4, descr=cdescr)
+ %(zero_array('p0', 1, 4, 'cdescr', cdescr))s
%(setarrayitem('p0', 0, 'p1', cdescr))s
label(p0, p2)
cond_call_gc_wb_array(p0, 1, descr=wbdescr)
@@ -952,7 +967,7 @@
[p1, p2, i3]
p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr)
gc_store(p0, 0, i3, %(blendescr.field_size)s)
- zero_array(p0, 0, i3, descr=bdescr)
+ %(zero_array('p0', 0, 'i3', 'bdescr', bdescr))s
jump()
""")
@@ -966,7 +981,7 @@
[p1, p2, i3]
p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr)
gc_store(p0, 0, i3, %(blendescr.field_size)s)
- zero_array(p0, 0, i3, descr=bdescr)
+ %(zero_array('p0', 0, 'i3', 'bdescr', bdescr))s
cond_call_gc_wb_array(p0, 0, descr=wbdescr)
%(setarrayitem('p0', 0, 'p1', bdescr))s
jump()
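For orientation, this is what the zero_array() test helper added above should expand to (illustrative only; the itemsizes are assumptions, e.g. a word-sized cdescr item and a 1-byte bdescr item):

    # constant length: start and length come out pre-scaled, both scales are 1
    zero_array('p1', 2, 3, 'cdescr', cdescr)
    #   -> "zero_array(p1, 16, 24, 1, 1, descr=cdescr)"   (itemsize 8 assumed)
    # variable length: the length stays symbolic, its scale is the itemsize
    zero_array('p0', 0, 'i3', 'bdescr', bdescr)
    #   -> "zero_array(p0, 0, i3, 1, 1, descr=bdescr)"    (itemsize 1 assumed)
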
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -22,6 +22,7 @@
from rpython.jit.backend.detect_cpu import autodetect
from rpython.jit.backend.llsupport import jitframe
from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+from rpython.jit.backend.llsupport.rewrite import GcRewriterAssembler
IS_32_BIT = sys.maxint < 2**32
@@ -53,11 +54,15 @@
add_loop_instructions = ['overload for a specific cpu']
bridge_loop_instructions = ['overload for a specific cpu']
+
def execute_operation(self, opname, valueboxes, result_type, descr=None):
inputargs, operations = self._get_single_operation_list(opname,
result_type,
valueboxes,
descr)
+ return self.execute_operations(inputargs, operations, result_type)
+
+ def execute_operations(self, inputargs, operations, result_type):
looptoken = JitCellToken()
self.cpu.compile_loop(inputargs, operations, looptoken)
args = []
@@ -86,6 +91,23 @@
else:
assert False
+ def _get_operation_list(self, operations, result_type):
+ inputargs = []
+ blacklist = set()
+ for op in operations:
+ for arg in op.getarglist():
+ if not isinstance(arg, Const) and arg not in inputargs and \
+ arg not in blacklist:
+ inputargs.append(arg)
+ if op.type != 'v':
+ blacklist.add(op)
+ if result_type == 'void':
+ op1 = ResOperation(rop.FINISH, [], descr=BasicFinalDescr(0))
+ else:
+ op1 = ResOperation(rop.FINISH, [operations[-1]], descr=BasicFinalDescr(0))
+ operations.append(op1)
+ return inputargs, operations
+
def _get_single_operation_list(self, opnum, result_type, valueboxes,
descr):
op0 = ResOperation(opnum, valueboxes)
@@ -4983,7 +5005,7 @@
addr = llmemory.cast_ptr_to_adr(a)
a_int = heaptracker.adr2int(addr)
a_ref = lltype.cast_opaque_ptr(llmemory.GCREF, a)
- for (start, length) in [(0, 100), (49, 49), (1, 98),
+ for (start, length) in [(0,100), (49, 49), (1, 98),
(15, 9), (10, 10), (47, 0),
(0, 4)]:
for cls1 in [ConstInt, InputArgInt]:
@@ -5001,11 +5023,31 @@
lengthbox = cls2(length)
if cls1 == cls2 and start == length:
lengthbox = startbox # same box!
- self.execute_operation(rop.ZERO_ARRAY,
- [InputArgRef(a_ref),
- startbox,
- lengthbox],
- 'void', descr=arraydescr)
+ scale = arraydescr.itemsize
+ ops = []
+ def emit(op):
+ ops.append(op)
+ helper = GcRewriterAssembler(None, self.cpu)
+ helper.emit_op = emit
+ offset = 0
+ scale_start, s_offset, v_start = \
+ helper._emit_mul_if_factor_offset_not_supported(
+ startbox, scale, offset)
+ if v_start is None:
+ v_start = ConstInt(s_offset)
+ scale_len, e_offset, v_len = \
+ helper._emit_mul_if_factor_offset_not_supported(
+ lengthbox, scale, offset)
+ if v_len is None:
+ v_len = ConstInt(e_offset)
+ args = [InputArgRef(a_ref), v_start, v_len,
+ ConstInt(scale_start), ConstInt(scale_len)]
+ ops.append(ResOperation(rop.ZERO_ARRAY, args,
+ descr=arraydescr))
+
+ scalebox = ConstInt(arraydescr.itemsize)
+ inputargs, oplist = self._get_operation_list(ops,'void')
+ self.execute_operations(inputargs, oplist, 'void')
assert len(a) == 100
for i in range(100):
val = (0 if start <= i < start + length
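A short note on the runner test change above (my reading of the helper's contract, not stated in the diff): the test now builds the five-operand ZERO_ARRAY the same way the rewrite step would, by running both the start and the length box through GcRewriterAssembler._emit_mul_if_factor_offset_not_supported and passing the surviving scales as operands 3 and 4. When the backend's addressing modes cover the itemsize, no extra INT_MUL is emitted and the scale operands simply carry the itemsize; otherwise the helper emits the multiply and the scales collapse to 1, which is why the extra ops are collected and executed via the new execute_operations()/_get_operation_list() path.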
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1528,25 +1528,6 @@
#
return shift
- def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc,
- base_loc, ofs_loc):
- assert isinstance(itemsize_loc, ImmedLoc)
- itemsize = itemsize_loc.value
- if isinstance(index_loc, ImmedLoc):
- temp_loc = imm(index_loc.value * itemsize)
- shift = 0
- elif valid_addressing_size(itemsize):
- temp_loc = index_loc
- shift = get_scale(itemsize)
- else:
- assert isinstance(index_loc, RegLoc)
- assert isinstance(temp_loc, RegLoc)
- assert not temp_loc.is_xmm
- shift = self._imul_const_scaled(self.mc, temp_loc.value,
- index_loc.value, itemsize)
- assert isinstance(ofs_loc, ImmedLoc)
- return AddressLoc(base_loc, temp_loc, shift, ofs_loc.value)
-
def genop_discard_increment_debug_counter(self, op, arglocs):
# The argument should be an immediate address. This should
# generate code equivalent to a GETFIELD_RAW, an ADD(1), and a
@@ -2379,6 +2360,7 @@
shift = self._imul_const_scaled(self.mc, edi.value,
varsizeloc.value, itemsize)
varsizeloc = edi
+
# now varsizeloc is a register != eax. The size of
# the variable part of the array is (varsizeloc << shift)
assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
@@ -2468,13 +2450,8 @@
assert isinstance(null_loc, RegLoc) and null_loc.is_xmm
baseofs = baseofs_loc.value
nbytes = bytes_loc.value
- if valid_addressing_size(itemsize_loc.value):
- scale = get_scale(itemsize_loc.value)
- else:
- assert isinstance(startindex_loc, ImmedLoc)
- baseofs += startindex_loc.value * itemsize_loc.value
- startindex_loc = imm0
- scale = 0
+ assert valid_addressing_size(itemsize_loc.value)
+ scale = get_scale(itemsize_loc.value)
null_reg_cleared = False
i = 0
while i < nbytes:
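With the rewrite now guaranteeing either a power-of-two scale or an already-emitted multiply, the x86 zero_array helper can simply assert valid_addressing_size(itemsize) and derive the SIB shift with get_scale() (i.e. the log2 shift: itemsize 1/2/4/8 maps to shift 0/1/2/3); the old fallback that folded startindex * itemsize into the base offset, like the removed _get_interiorfield_addr above, is no longer needed in the assembler.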
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -9,7 +9,7 @@
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.jit.backend.llsupport.regalloc import (FrameManager, BaseRegalloc,
RegisterManager, TempVar, compute_vars_longevity, is_comparison_or_ovf_op,
- valid_addressing_size)
+ valid_addressing_size, get_scale)
from rpython.jit.backend.x86 import rx86
from rpython.jit.backend.x86.arch import (WORD, JITFRAME_FIXED_SIZE, IS_X86_32,
IS_X86_64, DEFAULT_FRAME_BYTES)
@@ -32,6 +32,7 @@
from rpython.rtyper.annlowlevel import cast_instance_to_gcref
from rpython.rtyper.lltypesystem import lltype, rffi, rstr
from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.jit.backend.x86.regloc import AddressLoc
class X86RegisterManager(RegisterManager):
@@ -1389,21 +1390,39 @@
def consider_keepalive(self, op):
pass
+ def _scaled_addr(self, index_loc, itemsize_loc,
+ base_loc, ofs_loc):
+ assert isinstance(itemsize_loc, ImmedLoc)
+ itemsize = itemsize_loc.value
+ if isinstance(index_loc, ImmedLoc):
+ temp_loc = imm(index_loc.value * itemsize)
+ shift = 0
+ else:
+ assert valid_addressing_size(itemsize), "rewrite did not correctly handle shift/mul!"
+ temp_loc = index_loc
+ shift = get_scale(itemsize)
+ assert isinstance(ofs_loc, ImmedLoc)
+ return AddressLoc(base_loc, temp_loc, shift, ofs_loc.value)
+
def consider_zero_array(self, op):
- itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
+ _, baseofs, _ = unpack_arraydescr(op.getdescr())
length_box = op.getarg(2)
+
+ scale_box = op.getarg(3)
+ assert isinstance(scale_box, ConstInt)
+ start_itemsize = scale_box.value
+
+ len_scale_box = op.getarg(4)
+ assert isinstance(len_scale_box, ConstInt)
+ len_itemsize = len_scale_box.value
+ # rewrite handles the mul of a constant length box
+ constbytes = -1
if isinstance(length_box, ConstInt):
- constbytes = length_box.getint() * itemsize
- if constbytes == 0:
- return # nothing to do
- else:
- constbytes = -1
+ constbytes = length_box.getint()
args = op.getarglist()
base_loc = self.rm.make_sure_var_in_reg(args[0], args)
startindex_loc = self.rm.make_sure_var_in_reg(args[1], args)
- if 0 <= constbytes <= 16 * 8 and (
- valid_addressing_size(itemsize) or
- isinstance(startindex_loc, ImmedLoc)):
+ if 0 <= constbytes <= 16 * 8:
if IS_X86_64:
null_loc = X86_64_XMM_SCRATCH_REG
else:
@@ -1411,7 +1430,7 @@
null_loc = self.xrm.force_allocate_reg(null_box)
self.xrm.possibly_free_var(null_box)
self.perform_discard(op, [base_loc, startindex_loc,
- imm(constbytes), imm(itemsize),
+ imm(constbytes), imm(start_itemsize),
imm(baseofs), null_loc])
else:
# base_loc and startindex_loc are in two regs here (or they are
@@ -1421,10 +1440,9 @@
# args[2], because we're still needing the latter.
dstaddr_box = TempVar()
dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, [args[2]])
- itemsize_loc = imm(itemsize)
- dst_addr = self.assembler._get_interiorfield_addr(
- dstaddr_loc, startindex_loc, itemsize_loc,
- base_loc, imm(baseofs))
+ itemsize_loc = imm(start_itemsize)
+ dst_addr = self._scaled_addr(startindex_loc, itemsize_loc,
+ base_loc, imm(baseofs))
self.assembler.mc.LEA(dstaddr_loc, dst_addr)
#
if constbytes >= 0:
@@ -1433,15 +1451,15 @@
# load length_loc in a register different than dstaddr_loc
length_loc = self.rm.make_sure_var_in_reg(length_box,
[dstaddr_box])
- if itemsize > 1:
+ if len_itemsize > 1:
# we need a register that is different from dstaddr_loc,
# but which can be identical to length_loc (as usual,
# only if the length_box is not used by future operations)
bytes_box = TempVar()
bytes_loc = self.rm.force_allocate_reg(bytes_box,
[dstaddr_box])
- b_adr = self.assembler._get_interiorfield_addr(
- bytes_loc, length_loc, itemsize_loc, imm0, imm0)
+ len_itemsize_loc = imm(len_itemsize)
+ b_adr = self._scaled_addr(length_loc, len_itemsize_loc, imm0, imm0)
self.assembler.mc.LEA(bytes_loc, b_adr)
length_box = bytes_box
length_loc = bytes_loc
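The new _scaled_addr is essentially the immediate/shift half of the removed assembler helper: a constant index is folded into an immediate displacement, a register index relies on the SIB scale, and any itemsize the addressing mode cannot express must already have been lowered to an explicit multiply by the rewrite, hence the assertion. A minimal sketch of the two cases (illustrative only):

    # constant index: the displacement absorbs index * itemsize, shift 0
    #   _scaled_addr(imm(3), imm(8), base_loc, imm(ofs))  ~ [base + ofs + 24]
    # register index: the SIB scale does the multiply, shift = get_scale(8) = 3
    #   _scaled_addr(reg,    imm(8), base_loc, imm(ofs))  ~ [base + reg*8 + ofs]
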
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -9,7 +9,7 @@
ebp, r8, r9, r10, r11, r12, r13, r14, r15, xmm0, xmm1, xmm2, xmm3, xmm4,
xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14,
X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG, AddressLoc)
-from rpython.jit.backend.llsupport.regalloc import (get_scale, valid_addressing_size)
+from rpython.jit.backend.llsupport.regalloc import get_scale
from rpython.jit.metainterp.resoperation import (rop, ResOperation,
VectorOp, VectorGuardOp)
from rpython.rlib.objectmodel import we_are_translated
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1225,7 +1225,7 @@
'SETINTERIORFIELD_GC/3d/n',
'SETINTERIORFIELD_RAW/3d/n', # right now, only used by tests
'SETFIELD_GC/2d/n',
- 'ZERO_ARRAY/3d/n', # only emitted by the rewrite, clears (part of) an array
+ 'ZERO_ARRAY/5d/n', # only emitted by the rewrite, clears (part of) an array
# [arraygcptr, firstindex, length], descr=ArrayDescr
'SETFIELD_RAW/2d/n',
'STRSETITEM/3/n',
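For completeness: in the resoperation spec string the digit is the operand count, 'd' marks an operation carrying a descr and the trailing 'n' a void result, so 'ZERO_ARRAY/5d/n' matches the five-operand form built in rewrite.py above (array gcptr, start, length, start scale, length scale); the unchanged context comment still describes the old three-operand layout.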