Author: Richard Plangger <[email protected]>
Branch: s390x-backend
Changeset: r82174:2b50cfaee409
Date: 2016-02-12 12:39 +0100
http://bitbucket.org/pypy/pypy/changeset/2b50cfaee409/
Log: replaced arith left shift with logical (other backends do not use
arith shift there!); do not call memcpy for strings anymore, s390x
has a dedicated instruction to do just that. this removes the call
overhead. simplifications to the register allocator and assembler,
some small tests
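
Why the shift replacement matters: for a left shift the arithmetic
(SLAG) and logical (SLLG) variants produce the same result bits, but
SLAG additionally signals fixed-point overflow when a bit unlike the
sign bit is shifted out -- semantics we do not want when computing a
plain bit mask such as 1 << n. A minimal Python sketch of the
difference (illustrative only, not backend code):

    MASK64 = (1 << 64) - 1

    def sllg(value, amount):
        # logical shift left: bits shifted out are simply dropped
        return (value << amount) & MASK64

    def slag(value, amount):
        # arithmetic shift left: same result bits, but overflow is
        # signalled if any shifted-out bit (or the resulting sign
        # bit) differs from the original sign bit
        sign = (value >> 63) & 1
        result = (value << amount) & MASK64
        overflow = any(((value >> (63 - i)) & 1) != sign
                       for i in range(amount + 1))
        return result, overflow

    assert sllg(1, 63) == 0x8000000000000000          # fine for a mask
    assert slag(1, 63) == (0x8000000000000000, True)  # would flag overflow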
diff --git a/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py b/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
@@ -127,7 +127,7 @@
i0 = getfield_gc_i(ConstPtr(pinned_obj_gcref),
descr=pinned_obj_my_int_descr)
""", """
[]
- p1 = gc_load_indexed_r(ConstPtr(ptr_array_gcref), %(0 * ptr_array_descr.itemsize + 1)s, 1, 0, %(ptr_array_descr.itemsize)s)
+ p1 = gc_load_r(ConstPtr(ptr_array_gcref), %(0 * ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
i0 = gc_load_i(p1, 0, -%(pinned_obj_my_int_descr.field_size)s)
""")
assert len(self.gc_ll_descr.last_moving_obj_tracker._indexes) == 1
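
The test change above reflects that with a constant index the indexed
load folds into a plain load; roughly (a sketch, the helper name is
illustrative):

    def effective_offset(index, scale, offset):
        # gc_load_indexed reads from base + index*scale + offset;
        # when index is a compile-time constant the whole expression
        # collapses to the single displacement taken by gc_load
        return index * scale + offset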
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -589,7 +589,7 @@
# set SCRATCH2 to 1 << r1
mc.LGHI(r.SCRATCH2, l.imm(1))
- mc.SLAG(r.SCRATCH2, r.SCRATCH2, l.addr(0,r.SCRATCH))
+ mc.SLLG(r.SCRATCH2, r.SCRATCH2, l.addr(0,r.SCRATCH))
# set this bit inside the byte of interest
@@ -1002,20 +1002,17 @@
if src_ofs.is_imm():
value = src_ofs.value << scale
if check_imm_value(value):
- if dst is not src_ptr:
- self.mc.LGR(dst, src_ptr)
- if value != 0:
- self.mc.AGHI(dst, l.imm(value))
+ self.mc.AGHIK(dst, src_ptr, l.imm(value))
else:
- self.mc.load_imm(dst, value)
- self.mc.AGR(dst, src_ptr)
+ # it is fine to use r1 (SCRATCH) here, because it only
+ # holds the value until the memory copy below is invoked
+ self.mc.load_imm(r.SCRATCH, value)
+ self.mc.AGRK(dst, src_ptr, r.SCRATCH)
elif scale == 0:
- if dst is not src_ptr:
- self.mc.LGR(dst, src_ptr)
- self.mc.AGR(dst, src_ofs)
+ self.mc.AGRK(dst, src_ptr, src_ofs)
else:
- self.mc.SLLG(dst, src_ofs, l.addr(scale))
- self.mc.AGR(dst, src_ptr)
+ self.mc.SLLG(r.SCRATCH, src_ofs, l.addr(scale))
+ self.mc.AGRK(dst, src_ptr, r.SCRATCH)
def _emit_copycontent(self, arglocs, is_unicode):
[src_ptr_loc, dst_ptr_loc,
@@ -1033,34 +1030,40 @@
assert itemsize == 1
scale = 0
- self._emit_load_for_copycontent(r.SCRATCH, src_ptr_loc, src_ofs_loc, scale)
- self._emit_load_for_copycontent(r.SCRATCH2, dst_ptr_loc, dst_ofs_loc, scale)
- #
- # DO NOT USE r2-r6 before this line!
- # either of the parameter (e.g. str_ptr_loc, ...) locations might be allocated
+ # src and src_len are tmp registers
+ src = src_ptr_loc
+ src_len = r.odd_reg(src)
+ dst = r.r0
+ dst_len = r.r1
+ self._emit_load_for_copycontent(src, src_ptr_loc, src_ofs_loc, scale)
+ self._emit_load_for_copycontent(dst, dst_ptr_loc, dst_ofs_loc, scale)
if length_loc.is_imm():
length = length_loc.getint()
- self.mc.load_imm(r.r4, length << scale)
+ self.mc.load_imm(dst_len, length << scale)
else:
if scale > 0:
- self.mc.SLAG(r.r4, length_loc, l.addr(scale))
- elif length_loc is not r.r4:
- self.mc.LGR(r.r4, length_loc)
+ self.mc.SLLG(dst_len, length_loc, l.addr(scale))
+ else:
+ self.mc.LGR(dst_len, length_loc)
+ # ensure that src_len is as long as dst_len, otherwise
+ # padding bytes are written to dst
+ self.mc.LGR(src_len, dst_len)
- self.mc.LGR(r.r3, r.SCRATCH)
- self.mc.LGR(r.r2, r.SCRATCH2)
- if basesize != 0:
- self.mc.AGHI(r.r3, l.imm(basesize))
- if basesize != 0:
- self.mc.AGHI(r.r2, l.imm(basesize))
+ self.mc.AGHI(src, l.imm(basesize))
+ self.mc.AGHI(dst, l.imm(basesize))
- self.mc.load_imm(self.mc.RAW_CALL_REG, self.memcpy_addr)
- self.mc.raw_call()
+ # s390x has memcpy directly as a hardware instruction!!
+ # the 0xB8 hint tells the cpu we might reference dst later
+ self.mc.MVCLE(dst, src, l.addr(0xB8))
+ # NOTE the cpu may stop this instruction at any point (setting
+ # condition code 3), thus it is looped until all bytes
+ # are copied!
+ self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
def emit_zero_array(self, op, arglocs, regalloc):
base_loc, startindex_loc, length_loc, \
- ofs_loc, itemsize_loc, pad_byte_loc = arglocs
+ ofs_loc, itemsize_loc = arglocs
if ofs_loc.is_imm():
assert check_imm_value(ofs_loc.value)
@@ -1073,24 +1076,21 @@
else:
self.mc.AGR(base_loc, startindex_loc)
assert not length_loc.is_imm()
- self.mc.XGR(pad_byte_loc, pad_byte_loc)
- pad_plus = r.odd_reg(pad_byte_loc)
- self.mc.XGR(pad_plus, pad_plus)
- self.mc.XGR(r.SCRATCH, r.SCRATCH)
+ # the contents of r0 do not matter: r1 (the source length) is
+ # zero, so nothing is copied and dst is filled with the pad byte
+ self.mc.XGR(r.r1, r.r1)
+
+ assert base_loc.is_even()
+ assert length_loc.value == base_loc.value + 1
+
# s390x has memset directly as a hardware instruction!!
# it needs 4 registers allocated
- # dst = rX, length = rX+1 (ensured by the regalloc)
- # pad_byte is rY to rY+1
- # scratch register holds the value written to dst
- assert pad_byte_loc.is_even()
- assert pad_plus.value == pad_byte_loc.value + 1
- assert base_loc.is_even()
- assert length_loc.value == base_loc.value + 1
- assert base_loc.value != pad_byte_loc.value
+ # dst = rX, dst len = rX+1 (ensured by the regalloc)
+ # src = r0, src len = r1
+ self.mc.MVCLE(base_loc, r.r0, l.addr(0))
# NOTE the cpu may stop this instruction at any point (setting
# condition code 3), thus it is looped until all bytes
# are written!
- self.mc.MVCLE(base_loc, pad_byte_loc, l.addr(0, r.SCRATCH))
self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
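
Both MVCLE sites above follow the same pattern: the hardware may end
the instruction before the operation completes (condition code 3), so
a conditional branch re-executes it until everything has moved. A
rough Python model of that retry loop (an illustrative sketch, not
backend code):

    def mvcle_step(dst, src, max_chunk=4096):
        # move at most max_chunk bytes and report whether the
        # operation is incomplete -- mimicking a cpu-determined
        # early exit of MVCLE (condition code 3)
        n = min(len(src), max_chunk)
        dst.extend(src[:n])
        del src[:n]
        return len(src) > 0

    def copy_all(dst, src):
        # corresponds to:  MVCLE dst, src, ...
        #                  BRC   <cc3 mask>, back to the MVCLE
        while mvcle_step(dst, src):
            pass

    src = list(b"hello zarch " * 1000)
    dst = []
    copy_all(dst, src)
    assert bytes(dst) == b"hello zarch " * 1000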
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -161,6 +161,14 @@
def ensure_even_odd_pair(self, var, bindvar, bind_first=True,
must_exist=True, load_loc_odd=True,
move_regs=True):
+ """ Allocates two registers that can be used by the instruction.
+ var: is the original register holding the value
+ bindvar: is the variable that will be bound
+ (= self.reg_bindings[bindvar] = new register)
+ bind_first: the even register will be bound to bindvar,
+ if bind_first == False: the odd register will
+ be bound
+ """
self._check_type(var)
prev_loc = self.loc(var, must_exist=must_exist)
var2 = TempVar()
@@ -592,13 +600,23 @@
return imm(box.getint())
return self.rm.ensure_reg(box, force_in_reg=True,
selected_reg=selected_reg)
- def ensure_reg_or_any_imm(self, box):
+ def ensure_reg_or_20bit_imm(self, box, selected_reg=None):
if box.type == FLOAT:
return self.fprm.ensure_reg(box, True)
else:
+ if helper.check_imm20(box):
+ return imm(box.getint())
+ return self.rm.ensure_reg(box, force_in_reg=True,
+ selected_reg=selected_reg)
+
+ def ensure_reg_or_any_imm(self, box, selected_reg=None):
+ if box.type == FLOAT:
+ return self.fprm.ensure_reg(box, True,
+ selected_reg=selected_reg)
+ else:
if isinstance(box, Const):
return imm(box.getint())
- return self.rm.ensure_reg(box, force_in_reg=True)
+ return self.rm.ensure_reg(box, force_in_reg=True,
+ selected_reg=selected_reg)
def get_scratch_reg(self, type, selected_reg=None):
if type == FLOAT:
@@ -798,7 +816,7 @@
def _prepare_gc_load(self, op):
base_loc = self.ensure_reg(op.getarg(0), force_in_reg=True)
- index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
+ index_loc = self.ensure_reg_or_20bit_imm(op.getarg(1))
size_box = op.getarg(2)
assert isinstance(size_box, ConstInt)
size = abs(size_box.value)
@@ -815,7 +833,7 @@
def _prepare_gc_load_indexed(self, op):
base_loc = self.ensure_reg(op.getarg(0), force_in_reg=True)
- index_loc = self.ensure_reg(op.getarg(1), force_in_reg=True)
+ index_loc = self.ensure_reg_or_20bit_imm(op.getarg(1))
scale_box = op.getarg(2)
offset_box = op.getarg(3)
size_box = op.getarg(4)
@@ -841,7 +859,7 @@
def prepare_gc_store(self, op):
base_loc = self.ensure_reg(op.getarg(0), force_in_reg=True)
- index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
+ index_loc = self.ensure_reg_or_20bit_imm(op.getarg(1))
value_loc = self.ensure_reg(op.getarg(2))
size_box = op.getarg(3)
assert isinstance(size_box, ConstInt)
@@ -852,7 +870,7 @@
def prepare_gc_store_indexed(self, op):
args = op.getarglist()
base_loc = self.ensure_reg(op.getarg(0), force_in_reg=True)
- index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
+ index_loc = self.ensure_reg_or_20bit_imm(op.getarg(1))
value_loc = self.ensure_reg(op.getarg(2))
scale_box = op.getarg(3)
offset_box = op.getarg(4)
@@ -953,21 +971,20 @@
return self._prepare_call_default(op)
def prepare_zero_array(self, op):
+ # args: base, start, len, scale_start, scale_len
itemsize, ofs, _ = unpack_arraydescr(op.getdescr())
startindex_loc = self.ensure_reg_or_16bit_imm(op.getarg(1))
tempvar = TempInt()
self.rm.temp_boxes.append(tempvar)
ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(ofs))
- pad_byte, _ = self.rm.ensure_even_odd_pair(tempvar, tempvar,
- bind_first=True, must_exist=False, move_regs=False)
- base_loc, length_loc = self.rm.ensure_even_odd_pair(op.getarg(0), op,
+ base_loc, length_loc = self.rm.ensure_even_odd_pair(op.getarg(0), tempvar,
bind_first=True, must_exist=False, load_loc_odd=False)
length_box = op.getarg(2)
ll = self.rm.loc(length_box)
if length_loc is not ll:
self.assembler.regalloc_mov(ll, length_loc)
- return [base_loc, startindex_loc, length_loc, ofs_loc, imm(itemsize), pad_byte]
+ return [base_loc, startindex_loc, length_loc, ofs_loc, imm(itemsize)]
def prepare_cond_call(self, op):
self.load_condition_into_cc(op.getarg(0))
@@ -1102,12 +1119,25 @@
return [loc0, loc1]
def prepare_copystrcontent(self, op):
- src_ptr_loc = self.ensure_reg(op.getarg(0), force_in_reg=True)
+ """ this function needs five registers.
+ src & src_len: are allocated using ensure_even_odd_pair.
+ note that these are tmp registers, thus the actual variable
+ value is not modified.
+ src_len: when entering the assembler, src_ofs_loc's value is
contained
+ in src_len register.
+ """
+ src_tmp = TempVar()
+ src_ptr_loc, _ = \
+ self.rm.ensure_even_odd_pair(op.getarg(0),
+ src_tmp, bind_first=True,
+ must_exist=False, load_loc_odd=False)
+ src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
+ self.rm.temp_boxes.append(src_tmp)
dst_ptr_loc = self.ensure_reg(op.getarg(1), force_in_reg=True)
- src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
length_loc = self.ensure_reg_or_any_imm(op.getarg(4))
- self._spill_before_call(save_all_regs=False)
+ # no need to spill: we do not call memcpy, but use s390x's
+ # hardware instruction to copy the memory
return [src_ptr_loc, dst_ptr_loc,
src_ofs_loc, dst_ofs_loc, length_loc]
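
The new ensure_reg_or_20bit_imm exists because the long-displacement
instruction formats used for gc_load/gc_store encode a signed 20-bit
displacement; anything larger must be materialized in a register. The
check presumably boils down to (a sketch of helper.check_imm20, not
the actual implementation):

    def check_imm20(value):
        # signed 20-bit displacement range of z/Architecture
        # long-displacement (RXY-style) instruction formats
        return -2**19 <= value < 2**19

    assert check_imm20(524287) and check_imm20(-524288)
    assert not check_imm20(524288)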
diff --git a/rpython/jit/backend/zarch/test/test_assembler.py b/rpython/jit/backend/zarch/test/test_assembler.py
--- a/rpython/jit/backend/zarch/test/test_assembler.py
+++ b/rpython/jit/backend/zarch/test/test_assembler.py
@@ -226,6 +226,35 @@
self.a.mc.BCR(con.ANY, r.r14)
assert run_asm(self.a) == 15
+ def test_shift_same_register(self):
+ self.a.mc.load_imm(r.r3, 0x1)
+ self.a.mc.SLLG(r.r2, r.r3, loc.addr(1))
+ self.a.mc.BCR(con.ANY, r.r14)
+ assert run_asm(self.a) == 2
+
+ def test_shift_arith(self):
+ self.a.mc.load_imm(r.r2, -14)
+ self.a.mc.SLAG(r.r2, r.r2, loc.addr(1))
+ self.a.mc.BCR(con.ANY, r.r14)
+ assert run_asm(self.a) == -28
+
+ def test_shift_negative_logical(self):
+ self.a.mc.load_imm(r.r2, -14)
+ self.a.mc.SLLG(r.r2, r.r2, loc.addr(1))
+ self.a.mc.BCR(con.ANY, r.r14)
+ assert run_asm(self.a) == -28
+
+ def test_shift_negative_logical_2(self):
+ self.a.mc.load_imm(r.r2, -2)
+ self.a.mc.SLLG(r.r2, r.r2, loc.addr(63))
+ self.a.mc.BCR(con.ANY, r.r14)
+ assert run_asm(self.a) == 0
+
+ def test_shift_negative_logical_3(self):
+ self.a.mc.load_imm(r.r2, -2)
+ self.a.mc.SLLG(r.r3, r.r2, loc.addr(1))
+ self.a.mc.BCR(con.ANY, r.r14)
+ assert run_asm(self.a) == -2
def test_load_small_int_to_reg(self):
self.a.mc.LGHI(r.r2, loc.imm(123))
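
The expected values in the new shift tests can be cross-checked with
plain Python, interpreting the 64-bit result as two's complement (a
standalone sketch, independent of the backend):

    def as_signed64(x):
        x &= (1 << 64) - 1
        return x - (1 << 64) if x >> 63 else x

    assert as_signed64(-14 << 1) == -28   # arith and logical agree
    assert as_signed64(-2 << 63) == 0     # top bits fall off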