Author: Armin Rigo <[email protected]>
Branch: copystrcontents-in-rewrite
Changeset: r96780:c08ec5828388
Date: 2019-06-09 12:46 +0200
http://bitbucket.org/pypy/pypy/changeset/c08ec5828388/
Log: Finish the logic for CPUs that don't have good support for
load_effective_address: rewrite.py emits a sequence of int_add and
int_lshift in this case.
diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py
--- a/rpython/jit/backend/arm/runner.py
+++ b/rpython/jit/backend/arm/runner.py
@@ -23,6 +23,7 @@
supports_floats = True
supports_longlong = True
supports_singlefloats = True
+ supports_load_effective_address = True
from rpython.jit.backend.arm.arch import JITFRAME_FIXED_SIZE
all_reg_indexes = range(len(all_regs))
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -969,37 +969,52 @@
basesize = self.gc_ll_descr.str_descr.basesize
# because we have one extra item after alloc, the actual address
# of string start is 1 lower, from extra_item_after_malloc
- base = ConstInt(basesize - 1)
- itemsize = self.gc_ll_descr.str_descr.itemsize
- assert itemsize == 1
- itemscale = ConstInt(0)
+ basesize -= 1
+ assert self.gc_ll_descr.str_descr.itemsize == 1
+ itemscale = 0
else:
- base = ConstInt(self.gc_ll_descr.unicode_descr.basesize)
+ basesize = self.gc_ll_descr.unicode_descr.basesize
itemsize = self.gc_ll_descr.unicode_descr.itemsize
if itemsize == 2:
- itemscale = ConstInt(1)
+ itemscale = 1
elif itemsize == 4:
- itemscale = ConstInt(2)
+ itemscale = 2
else:
assert False, "unknown size of unicode"
- i1 = ResOperation(rop.LOAD_EFFECTIVE_ADDRESS,
- [op.getarg(0), op.getarg(2), base, itemscale])
- i2 = ResOperation(rop.LOAD_EFFECTIVE_ADDRESS,
- [op.getarg(1), op.getarg(3), base, itemscale])
- self.emit_op(i1)
- self.emit_op(i2)
+ i1 = self.emit_load_effective_address(op.getarg(0), op.getarg(2),
+ basesize, itemscale)
+ i2 = self.emit_load_effective_address(op.getarg(1), op.getarg(3),
+ basesize, itemscale)
if op.getopnum() == rop.COPYSTRCONTENT:
arg = op.getarg(4)
else:
# do some basic constant folding
if isinstance(op.getarg(4), ConstInt):
- arg = ConstInt(op.getarg(4).getint() * itemsize)
+ arg = ConstInt(op.getarg(4).getint() << itemscale)
else:
- arg = ResOperation(rop.INT_MUL, [op.getarg(4), ConstInt(itemsize)])
+ arg = ResOperation(rop.INT_LSHIFT,
+ [op.getarg(4), ConstInt(itemscale)])
self.emit_op(arg)
self.emit_op(ResOperation(rop.CALL_N,
[ConstInt(memcpy_fn), i2, i1, arg], descr=memcpy_descr))
+ def emit_load_effective_address(self, v_gcptr, v_index, base, itemscale):
+ if self.cpu.supports_load_effective_address:
+ i1 = ResOperation(rop.LOAD_EFFECTIVE_ADDRESS,
+ [v_gcptr, v_index, ConstInt(base),
+ ConstInt(itemscale)])
+ self.emit_op(i1)
+ return i1
+ else:
+ if itemscale > 0:
+ v_index = ResOperation(rop.INT_LSHIFT,
+ [v_index, ConstInt(itemscale)])
+ self.emit_op(v_index)
+ i1b = ResOperation(rop.INT_ADD, [v_gcptr, v_index])
+ self.emit_op(i1b)
+ i1 = ResOperation(rop.INT_ADD, [i1b, ConstInt(base)])
+ self.emit_op(i1)
+ return i1
def remove_constptr(self, c):
"""Remove all ConstPtrs, and replace them with load_from_gc_table.
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -148,6 +148,8 @@
unicodelendescr = unicodedescr.lendescr
strhashdescr = self.gc_ll_descr.str_hash_descr
unicodehashdescr = self.gc_ll_descr.unicode_hash_descr
+ uni_basesize = unicodedescr.basesize
+ uni_itemscale = {2: 1, 4: 2}[unicodedescr.itemsize]
memcpy_fn = self.gc_ll_descr.memcpy_fn
memcpy_descr = self.gc_ll_descr.memcpy_descr
@@ -204,6 +206,7 @@
load_constant_offset = True
load_supported_factors = (1,2,4,8)
+ supports_load_effective_address = True
translate_support_code = None
@@ -1454,3 +1457,46 @@
i3 = load_effective_address(p1, i1, %(str_basesize)s, 0)
call_n(ConstClass(memcpy_fn), i3, i2, i_len, descr=memcpy_descr)
""")
+
+ def test_rewrite_copystrcontents_without_load_effective_address(self):
+ self.cpu.supports_load_effective_address = False
+ self.check_rewrite("""
+ [p0, p1, i0, i1, i_len]
+ copystrcontent(p0, p1, i0, i1, i_len)
+ """, """
+ [p0, p1, i0, i1, i_len]
+ i2b = int_add(p0, i0)
+ i2 = int_add(i2b, %(str_basesize)s)
+ i3b = int_add(p1, i1)
+ i3 = int_add(i3b, %(str_basesize)s)
+ call_n(ConstClass(memcpy_fn), i3, i2, i_len, descr=memcpy_descr)
+ """)
+
+ def test_rewrite_copyunicodecontents(self):
+ self.check_rewrite("""
+ [p0, p1, i0, i1, i_len]
+ copyunicodecontent(p0, p1, i0, i1, i_len)
+ """, """
+ [p0, p1, i0, i1, i_len]
+ i2 = load_effective_address(p0, i0, %(uni_basesize)s, %(uni_itemscale)d)
+ i3 = load_effective_address(p1, i1, %(uni_basesize)s, %(uni_itemscale)d)
+ i4 = int_lshift(i_len, %(uni_itemscale)d)
+ call_n(ConstClass(memcpy_fn), i3, i2, i4, descr=memcpy_descr)
+ """)
+
+ def test_rewrite_copyunicodecontents_without_load_effective_address(self):
+ self.cpu.supports_load_effective_address = False
+ self.check_rewrite("""
+ [p0, p1, i0, i1, i_len]
+ copyunicodecontent(p0, p1, i0, i1, i_len)
+ """, """
+ [p0, p1, i0, i1, i_len]
+ i0s = int_lshift(i0, %(uni_itemscale)d)
+ i2b = int_add(p0, i0s)
+ i2 = int_add(i2b, %(uni_basesize)s)
+ i1s = int_lshift(i1, %(uni_itemscale)d)
+ i3b = int_add(p1, i1s)
+ i3 = int_add(i3b, %(uni_basesize)s)
+ i4 = int_lshift(i_len, %(uni_itemscale)d)
+ call_n(ConstClass(memcpy_fn), i3, i2, i4, descr=memcpy_descr)
+ """)
diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py
--- a/rpython/jit/backend/model.py
+++ b/rpython/jit/backend/model.py
@@ -19,6 +19,7 @@
# Boxes and Consts are BoxFloats and ConstFloats.
supports_singlefloats = False
supports_guard_gc_type = False
+ supports_load_effective_address = False
propagate_exception_descr = None
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -966,72 +966,6 @@
pmc.overwrite()
-class StrOpAssembler(object):
-
- _mixin_ = True
-
- def emit_copystrcontent(self, op, arglocs, regalloc):
- self._emit_copycontent(arglocs, is_unicode=False)
-
- def emit_copyunicodecontent(self, op, arglocs, regalloc):
- self._emit_copycontent(arglocs, is_unicode=True)
-
- def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
- if src_ofs.is_imm():
- value = src_ofs.value << scale
- if value < 32768:
- self.mc.addi(dst.value, src_ptr.value, value)
- else:
- self.mc.load_imm(dst, value)
- self.mc.add(dst.value, src_ptr.value, dst.value)
- elif scale == 0:
- self.mc.add(dst.value, src_ptr.value, src_ofs.value)
- else:
- self.mc.sldi(dst.value, src_ofs.value, scale)
- self.mc.add(dst.value, src_ptr.value, dst.value)
-
- def _emit_copycontent(self, arglocs, is_unicode):
- [src_ptr_loc, dst_ptr_loc,
- src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
-
- if is_unicode:
- basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
- self.cpu.translate_support_code)
- if itemsize == 2: scale = 1
- elif itemsize == 4: scale = 2
- else: raise AssertionError
- else:
- basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
- self.cpu.translate_support_code)
- assert itemsize == 1
- basesize -= 1 # for the extra null character
- scale = 0
-
- self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale)
- self._emit_load_for_copycontent(r.r2, dst_ptr_loc, dst_ofs_loc, scale)
-
- if length_loc.is_imm():
- length = length_loc.getint()
- self.mc.load_imm(r.r5, length << scale)
- else:
- if scale > 0:
- self.mc.sldi(r.r5.value, length_loc.value, scale)
- elif length_loc is not r.r5:
- self.mc.mr(r.r5.value, length_loc.value)
-
- self.mc.mr(r.r4.value, r.r0.value)
- self.mc.addi(r.r4.value, r.r4.value, basesize)
- self.mc.addi(r.r3.value, r.r2.value, basesize)
-
- self.mc.load_imm(self.mc.RAW_CALL_REG, self.memcpy_addr)
- self.mc.raw_call()
-
-
-class UnicodeOpAssembler(object):
- _mixin_ = True
- # empty!
-
-
class AllocOpAssembler(object):
_mixin_ = True
@@ -1336,8 +1270,7 @@
class OpAssembler(IntOpAssembler, GuardOpAssembler,
MiscOpAssembler, FieldOpAssembler,
- StrOpAssembler, CallOpAssembler,
- UnicodeOpAssembler, ForceOpAssembler,
+ CallOpAssembler, ForceOpAssembler,
AllocOpAssembler, FloatOpAssembler,
VectorAssembler):
_mixin_ = True
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -802,26 +802,6 @@
temp_loc = r.SCRATCH2
return [base_loc, temp_loc]
- def prepare_load_effective_address(self, op):
- arg0 = self.ensure_reg(op.getarg(0))
- arg1 = self.ensure_reg(op.getarg(1))
- arg2 = self.ensure_reg_or_any_imm(op.getarg(2))
- arg3 = self.ensure_reg_or_any_imm(op.getarg(3))
- resloc = self.force_allocate_reg(op)
- return [arg0, arg1, arg2, arg3, resloc]
-
- def prepare_copystrcontent(self, op):
- src_ptr_loc = self.ensure_reg(op.getarg(0))
- dst_ptr_loc = self.ensure_reg(op.getarg(1))
- src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
- dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
- length_loc = self.ensure_reg_or_any_imm(op.getarg(4))
- self._spill_before_call(gc_level=0)
- return [src_ptr_loc, dst_ptr_loc,
- src_ofs_loc, dst_ofs_loc, length_loc]
-
- prepare_copyunicodecontent = prepare_copystrcontent
-
prepare_same_as_i = helper.prepare_unary_op
prepare_same_as_r = helper.prepare_unary_op
prepare_same_as_f = helper.prepare_unary_op
diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -16,6 +16,7 @@
debug = True
supports_floats = True
supports_singlefloats = True
+ supports_load_effective_address = True
dont_keepalive_stuff = False # for tests
with_threads = False
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -963,75 +963,15 @@
def _mem_offset_supported(self, value):
return -2**19 <= value < 2**19
- def emit_copystrcontent(self, op, arglocs, regalloc):
- self._emit_copycontent(arglocs, is_unicode=False)
-
- def emit_copyunicodecontent(self, op, arglocs, regalloc):
- self._emit_copycontent(arglocs, is_unicode=True)
-
- def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
- if src_ofs.is_imm():
- value = src_ofs.value << scale
- if check_imm_value(value):
- self.mc.AGHIK(dst, src_ptr, l.imm(value))
- else:
- # it is fine to use r1 here, because it will
- # only hold a value before invoking the memory copy
- self.mc.load_imm(r.SCRATCH, value)
- self.mc.AGRK(dst, src_ptr, r.SCRATCH)
- elif scale == 0:
- self.mc.AGRK(dst, src_ptr, src_ofs)
- else:
- self.mc.SLLG(r.SCRATCH, src_ofs, l.addr(scale))
- self.mc.AGRK(dst, src_ptr, r.SCRATCH)
-
- def _emit_copycontent(self, arglocs, is_unicode):
- [src_ptr_loc, dst_ptr_loc,
- src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
-
- if is_unicode:
- basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
- self.cpu.translate_support_code)
- if itemsize == 2: scale = 1
- elif itemsize == 4: scale = 2
- else: raise AssertionError
- else:
- basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
- self.cpu.translate_support_code)
- assert itemsize == 1
- basesize -= 1 # for the extra null character
- scale = 0
-
- # src and src_len are tmp registers
- src = src_ptr_loc
- src_len = r.odd_reg(src)
- dst = r.r0
- dst_len = r.r1
- self._emit_load_for_copycontent(src, src_ptr_loc, src_ofs_loc, scale)
- self._emit_load_for_copycontent(dst, dst_ptr_loc, dst_ofs_loc, scale)
-
- if length_loc.is_imm():
- length = length_loc.getint()
- self.mc.load_imm(dst_len, length << scale)
- else:
- if scale > 0:
- self.mc.SLLG(dst_len, length_loc, l.addr(scale))
- else:
- self.mc.LGR(dst_len, length_loc)
- # ensure that src_len is as long as dst_len, otherwise
- # padding bytes are written to dst
- self.mc.LGR(src_len, dst_len)
-
- self.mc.AGHI(src, l.imm(basesize))
- self.mc.AGHI(dst, l.imm(basesize))
-
- # s390x has memset directly as a hardware instruction!!
- # 0xB8 means we might reference dst later
- self.mc.MVCLE(dst, src, l.addr(0xB8))
- # NOTE this instruction can (determined by the cpu), just
- # quit the movement any time, thus it is looped until all bytes
- # are copied!
- self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
+ # ...copystrcontent logic was removed, but note that
+ # if we want to reintroduce support for that:
+ # s390x has memset directly as a hardware instruction!!
+ # 0xB8 means we might reference dst later
+ #self.mc.MVCLE(dst, src, l.addr(0xB8))
+ # NOTE this instruction can (determined by the cpu), just
+ # quit the movement any time, thus it is looped until all bytes
+ # are copied!
+ #self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
def emit_zero_array(self, op, arglocs, regalloc):
base_loc, startindex_loc, length_loc, \
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -1269,29 +1269,6 @@
loc1 = self.ensure_reg(op.getarg(1))
return [loc0, loc1]
- def prepare_copystrcontent(self, op):
- """ this function needs five registers.
- src & src_len: are allocated using ensure_even_odd_pair.
- note that these are tmp registers, thus the actual variable
- value is not modified.
- src_len: when entering the assembler, src_ofs_loc's value is contained
- in src_len register.
- """
- src_ptr_loc, _ = \
- self.rm.ensure_even_odd_pair(op.getarg(0),
- None, bind_first=True,
- must_exist=False, load_loc_odd=False)
- src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
- dst_ptr_loc = self.ensure_reg(op.getarg(1))
- dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
- length_loc = self.ensure_reg_or_any_imm(op.getarg(4))
- # no need to spill, we do not call memcpy, but we use s390x's
- # hardware instruction to copy memory
- return [src_ptr_loc, dst_ptr_loc,
- src_ofs_loc, dst_ofs_loc, length_loc]
-
- prepare_copyunicodecontent = prepare_copystrcontent
-
def prepare_label(self, op):
descr = op.getdescr()
assert isinstance(descr, TargetToken)
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1055,8 +1055,8 @@
'UNICODEGETITEM/2/i',
#
'LOAD_FROM_GC_TABLE/1/r', # only emitted by rewrite.py
- #
- 'LOAD_EFFECTIVE_ADDRESS/4/i', # only emitted by rewrite.py, shortcut for x86
+ 'LOAD_EFFECTIVE_ADDRESS/4/i', # only emitted by rewrite.py, only if
+ # cpu.supports_load_effective_address. [v_gcptr,v_index,c_baseofs,c_shift]
#
'_ALWAYS_PURE_LAST', # ----- end of always_pure operations -----
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit