Author: Armin Rigo <[email protected]>
Branch: copystrcontents-in-rewrite
Changeset: r96780:c08ec5828388
Date: 2019-06-09 12:46 +0200
http://bitbucket.org/pypy/pypy/changeset/c08ec5828388/

Log:    Finish the logic for CPUs that don't have good support for
        load_effective_address: rewrite.py emits a sequence of int_add and
        int_lshift in this case.

diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py
--- a/rpython/jit/backend/arm/runner.py
+++ b/rpython/jit/backend/arm/runner.py
@@ -23,6 +23,7 @@
     supports_floats = True
     supports_longlong = True
     supports_singlefloats = True
+    supports_load_effective_address = True
 
     from rpython.jit.backend.arm.arch import JITFRAME_FIXED_SIZE
     all_reg_indexes = range(len(all_regs))
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -969,37 +969,52 @@
             basesize = self.gc_ll_descr.str_descr.basesize
             # because we have one extra item after alloc, the actual address
             # of string start is 1 lower, from extra_item_after_malloc
-            base = ConstInt(basesize - 1)
-            itemsize = self.gc_ll_descr.str_descr.itemsize
-            assert itemsize == 1
-            itemscale = ConstInt(0)
+            basesize -= 1
+            assert self.gc_ll_descr.str_descr.itemsize == 1
+            itemscale = 0
         else:
-            base = ConstInt(self.gc_ll_descr.unicode_descr.basesize)
+            basesize = self.gc_ll_descr.unicode_descr.basesize
             itemsize = self.gc_ll_descr.unicode_descr.itemsize
             if itemsize == 2:
-                itemscale = ConstInt(1)
+                itemscale = 1
             elif itemsize == 4:
-                itemscale = ConstInt(2)
+                itemscale = 2
             else:
                 assert False, "unknown size of unicode"
-        i1 = ResOperation(rop.LOAD_EFFECTIVE_ADDRESS,
-                          [op.getarg(0), op.getarg(2), base, itemscale])
-        i2 = ResOperation(rop.LOAD_EFFECTIVE_ADDRESS,
-                          [op.getarg(1), op.getarg(3), base, itemscale])
-        self.emit_op(i1)
-        self.emit_op(i2)
+        i1 = self.emit_load_effective_address(op.getarg(0), op.getarg(2),
+                                              basesize, itemscale)
+        i2 = self.emit_load_effective_address(op.getarg(1), op.getarg(3),
+                                              basesize, itemscale)
         if op.getopnum() == rop.COPYSTRCONTENT:
             arg = op.getarg(4)
         else:
             # do some basic constant folding
             if isinstance(op.getarg(4), ConstInt):
-                arg = ConstInt(op.getarg(4).getint() * itemsize)
+                arg = ConstInt(op.getarg(4).getint() << itemscale)
             else:
-                arg = ResOperation(rop.INT_MUL, [op.getarg(4), ConstInt(itemsize)])
+                arg = ResOperation(rop.INT_LSHIFT,
+                                   [op.getarg(4), ConstInt(itemscale)])
                 self.emit_op(arg)
         self.emit_op(ResOperation(rop.CALL_N,
             [ConstInt(memcpy_fn), i2, i1, arg], descr=memcpy_descr))
 
+    def emit_load_effective_address(self, v_gcptr, v_index, base, itemscale):
+        if self.cpu.supports_load_effective_address:
+            i1 = ResOperation(rop.LOAD_EFFECTIVE_ADDRESS,
+                              [v_gcptr, v_index, ConstInt(base),
+                               ConstInt(itemscale)])
+            self.emit_op(i1)
+            return i1
+        else:
+            if itemscale > 0:
+                v_index = ResOperation(rop.INT_LSHIFT,
+                                       [v_index, ConstInt(itemscale)])
+                self.emit_op(v_index)
+            i1b = ResOperation(rop.INT_ADD, [v_gcptr, v_index])
+            self.emit_op(i1b)
+            i1 = ResOperation(rop.INT_ADD, [i1b, ConstInt(base)])
+            self.emit_op(i1)
+            return i1
 
     def remove_constptr(self, c):
         """Remove all ConstPtrs, and replace them with load_from_gc_table.
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -148,6 +148,8 @@
         unicodelendescr = unicodedescr.lendescr
         strhashdescr     = self.gc_ll_descr.str_hash_descr
         unicodehashdescr = self.gc_ll_descr.unicode_hash_descr
+        uni_basesize  = unicodedescr.basesize
+        uni_itemscale = {2: 1, 4: 2}[unicodedescr.itemsize]
         memcpy_fn = self.gc_ll_descr.memcpy_fn
         memcpy_descr = self.gc_ll_descr.memcpy_descr
 
@@ -204,6 +206,7 @@
 
     load_constant_offset = True
     load_supported_factors = (1,2,4,8)
+    supports_load_effective_address = True
 
     translate_support_code = None
 
@@ -1454,3 +1457,46 @@
         i3 = load_effective_address(p1, i1, %(str_basesize)s, 0)
         call_n(ConstClass(memcpy_fn), i3, i2, i_len, descr=memcpy_descr)
         """)
+
+    def test_rewrite_copystrcontents_without_load_effective_address(self):
+        self.cpu.supports_load_effective_address = False
+        self.check_rewrite("""
+        [p0, p1, i0, i1, i_len]
+        copystrcontent(p0, p1, i0, i1, i_len)
+        """, """
+        [p0, p1, i0, i1, i_len]
+        i2b = int_add(p0, i0)
+        i2 = int_add(i2b, %(str_basesize)s)
+        i3b = int_add(p1, i1)
+        i3 = int_add(i3b, %(str_basesize)s)
+        call_n(ConstClass(memcpy_fn), i3, i2, i_len, descr=memcpy_descr)
+        """)
+
+    def test_rewrite_copyunicodecontents(self):
+        self.check_rewrite("""
+        [p0, p1, i0, i1, i_len]
+        copyunicodecontent(p0, p1, i0, i1, i_len)
+        """, """
+        [p0, p1, i0, i1, i_len]
+        i2 = load_effective_address(p0, i0, %(uni_basesize)s, %(uni_itemscale)d)
+        i3 = load_effective_address(p1, i1, %(uni_basesize)s, %(uni_itemscale)d)
+        i4 = int_lshift(i_len, %(uni_itemscale)d)
+        call_n(ConstClass(memcpy_fn), i3, i2, i4, descr=memcpy_descr)
+        """)
+
+    def test_rewrite_copyunicodecontents_without_load_effective_address(self):
+        self.cpu.supports_load_effective_address = False
+        self.check_rewrite("""
+        [p0, p1, i0, i1, i_len]
+        copyunicodecontent(p0, p1, i0, i1, i_len)
+        """, """
+        [p0, p1, i0, i1, i_len]
+        i0s = int_lshift(i0, %(uni_itemscale)d)
+        i2b = int_add(p0, i0s)
+        i2 = int_add(i2b, %(uni_basesize)s)
+        i1s = int_lshift(i1, %(uni_itemscale)d)
+        i3b = int_add(p1, i1s)
+        i3 = int_add(i3b, %(uni_basesize)s)
+        i4 = int_lshift(i_len, %(uni_itemscale)d)
+        call_n(ConstClass(memcpy_fn), i3, i2, i4, descr=memcpy_descr)
+        """)
diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py
--- a/rpython/jit/backend/model.py
+++ b/rpython/jit/backend/model.py
@@ -19,6 +19,7 @@
     # Boxes and Consts are BoxFloats and ConstFloats.
     supports_singlefloats = False
     supports_guard_gc_type = False
+    supports_load_effective_address = False
 
     propagate_exception_descr = None
 
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -966,72 +966,6 @@
         pmc.overwrite()
 
 
-class StrOpAssembler(object):
-
-    _mixin_ = True
-
-    def emit_copystrcontent(self, op, arglocs, regalloc):
-        self._emit_copycontent(arglocs, is_unicode=False)
-
-    def emit_copyunicodecontent(self, op, arglocs, regalloc):
-        self._emit_copycontent(arglocs, is_unicode=True)
-
-    def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
-        if src_ofs.is_imm():
-            value = src_ofs.value << scale
-            if value < 32768:
-                self.mc.addi(dst.value, src_ptr.value, value)
-            else:
-                self.mc.load_imm(dst, value)
-                self.mc.add(dst.value, src_ptr.value, dst.value)
-        elif scale == 0:
-            self.mc.add(dst.value, src_ptr.value, src_ofs.value)
-        else:
-            self.mc.sldi(dst.value, src_ofs.value, scale)
-            self.mc.add(dst.value, src_ptr.value, dst.value)
-
-    def _emit_copycontent(self, arglocs, is_unicode):
-        [src_ptr_loc, dst_ptr_loc,
-         src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
-
-        if is_unicode:
-            basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
-                                        self.cpu.translate_support_code)
-            if   itemsize == 2: scale = 1
-            elif itemsize == 4: scale = 2
-            else: raise AssertionError
-        else:
-            basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
-                                        self.cpu.translate_support_code)
-            assert itemsize == 1
-            basesize -= 1     # for the extra null character
-            scale = 0
-
-        self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale)
-        self._emit_load_for_copycontent(r.r2, dst_ptr_loc, dst_ofs_loc, scale)
-
-        if length_loc.is_imm():
-            length = length_loc.getint()
-            self.mc.load_imm(r.r5, length << scale)
-        else:
-            if scale > 0:
-                self.mc.sldi(r.r5.value, length_loc.value, scale)
-            elif length_loc is not r.r5:
-                self.mc.mr(r.r5.value, length_loc.value)
-
-        self.mc.mr(r.r4.value, r.r0.value)
-        self.mc.addi(r.r4.value, r.r4.value, basesize)
-        self.mc.addi(r.r3.value, r.r2.value, basesize)
-
-        self.mc.load_imm(self.mc.RAW_CALL_REG, self.memcpy_addr)
-        self.mc.raw_call()
-
-
-class UnicodeOpAssembler(object):
-    _mixin_ = True
-    # empty!
-
-
 class AllocOpAssembler(object):
 
     _mixin_ = True
@@ -1336,8 +1270,7 @@
 
 class OpAssembler(IntOpAssembler, GuardOpAssembler,
                   MiscOpAssembler, FieldOpAssembler,
-                  StrOpAssembler, CallOpAssembler,
-                  UnicodeOpAssembler, ForceOpAssembler,
+                  CallOpAssembler, ForceOpAssembler,
                   AllocOpAssembler, FloatOpAssembler,
                   VectorAssembler):
     _mixin_ = True
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -802,26 +802,6 @@
         temp_loc = r.SCRATCH2
         return [base_loc, temp_loc]
 
-    def prepare_load_effective_address(self, op):
-        arg0 = self.ensure_reg(op.getarg(0))
-        arg1 = self.ensure_reg(op.getarg(1))
-        arg2 = self.ensure_reg_or_any_imm(op.getarg(2))
-        arg3 = self.ensure_reg_or_any_imm(op.getarg(3))
-        resloc = self.force_allocate_reg(op)
-        return [arg0, arg1, arg2, arg3, resloc]
-
-    def prepare_copystrcontent(self, op):
-        src_ptr_loc = self.ensure_reg(op.getarg(0))
-        dst_ptr_loc = self.ensure_reg(op.getarg(1))
-        src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
-        dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
-        length_loc  = self.ensure_reg_or_any_imm(op.getarg(4))
-        self._spill_before_call(gc_level=0)
-        return [src_ptr_loc, dst_ptr_loc,
-                src_ofs_loc, dst_ofs_loc, length_loc]
-
-    prepare_copyunicodecontent = prepare_copystrcontent
-
     prepare_same_as_i = helper.prepare_unary_op
     prepare_same_as_r = helper.prepare_unary_op
     prepare_same_as_f = helper.prepare_unary_op
diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -16,6 +16,7 @@
     debug = True
     supports_floats = True
     supports_singlefloats = True
+    supports_load_effective_address = True
 
     dont_keepalive_stuff = False # for tests
     with_threads = False
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -963,75 +963,15 @@
     def _mem_offset_supported(self, value):
         return -2**19 <= value < 2**19
 
-    def emit_copystrcontent(self, op, arglocs, regalloc):
-        self._emit_copycontent(arglocs, is_unicode=False)
-
-    def emit_copyunicodecontent(self, op, arglocs, regalloc):
-        self._emit_copycontent(arglocs, is_unicode=True)
-
-    def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
-        if src_ofs.is_imm():
-            value = src_ofs.value << scale
-            if check_imm_value(value):
-                self.mc.AGHIK(dst, src_ptr, l.imm(value))
-            else:
-                # it is fine to use r1 here, because it will
-                # only hold a value before invoking the memory copy
-                self.mc.load_imm(r.SCRATCH, value)
-                self.mc.AGRK(dst, src_ptr, r.SCRATCH)
-        elif scale == 0:
-            self.mc.AGRK(dst, src_ptr, src_ofs)
-        else:
-            self.mc.SLLG(r.SCRATCH, src_ofs, l.addr(scale))
-            self.mc.AGRK(dst, src_ptr, r.SCRATCH)
-
-    def _emit_copycontent(self, arglocs, is_unicode):
-        [src_ptr_loc, dst_ptr_loc,
-         src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
-
-        if is_unicode:
-            basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
-                                        self.cpu.translate_support_code)
-            if   itemsize == 2: scale = 1
-            elif itemsize == 4: scale = 2
-            else: raise AssertionError
-        else:
-            basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
-                                        self.cpu.translate_support_code)
-            assert itemsize == 1
-            basesize -= 1     # for the extra null character
-            scale = 0
-
-        # src and src_len are tmp registers
-        src = src_ptr_loc
-        src_len = r.odd_reg(src)
-        dst = r.r0
-        dst_len = r.r1
-        self._emit_load_for_copycontent(src, src_ptr_loc, src_ofs_loc, scale)
-        self._emit_load_for_copycontent(dst, dst_ptr_loc, dst_ofs_loc, scale)
-
-        if length_loc.is_imm():
-            length = length_loc.getint()
-            self.mc.load_imm(dst_len, length << scale)
-        else:
-            if scale > 0:
-                self.mc.SLLG(dst_len, length_loc, l.addr(scale))
-            else:
-                self.mc.LGR(dst_len, length_loc)
-        # ensure that src_len is as long as dst_len, otherwise
-        # padding bytes are written to dst
-        self.mc.LGR(src_len, dst_len)
-
-        self.mc.AGHI(src, l.imm(basesize))
-        self.mc.AGHI(dst, l.imm(basesize))
-
-        # s390x has memset directly as a hardware instruction!!
-        # 0xB8 means we might reference dst later
-        self.mc.MVCLE(dst, src, l.addr(0xB8))
-        # NOTE this instruction can (determined by the cpu), just
-        # quit the movement any time, thus it is looped until all bytes
-        # are copied!
-        self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
+    # ...copystrcontent logic was removed, but note that
+    # if we want to reintroduce support for that:
+    # s390x has memset directly as a hardware instruction!!
+    # 0xB8 means we might reference dst later
+    #self.mc.MVCLE(dst, src, l.addr(0xB8))
+    # NOTE this instruction can (determined by the cpu), just
+    # quit the movement any time, thus it is looped until all bytes
+    # are copied!
+    #self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
 
     def emit_zero_array(self, op, arglocs, regalloc):
         base_loc, startindex_loc, length_loc, \
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -1269,29 +1269,6 @@
         loc1 = self.ensure_reg(op.getarg(1))
         return [loc0, loc1]
 
-    def prepare_copystrcontent(self, op):
-        """ this function needs five registers.
-            src & src_len: are allocated using ensure_even_odd_pair.
-              note that these are tmp registers, thus the actual variable
-              value is not modified.
-            src_len: when entering the assembler, src_ofs_loc's value is contained
-              in src_len register.
-        """
-        src_ptr_loc, _ = \
-                self.rm.ensure_even_odd_pair(op.getarg(0),
-                             None, bind_first=True, 
-                             must_exist=False, load_loc_odd=False)
-        src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
-        dst_ptr_loc = self.ensure_reg(op.getarg(1))
-        dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
-        length_loc  = self.ensure_reg_or_any_imm(op.getarg(4))
-        # no need to spill, we do not call memcpy, but we use s390x's
-        # hardware instruction to copy memory
-        return [src_ptr_loc, dst_ptr_loc,
-                src_ofs_loc, dst_ofs_loc, length_loc]
-
-    prepare_copyunicodecontent = prepare_copystrcontent
-
     def prepare_label(self, op):
         descr = op.getdescr()
         assert isinstance(descr, TargetToken)
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1055,8 +1055,8 @@
     'UNICODEGETITEM/2/i',
     #
     'LOAD_FROM_GC_TABLE/1/r',    # only emitted by rewrite.py
-    #
-    'LOAD_EFFECTIVE_ADDRESS/4/i', # only emitted by rewrite.py, shortcut for x86
+    'LOAD_EFFECTIVE_ADDRESS/4/i', # only emitted by rewrite.py, only if
+    # cpu.supports_load_effective_address. [v_gcptr,v_index,c_baseofs,c_shift]
     #
     '_ALWAYS_PURE_LAST',  # ----- end of always_pure operations -----
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to