Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r77292:7d60c4409027 Date: 2015-05-11 15:46 +0200 http://bitbucket.org/pypy/pypy/changeset/7d60c4409027/
Log: Added some missing vector x86 instructions to mc. Started to implement the new instructions (pack/unpack/expand/...); not yet finished. I need to find the suitable instructions for those. diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -4,15 +4,20 @@ import py from rpython.jit.metainterp.test.support import LLJitMixin +from rpython.jit.backend.x86.test.test_basic import Jit386Mixin from rpython.jit.metainterp.warmspot import reset_jit, get_stats from pypy.module.micronumpy import boxes from pypy.module.micronumpy.compile import FakeSpace, Parser, InterpreterState from pypy.module.micronumpy.base import W_NDimArray -class TestNumpyJit(LLJitMixin): +class TestNumpyJit(Jit386Mixin): graph = None interp = None + def setup_method(self, method): + if not self.CPUClass.vector_extension: + py.test.skip("needs vector extension to run (for now)") + def setup_class(cls): default = """ a = [1,2,3,4] @@ -128,7 +133,6 @@ """ def test_sum(self): - py.test.skip('TODO') result = self.run("sum") assert result == sum(range(30)) self.check_trace_count(1) @@ -150,7 +154,6 @@ """ def test_cumsum(self): - py.test.skip('TODO') result = self.run("cumsum") assert result == 15 self.check_trace_count(1) @@ -220,7 +223,6 @@ }) def define_reduce(): - py.test.skip('TODO') return """ a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] sum(a) diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py --- a/rpython/jit/backend/llgraph/runner.py +++ b/rpython/jit/backend/llgraph/runner.py @@ -242,7 +242,10 @@ translate_support_code = False is_llgraph = True - vector_register_size = 16 + vector_extension = True + vector_register_size = 16 # in bytes + vector_horizontal_operations = True + vector_pack_slots = True def __init__(self, rtyper, stats=None, *ignored_args, **kwds): model.AbstractCPU.__init__(self) @@ -794,8 +797,6 @@ _type = 
longlong.FLOATSTORAGE else: raise AssertionError(box) - #for a in arg: - # assert lltype.typeOf(a) == _type else: raise AssertionError(box) # diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py --- a/rpython/jit/backend/llsupport/llmodel.py +++ b/rpython/jit/backend/llsupport/llmodel.py @@ -25,6 +25,11 @@ HAS_CODEMAP = False + vector_extension = False + vector_register_size = 0 # in bytes + vector_horizontal_operations = False + vector_pack_slots = False + def __init__(self, rtyper, stats, opts, translate_support_code=False, gcdescr=None): assert type(opts) is not bool diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -1,5 +1,6 @@ import sys import os +import py from rpython.jit.backend.llsupport import symbolic, jitframe, rewrite from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler, @@ -2517,6 +2518,45 @@ else: raise NotImplementedError + def genop_vec_int_sub(self, op, arglocs, resloc): + loc0, loc1, itemsize_loc = arglocs + itemsize = itemsize_loc.value + if itemsize == 1: + self.mc.PSUBB(loc0, loc1) + elif itemsize == 2: + self.mc.PSUBW(loc0, loc1) + elif itemsize == 4: + self.mc.PSUBD(loc0, loc1) + elif itemsize == 8: + self.mc.PSUBQ(loc0, loc1) + else: + raise NotImplementedError + + genop_vec_float_arith = """ + def genop_vec_float_{type}(self, op, arglocs, resloc): + loc0, loc1, itemsize_loc = arglocs + itemsize = itemsize_loc.value + if itemsize == 4: + self.mc.{p_op_s}(loc0, loc1) + elif itemsize == 8: + self.mc.{p_op_d}(loc0, loc1) + else: + raise NotImplementedError + """ + for op in ['add','mul','sub','div']: + OP = op.upper() + _source = genop_vec_float_arith.format(type=op, p_op_s=OP+'PS',p_op_d=OP+'PD') + exec py.code.Source(_source).compile() + del genop_vec_float_arith + + def genop_vec_unpack(self, op, arglocs, resloc): + loc0, indexloc, sizeloc = 
arglocs + size = sizeloc.value + if size == 4: + pass + elif size == 8: + self.mc.CMPPD( + def genop_vec_int_signext(self, op, arglocs, resloc): pass diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1505,7 +1505,8 @@ consider_vec_raw_store = consider_vec_setarrayitem_raw - def consider_vec_int_add(self, op): + + def consider_vec_arith(self, op): count = op.getarg(2) assert isinstance(count, ConstInt) itemsize = self.assembler.cpu.vector_register_size // count.value @@ -1514,6 +1515,26 @@ loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args) self.perform(op, [loc0, loc1, imm(itemsize)], loc0) + consider_vec_int_add = consider_vec_arith + consider_vec_int_sub = consider_vec_arith + consider_vec_int_mul = consider_vec_arith + consider_vec_float_add = consider_vec_arith + consider_vec_float_sub = consider_vec_arith + consider_vec_float_mul = consider_vec_arith + del consider_vec_arith + + def consider_vec_logic(self, op): + count = op.getarg(2) + assert isinstance(count, ConstInt) + itemsize = self.assembler.cpu.vector_register_size // count.value + args = op.getarglist() + loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args) + loc1 = self.xrm.make_sure_var_in_reg(op.getarg(1), args) + self.perform(op, [loc0, loc1, imm(itemsize)], loc0) + + consider_vec_float_eq = consider_vec_logic + del consider_vec_logic + def consider_vec_int_signext(self, op): # there is not much we can do in this case. 
arithmetic is # done on the vector register, if there is a wrap around, @@ -1524,6 +1545,35 @@ #if op.getarg(1).value != op.getarg(2).value: # raise NotImplementedError("signext not implemented") + def consider_vec_box_pack(self, op): + count = op.getarg(3) + index = op.getarg(2) + assert isinstance(count, ConstInt) + assert isinstance(index, ConstInt) + itemsize = self.assembler.cpu.vector_register_size // count.value + args = op.getarglist() + loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0), args) + loc1 = self.make_sure_var_in_reg(op.getarg(1), args) + self.perform(op, [loc0, loc1, imm(index.value), imm(itemsize)], None) + + def consider_vec_box_unpack(self, op): + count = op.getarg(2) + index = op.getarg(1) + assert isinstance(count, ConstInt) + assert isinstance(index, ConstInt) + itemsize = self.assembler.cpu.vector_register_size // count.value + args = op.getarglist() + loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0), args) + result = self.force_allocate_reg(op.result, args) + self.perform(op, [loc0, imm(index.value), imm(itemsize)], result) + + def consider_vec_expand(self, op): + pass + + def consider_vec_box(self, op): + # pseudo instruction, needed to create a new variable + pass + def consider_guard_early_exit(self, op): pass diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py --- a/rpython/jit/backend/x86/runner.py +++ b/rpython/jit/backend/x86/runner.py @@ -24,11 +24,6 @@ with_threads = False frame_reg = regloc.ebp - vector_extension = False - vector_register_size = 0 # in bytes - vector_horizontal_operations = False - vector_pack_slots = False - from rpython.jit.backend.x86.arch import JITFRAME_FIXED_SIZE all_reg_indexes = gpr_reg_mgr_cls.all_reg_indexes gen_regs = gpr_reg_mgr_cls.all_regs diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py --- a/rpython/jit/backend/x86/rx86.py +++ b/rpython/jit/backend/x86/rx86.py @@ -920,6 +920,15 @@ define_modrm_modes('XORPS_x*', [rex_nw, '\x0F\x57', 
register(1, 8)], regtype='XMM') define_modrm_modes('ANDPD_x*', ['\x66', rex_nw, '\x0F\x54', register(1, 8)], regtype='XMM') +define_modrm_modes('ADDPD_x*', ['\x66', rex_nw, '\x0F\x58', register(1, 8)], regtype='XMM') +define_modrm_modes('ADDPS_x*', [ rex_nw, '\x0F\x58', register(1, 8)], regtype='XMM') +define_modrm_modes('SUBPD_x*', ['\x66', rex_nw, '\x0F\x5C', register(1, 8)], regtype='XMM') +define_modrm_modes('SUBPS_x*', [ rex_nw, '\x0F\x5C', register(1, 8)], regtype='XMM') +define_modrm_modes('MULPD_x*', ['\x66', rex_nw, '\x0F\x59', register(1, 8)], regtype='XMM') +define_modrm_modes('MULPS_x*', [ rex_nw, '\x0F\x59', register(1, 8)], regtype='XMM') +define_modrm_modes('DIVPD_x*', ['\x66', rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM') +define_modrm_modes('DIVPS_x*', [ rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM') + def define_pxmm_insn(insnname_template, insn_char): def add_insn(char, *post): methname = insnname_template.replace('*', char) @@ -938,6 +947,9 @@ define_pxmm_insn('PADDW_x*', '\xFD') define_pxmm_insn('PADDB_x*', '\xFC') define_pxmm_insn('PSUBQ_x*', '\xFB') +define_pxmm_insn('PSUBD_x*', '\xFA') +define_pxmm_insn('PSUBW_x*', '\xF9') +define_pxmm_insn('PSUBB_x*', '\xF8') define_pxmm_insn('PAND_x*', '\xDB') define_pxmm_insn('POR_x*', '\xEB') define_pxmm_insn('PXOR_x*', '\xEF') diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -402,7 +402,9 @@ (j, vbox) = box_to_vbox.get(arg, (-1, None)) if vbox: arg_cloned = arg.clonebox() - unpack_op = ResOperation(rop.VEC_BOX_UNPACK, [vbox, ConstInt(j)], arg_cloned) + cj = ConstInt(j) + ci = ConstInt(vbox.item_count) + unpack_op = ResOperation(rop.VEC_BOX_UNPACK, [vbox, cj, ci], arg_cloned) self.emit_operation(unpack_op) sched_data.rename_unpacked(arg, arg_cloned) op.setarg(i, arg_cloned) @@ -415,7 +417,9 @@ (j, vbox) = 
box_to_vbox.get(arg, (-1, None)) if vbox: arg_cloned = arg.clonebox() - unpack_op = ResOperation(rop.VEC_BOX_UNPACK, [vbox, ConstInt(j)], arg_cloned) + cj = ConstInt(j) + ci = ConstInt(vbox.item_count) + unpack_op = ResOperation(rop.VEC_BOX_UNPACK, [vbox, cj, ci], arg_cloned) self.emit_operation(unpack_op) sched_data.rename_unpacked(arg, arg_cloned) fail_args[i] = arg_cloned @@ -619,6 +623,7 @@ break vbox = BoxVector(arg.type, len(ops)) + print "creating vectorbox", vbox, "of type",arg.type if all_same_box: expand_op = ResOperation(rop.VEC_EXPAND, [arg, ConstInt(len(ops))], vbox) self.preamble_ops.append(expand_op) diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -458,12 +458,13 @@ 'VEC_FLOAT_ADD/3', 'VEC_FLOAT_SUB/3', 'VEC_FLOAT_MUL/3', + 'VEC_FLOAT_DIV/3', 'VEC_FLOAT_EQ/3', 'VEC_INT_SIGNEXT/3', '_VEC_ARITHMETIC_LAST', - 'VEC_BOX_UNPACK/2', - 'VEC_BOX_PACK/3', - 'VEC_EXPAND/2', + 'VEC_BOX_UNPACK/3', # iX|fX = VEC_BOX_UNPACK(vX, index, item_count) + 'VEC_BOX_PACK/4', # VEC_BOX_PACK(vX, var/const, index, item_count) + 'VEC_EXPAND/2', # vX = VEC_EXPAND(var/const, item_count) 'VEC_BOX/1', # 'INT_LT/2b', @@ -725,6 +726,7 @@ rop.FLOAT_ADD: rop.VEC_FLOAT_ADD, rop.FLOAT_SUB: rop.VEC_FLOAT_SUB, rop.FLOAT_MUL: rop.VEC_FLOAT_MUL, + rop.FLOAT_TRUEDIV: rop.VEC_FLOAT_DIV, rop.FLOAT_EQ: rop.VEC_FLOAT_EQ, rop.INT_SIGNEXT: rop.VEC_INT_SIGNEXT, _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit