Author: Richard Plangger <[email protected]>
Branch: ppc-vsx-support
Changeset: r85475:c2a7f4349490
Date: 2016-06-30 16:36 +0200
http://bitbucket.org/pypy/pypy/changeset/c2a7f4349490/
Log: provide vec_expand_i implementation
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -706,6 +706,11 @@
vsel = VA(4, XO10=42)
vspltisb = VXI(4, XO8=780)
+ VX_splat = Form("ivrT", "ivrB", "ivrA", "XO8")
+ vspltb = VX_splat(4, XO8=524)
+ vsplth = VX_splat(4, XO8=588)
+ vspltw = VX_splat(4, XO8=652)
+
diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -481,35 +481,35 @@
elif size == 8:
# splat the low of src to both slots in res
src = srcloc.value
- #import pdb; pdb.set_trace()
self.mc.xxspltdl(res, src, src)
else:
notimplemented("[ppc/assembler] vec expand in this combination not supported")
def emit_vec_expand_i(self, op, arglocs, regalloc):
- notimplemented("[vec expand i]")
- srcloc, sizeloc = arglocs
- if not isinstance(srcloc, RegLoc):
- self.mov(srcloc, X86_64_SCRATCH_REG)
- srcloc = X86_64_SCRATCH_REG
- assert not srcloc.is_xmm
- size = sizeloc.value
+ res, l0, off = arglocs
+ size = op.bytesize
+
+ self.mc.load_imm(r.SCRATCH2, off.value)
+ self.mc.lvx(res.value, r.SCRATCH2.value, r.SP.value)
if size == 1:
- self.mc.PINSRB_xri(resloc.value, srcloc.value, 0)
- self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr))
+ if IS_BIG_ENDIAN:
+ self.mc.vspltb(res.value, res.value, 0b0000)
+ else:
+ self.mc.vspltb(res.value, res.value, 0b1111)
elif size == 2:
- self.mc.PINSRW_xri(resloc.value, srcloc.value, 0)
- self.mc.PINSRW_xri(resloc.value, srcloc.value, 4)
- self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0)
- self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0)
+ if IS_BIG_ENDIAN:
+ self.mc.vsplth(res.value, res.value, 0b000)
+ else:
+ self.mc.vsplth(res.value, res.value, 0b111)
elif size == 4:
- self.mc.PINSRD_xri(resloc.value, srcloc.value, 0)
- self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0)
+ if IS_BIG_ENDIAN:
+ self.mc.vspltw(res.value, res.value, 0b00)
+ else:
+ self.mc.vspltw(res.value, res.value, 0b11)
elif size == 8:
- self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
- self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
+ pass
else:
- raise AssertionError("cannot handle size %d (int expand)" % (size,))
+ notimplemented("[expand int size not impl]")
#def genop_vec_pack_i(self, op, arglocs, regalloc):
# resultloc, sourceloc, residxloc, srcidxloc, countloc, sizeloc = arglocs
@@ -811,7 +811,20 @@
res = self.force_allocate_vector_reg(op)
return [res, l0]
- prepare_vec_expand_i = prepare_vec_expand_f
+ def prepare_vec_expand_i(self, op):
+ arg = op.getarg(0)
+ mc = self.assembler.mc
+ if arg.is_constant():
+ l0 = self.rm.get_scratch_reg()
+ mc.load_imm(l0, arg.value)
+ else:
+ l0 = self.ensure_reg(arg)
+ mc.store(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
+ size = op.bytesize
+ if size == 8:
+ mc.store(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET+8)
+ res = self.force_allocate_vector_reg(op)
+ return [res, l0, imm(PARAM_SAVE_AREA_OFFSET)]
def prepare_vec_int_is_true(self, op):
arg = op.getarg(0)
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -755,7 +755,7 @@
node.pack = self
node.pack_position = i
- def split(self, packlist, vec_reg_size):
+ def split(self, packlist, vec_reg_size, vector_ext):
""" Combination phase creates the biggest packs that are possible.
In this step the pack is reduced in size to fit into an
vector register.
@@ -764,7 +764,7 @@
pack = self
while pack.pack_load(vec_reg_size) > Pack.FULL:
pack.clear()
- oplist, newoplist = pack.slice_operations(vec_reg_size)
+ oplist, newoplist = pack.slice_operations(vec_reg_size, vector_ext)
pack.operations = oplist
pack.update_pack_of_nodes()
if not pack.leftmost().is_typecast():
@@ -782,13 +782,13 @@
break
pack.update_pack_of_nodes()
- def opcount_filling_vector_register(self, vec_reg_size):
+ def opcount_filling_vector_register(self, vec_reg_size, vector_ext):
left = self.leftmost()
- oprestrict = trans.get(left)
+ oprestrict = vector_ext.get_operation_restriction(left)
return oprestrict.opcount_filling_vector_register(left, vec_reg_size)
- def slice_operations(self, vec_reg_size):
- count = self.opcount_filling_vector_register(vec_reg_size)
+ def slice_operations(self, vec_reg_size, vector_ext):
+ count = self.opcount_filling_vector_register(vec_reg_size, vector_ext)
assert count > 0
newoplist = self.operations[count:]
oplist = self.operations[:count]
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -451,7 +451,7 @@
if len_before == len(self.packset.packs):
break
- self.packset.split_overloaded_packs()
+ self.packset.split_overloaded_packs(self.cpu.vector_ext)
if not we_are_translated():
# some test cases check the accumulation variables
@@ -814,12 +814,12 @@
state.setvector_of_box(seed, 0, vecop) # prevent it from expansion
state.renamer.start_renaming(seed, vecop)
- def split_overloaded_packs(self):
+ def split_overloaded_packs(self, vector_ext):
newpacks = []
for i,pack in enumerate(self.packs):
load = pack.pack_load(self.vec_reg_size)
if load > Pack.FULL:
- pack.split(newpacks, self.vec_reg_size)
+ pack.split(newpacks, self.vec_reg_size, vector_ext)
continue
if load < Pack.FULL:
for op in pack.operations:
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -430,21 +430,24 @@
res = self.meta_interp(f, [60], vec_all=True)
assert res == f(60) == 34.5
- def test_variable_expand(self):
+ @py.test.mark.parametrize('type,value', [(rffi.DOUBLE, 58.4547),
+ (lltype.Signed, 2300000), (rffi.INT, 4321),
+ (rffi.SHORT, 9922), (rffi.SIGNEDCHAR, -127)])
+ def test_variable_expand(self, type, value):
myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
- T = lltype.Array(rffi.DOUBLE, hints={'nolength': True})
+ T = lltype.Array(type, hints={'nolength': True})
def f(d,variable):
va = lltype.malloc(T, d, flavor='raw', zero=True)
i = 0
while i < d:
myjitdriver.jit_merge_point()
- va[i] = va[i] + variable
+ va[i] = rffi.cast(type, variable)
i += 1
val = va[d//2]
lltype.free(va, flavor='raw')
return val
- res = self.meta_interp(f, [60,58.4547])
- assert res == f(60,58.4547) == 58.4547
+ res = self.meta_interp(f, [60,value])
+ assert res == f(60,value) == value
@py.test.mark.parametrize('vec,vec_all',[(False,True),(True,False),(True,True),(False,False)])
def test_accum(self, vec, vec_all):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit