Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79651:5b32b72ad145
Date: 2015-09-16 14:02 +0200
http://bitbucket.org/pypy/pypy/changeset/5b32b72ad145/
Log: scheduling tests passing again
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -70,13 +70,14 @@
return True
return node.depends_count() != 0
- def mark_emitted(self, node, state):
+ def mark_emitted(self, node, state, unpack=True):
""" An operation has been emitted, adds new operations to the worklist
whenever their dependency count drops to zero.
Keeps worklist sorted (see priority) """
op = node.getoperation()
state.renamer.rename(op)
- state.ensure_args_unpacked(op)
+ if unpack:
+ state.ensure_args_unpacked(op)
node.position = len(state.oplist)
worklist = state.worklist
for dep in node.provides()[:]: # COPY
@@ -322,7 +323,7 @@
rop.UINT_LT, rop.UINT_LE,
rop.UINT_GT, rop.UINT_GE)
-def turn_to_vector(state, pack):
+def turn_into_vector(state, pack):
""" Turn a pack into a vector instruction """
#
# TODO self.check_if_pack_supported(pack)
@@ -546,7 +547,7 @@
i += 1
else:
# note that heterogenous nodes are not yet tracked
- vecop = expanded_map.get(arg, None)
+ vecop = state.find_expanded([arg])
if vecop:
args[index] = vecop
return vecop
@@ -554,12 +555,20 @@
ops.append(vecop)
if variables is not None:
variables.append(vecop)
- expanded_map[arg] = vecop
+ state.expand([arg], vecop)
+ #expanded_map.setdefault(arg,[]).append((vecop, -1))
#for i in range(vecop.count):
# state.setvector_of_box(arg, i, vecop)
args[index] = vecop
return vecop
+ # quick search if it has already been expanded
+ expandargs = [op.getoperation().getarg(index) for op in pack.operations]
+ vecop = state.find_expanded(expandargs)
+ if vecop:
+ args[index] = vecop
+ return vecop
+
vecop = OpHelpers.create_vec(arg.type, left.bytesize, left.signed)
ops.append(vecop)
for i,node in enumerate(pack.operations):
@@ -568,8 +577,8 @@
arguments = [vecop, arg, ConstInt(i), ConstInt(1)]
vecop = OpHelpers.create_vec_pack(arg.type, arguments, left.bytesize,
left.signed, vecop.count+1)
- #state.setvector_of_box(arg, i, vecop)
ops.append(vecop)
+ state.expand(expandargs, vecop)
if variables is not None:
variables.append(vecop)
@@ -589,6 +598,44 @@
self.inputargs[arg] = None
self.seen = {}
+ def expand(self, args, vecop):
+ index = 0
+ if len(args) == 1:
+ # loop is executed once, thus sets -1 as index
+ index = -1
+ for arg in args:
+ self.expanded_map.setdefault(arg, []).append((vecop, index))
+ index += 1
+
+ def find_expanded(self, args):
+ if len(args) == 1:
+ candidates = self.expanded_map.get(args[0], [])
+ for (vecop, index) in candidates:
+ if index == -1:
+ # found an expanded variable/constant
+ return vecop
+ return None
+ possible = {}
+ for i, arg in enumerate(args):
+ expansions = self.expanded_map.get(arg, [])
+ candidates = [vecop for (vecop, index) in expansions \
+ if i == index and possible.get(vecop,True)]
+ for vecop in candidates:
+ for key in possible.keys():
+ if key not in candidates:
+ # delete every not possible key,value
+ possible[key] = False
+ # found a candidate, append it if not yet present
+ possible[vecop] = True
+
+ if not possible:
+ # no possibility left, this combination is not expanded
+ return None
+ for vecop,valid in possible.items():
+ if valid:
+ return vecop
+ return None
+
def post_schedule(self):
loop = self.graph.loop
self.ensure_args_unpacked(loop.jump)
@@ -633,8 +680,8 @@
if node.pack:
assert node.pack.numops() > 1
for node in node.pack.operations:
- scheduler.mark_emitted(node, self)
- turn_to_vector(self, node.pack)
+ scheduler.mark_emitted(node, self, unpack=False)
+ turn_into_vector(self, node.pack)
return True
return False
@@ -673,7 +720,7 @@
fail_arguments[i] = arg
def ensure_unpacked(self, index, arg):
- if arg in self.seen or not arg.is_vector():
+ if arg in self.seen or arg.is_vector():
return arg
(pos, var) = self.getvector_of_box(arg)
if var:
@@ -722,7 +769,8 @@
if op.is_typecast():
if op.casts_down():
- return vec_reg_size // op.cast_from_bytesize()
+ size = op.cast_input_bytesize(vec_reg_size)
+ return size // op.cast_from_bytesize()
else:
return vec_reg_size // op.cast_to_bytesize()
return vec_reg_size // op.bytesize
@@ -791,10 +839,10 @@
if left.casts_down():
# size is reduced
size = left.cast_input_bytesize(vec_reg_size)
- import pdb; pdb.set_trace()
return left.cast_from_bytesize() * self.numops() - size
else:
# size is increased
+ #size = left.cast_input_bytesize(vec_reg_size)
return left.cast_to_bytesize() * self.numops() - vec_reg_size
return left.bytesize * self.numops() - vec_reg_size
@@ -823,10 +871,13 @@
In this step the pack is reduced in size to fit into an
vector register.
"""
+ before_count = len(packlist)
+ print "splitting pack", self
pack = self
while pack.pack_load(vec_reg_size) > Pack.FULL:
pack.clear()
oplist, newoplist = pack.slice_operations(vec_reg_size)
+ print " split of %dx, left: %d" % (len(oplist), len(newoplist))
pack.operations = oplist
pack.update_pack_of_nodes()
if not pack.leftmost().is_typecast():
@@ -842,6 +893,7 @@
newpack.clear()
newpack.operations = []
break
+ print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
pack.update_pack_of_nodes()
def slice_operations(self, vec_reg_size):
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -22,6 +22,11 @@
self.packs = packs
self.vec_reg_size = 16
+class FakeVecScheduleState(VecScheduleState):
+ def __init__(self):
+ self.expanded_map = {}
+
+
class SchedulerBaseTest(DependencyBaseTest):
def setup_class(self):
@@ -294,13 +299,11 @@
v15[2xi32] = vec_cast_float_to_int(v11[2xf64])
v16[2xi32] = vec_cast_float_to_int(v12[2xf64])
v17[2xi32] = vec_cast_float_to_int(v13[2xf64])
- v18[2xi16] = vec_int_signext(v14[2xi32],2)
- v19[2xi16] = vec_int_signext(v15[2xi32],2)
- v20[2xi16] = vec_int_signext(v16[2xi32],2)
- v21[2xi16] = vec_int_signext(v17[2xi32],2)
- v22[4xi16] = vec_pack_i(v18[2xi16], v19[2xi16], 2, 2)
- v23[6xi16] = vec_pack_i(v22[4xi16], v20[2xi16], 4, 2)
- v24[8xi16] = vec_pack_i(v23[6xi16], v21[2xi16], 6, 2)
+ v22[4xi32] = vec_pack_i(v14[2xi32], v15[2xi32], 2, 2)
+ v18[4xi16] = vec_int_signext(v22[4xi32],2)
+ v23[6xi16] = vec_pack_i(v16[2xi32], v17[2xi32], 2, 2)
+ v20[4xi16] = vec_int_signext(v23[4xi32],2)
+ v24[8xi16] = vec_pack_i(v18[4xi16], v20[4xi16], 4, 4)
vec_raw_store(p1, i1, v24[8xi16], descr=short)
""", False)
self.assert_equal(loop2, loop3)
@@ -463,3 +466,19 @@
packset.split_overloaded_packs()
assert len(packset.packs) == 1
+ def test_expand(self):
+ state = FakeVecScheduleState()
+ assert state.find_expanded([]) == None
+ state.expand(['a'], 'a')
+ assert state.find_expanded(['a']) == 'a'
+ state.expand(['a','b','c'], 'abc')
+ assert state.find_expanded(['a','b','c']) == 'abc'
+ state.expand(['a','d','c'], 'adc')
+ assert state.find_expanded(['a','b','c']) == 'abc'
+ assert state.find_expanded(['a','d','c']) == 'adc'
+ assert state.find_expanded(['d','d','c']) == None
+ state.expand(['d','d','c'], 'ddc')
+ assert state.find_expanded(['d','d','c']) == 'ddc'
+
+
+
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -830,17 +830,19 @@
def split_overloaded_packs(self):
newpacks = []
- import pdb; pdb.set_trace()
for i,pack in enumerate(self.packs):
load = pack.pack_load(self.vec_reg_size)
if load > Pack.FULL:
+ print "overloaded pack", pack
pack.split(newpacks, self.vec_reg_size)
continue
if load < Pack.FULL:
+ print "underloaded pack", pack
for op in pack.operations:
op.priority = -100
pack.clear()
self.packs[i] = None
continue
+ print "fully packed", pack
self.packs = [pack for pack in self.packs + newpacks if pack]
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -456,7 +456,7 @@
def is_typecast(self):
return False
- def cast_count(self):
+ def cast_count(self, vec_reg_size):
return self.casts[4]
def cast_types(self):
@@ -667,7 +667,7 @@
def cast_input_bytesize(self, vec_reg_size):
count = vec_reg_size // self.cast_to_bytesize()
- size = self.cast_from_bytesize() * self.count
+ size = self.cast_from_bytesize() * self.cast_count(vec_reg_size)
return size
class SignExtOp(object):
@@ -689,8 +689,8 @@
arg = self.getarg(0)
return arg.bytesize
- def cast_count(self):
- return self.casts[4]
+ def cast_input_bytesize(self, vec_reg_size):
+ return vec_reg_size # self.cast_from_bytesize() * self.cast_count(vec_reg_size)
class VectorOp(object):
@@ -1170,11 +1170,11 @@
]
_cast_ops = {
- 'CAST_FLOAT_TO_INT': ('f', 8, 'i', 4),
- 'CAST_INT_TO_FLOAT': ('i', 4, 'f', 8),
- 'CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4),
- 'CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8),
- 'INT_SIGNEXT': ('i', 0, 'i', 0),
+ 'CAST_FLOAT_TO_INT': ('f', 8, 'i', 4, 2),
+ 'CAST_INT_TO_FLOAT': ('i', 4, 'f', 8, 2),
+ 'CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4, 2),
+ 'CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8, 2),
+ 'INT_SIGNEXT': ('i', 0, 'i', 0, 0),
#'CAST_PTR_TO_INT': ('r', 0, 'i', 4),
#'CAST_INT_TO_PTR': ('i', 4, 'r', 0),
}
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit