Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78324:313bcd2938c3
Date: 2015-06-26 16:34 +0200
http://bitbucket.org/pypy/pypy/changeset/313bcd2938c3/
Log: finishing up changes
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -361,6 +361,7 @@
continue
box_pos, vbox = self.sched_data.getvector_of_box(arg)
if not vbox:
+ import pdb; pdb.set_trace()
# constant/variable expand this box
vbox = self.expand(arg, i)
self.sched_data.setvector_of_box(arg, 0, vbox)
@@ -875,6 +876,11 @@
node.pack = None
node.pack_position = -1
+ def update_pack_of_nodes(self):
+ for i,node in enumerate(self.operations):
+ node.pack = self
+ node.pack_position = i
+
def rightmost_match_leftmost(self, other):
assert isinstance(other, Pack)
rightmost = self.operations[-1]
@@ -889,7 +895,8 @@
return rightmost is leftmost and accum
def __repr__(self):
- return "Pack(%r)" % self.operations
+ opname = self.operations[0].getoperation().getopname()
+ return "Pack(%s,%r)" % (opname, self.operations)
def is_accumulating(self):
return self.accum is not None
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -747,8 +747,12 @@
self.assert_packset_empty(vopt.packset, len(loop.operations),
[(6,12), (5,11), (7,13)])
- @pytest.mark.parametrize("descr,size",
[('char',16),('float',2),('int',2),('singlefloat',4)])
- def test_packset_combine_simple(self,descr,size):
+ @pytest.mark.parametrize("descr,packs,packidx",
+ [('char',1, [(0,(1,3,5,7))]),
+ ('float',2, [(0,(1,3)),(1,(5,7))]),
+ ('int',2, [(0,(1,3)),(1,(5,7))]),
+ ('singlefloat',1,[(0,(1,3,5,7))])])
+ def test_packset_combine_simple(self,descr,packs,packidx):
ops = """
[p0,i0]
i3 = getarrayitem_raw(p0, i0, descr={descr}arraydescr)
@@ -758,12 +762,13 @@
loop = self.parse_loop(ops)
vopt = self.combine_packset(loop,3)
assert len(vopt.dependency_graph.memory_refs) == 4
- assert len(vopt.packset.packs) == 16 // size
- self.assert_pack(vopt.packset.packs[0], (1,3,5,7))
+ assert len(vopt.packset.packs) == packs
+ for i,t in packidx:
+ self.assert_pack(vopt.packset.packs[i], t)
- @pytest.mark.parametrize("descr,stride",
- [('char',1),('float',8),('int',8),('singlefloat',4)])
- def test_packset_combine_2_loads_in_trace(self, descr, stride):
+ @pytest.mark.parametrize("descr,stride,packs",
+ [('char',1,1),('float',8,4),('int',8,4),('singlefloat',4,2)])
+ def test_packset_combine_2_loads_in_trace(self, descr, stride,packs):
ops = """
[p0,i0]
i3 = raw_load(p0, i0, descr={type}arraydescr)
@@ -775,24 +780,7 @@
loop = self.parse_loop(ops)
vopt = self.combine_packset(loop,3)
assert len(vopt.dependency_graph.memory_refs) == 8
- assert len(vopt.packset.packs) == (16//stride) * 2
- self.assert_pack(vopt.packset.packs[0], (1,3,5,7,9,11,13,15))
-
- def test_packset_combine_2_loads_one_redundant(self):
- py.test.skip("apply redundant load elimination?")
- ops = """
- [p0,i0]
- i3 = getarrayitem_raw(p0, i0, descr=floatarraydescr)
- i1 = int_add(i0,1)
- i4 = getarrayitem_raw(p0, i1, descr=floatarraydescr)
- jump(p0,i1)
- """
- loop = self.parse_loop(ops)
- vopt = self.combine_packset(loop,3)
- assert len(vopt.dependency_graph.memory_refs) == 8
- assert len(vopt.packset.packs) == 2
- self.assert_pack(vopt.packset.packs[0], (1,5,9))
- self.assert_pack(vopt.packset.packs[1], (3,7,11))
+ assert len(vopt.packset.packs) == packs
def test_packset_combine_no_candidates_packset_empty(self):
ops = """
@@ -847,7 +835,10 @@
loop = self.parse_loop(ops)
vopt = self.combine_packset(loop,3)
assert len(vopt.dependency_graph.memory_refs) == 12
- assert len(vopt.packset.packs) == 4
+ if stride == 8:
+ assert len(vopt.packset.packs) == 8
+ else:
+ assert len(vopt.packset.packs) == 4
for opindices in [(5,12,19,26),(6,13,20,27),
(7,14,21,28),(8,15,22,29)]:
@@ -859,7 +850,6 @@
('float_mul','float',8),
('int_add','int',8),
('int_sub','int',8),
- ('int_mul','int',8),
])
def test_schedule_vector_operation(self, op, descr, stride):
ops = """
@@ -981,7 +971,7 @@
[p0,i0]
guard_early_exit() [p0,i0]
i1 = getarrayitem_raw(p0, i0, descr=floatarraydescr)
- i4 = int_mul(i1, 42)
+ i4 = int_sub(i1, 42)
i3 = int_add(i0,1)
i5 = int_lt(i3, 10)
guard_true(i5) [p0, i0]
@@ -1000,7 +990,7 @@
i4 = int_add(i0, 2)
i5 = int_lt(i2, 10)
v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
- v2 = vec_int_mul(v1, v3)
+ v2 = vec_int_sub(v1, v3)
jump(p0,i2,v3)
"""
vopt = self.vectorize(self.parse_loop(ops),1)
@@ -1011,7 +1001,7 @@
[p0,i0,f3]
guard_early_exit() [p0,i0]
f1 = getarrayitem_raw(p0, i0, descr=floatarraydescr)
- f4 = int_mul(f1, f3)
+ f4 = int_add(f1, f3)
i3 = int_add(i0,1)
i5 = int_lt(i3, 10)
guard_true(i5) [p0, i0]
@@ -1030,7 +1020,7 @@
i4 = int_add(i0, 2)
i5 = int_lt(i2, 10)
v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
- v2 = vec_int_mul(v1, v3)
+ v2 = vec_int_add(v1, v3)
jump(p0,i2,f3,v3)
"""
vopt = self.vectorize(self.parse_loop(ops),1)
@@ -1157,8 +1147,8 @@
i7 = int_add(i1, 4)
i14 = int_ge(i50, 36)
v17 = vec_getarrayitem_raw(p0, i1, 2, descr=floatarraydescr)
+ v19 = vec_cast_float_to_singlefloat(v17)
v18 = vec_getarrayitem_raw(p0, i5, 2, descr=floatarraydescr)
- v19 = vec_cast_float_to_singlefloat(v17)
v20 = vec_cast_float_to_singlefloat(v18)
v21 = vec_float_pack(v19, v20, 2, 2)
vec_setarrayitem_raw(p1, i1, v21, descr=singlefloatarraydescr)
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -374,12 +374,26 @@
self.packset.add_pack(pair)
def combine_packset(self):
+ """ Combination is done iterating the packs that have
+ been sorted by the op index of their first operation (= left).
+ Once a pack is 'full', every pack encountered later whose
+ left operation equals the full pack's right operation
+ (full_pack.right == pack.left) is removed, because the two
+ packs would otherwise share that operation.
+ """
if len(self.packset.packs) == 0:
raise NotAVectorizeableLoop()
+ packsort(self.packset.packs).sort()
+ if not we_are_translated():
+ # ensure we are really sorted!
+ x = 0
+ for i,pack in enumerate(self.packset.packs):
+ assert x <= pack.left.getindex()
+ x = pack.left.getindex()
i = 0
j = 0
- packsort(self.packset.packs)
end_ij = len(self.packset.packs)
+ remove_left = {}
while True:
len_before = len(self.packset.packs)
i = 0
@@ -389,14 +403,29 @@
j += 1
continue
pack1 = self.packset.packs[i]
+ pack2 = self.packset.packs[j]
+ # remove intermediate
+ left = pack1.operations[0]
+ if left in remove_left:
+ remove_left[left] = pack1
+ del self.packset.packs[i]
+ end_ij -= 1
+ continue
+ # check if the pack is already full
if pack1.is_full(self.cpu.vector_register_size):
+ pack1.update_pack_of_nodes()
+ right = pack1.operations[-1]
+ remove_left[right] = None
break
- pack2 = self.packset.packs[j]
if pack1.rightmost_match_leftmost(pack2):
end_ij = self.packset.combine(i,j)
- elif pack2.rightmost_match_leftmost(pack1):
- end_ij = self.packset.combine(j,i)
- j += 1
+ else:
+ # do not increment j after a successful combine: the pack
+ # that moved into index j has not been checked yet
+ j += 1
+ # set for each node to which pack it belongs
+ self.packset.packs[i].update_pack_of_nodes()
+
j = 0
i += 1
if len_before == len(self.packset.packs):
@@ -406,7 +435,15 @@
# some test cases check the accumulation variables
self.packset.accum_vars = {}
print "packs:"
+ check = {}
+ fail = False
for pack in self.packset.packs:
+ left = pack.operations[0]
+ right = pack.operations[-1]
+ if left in check or right in check:
+ fail = True
+ check[left] = None
+ check[right] = None
accum = pack.accum
if accum:
self.packset.accum_vars[accum.var] = accum.pos
@@ -414,6 +451,8 @@
print " %dx %s (accum? %d) " % (len(pack.operations),
pack.operations[0].op.getopname(),
accum is not None)
+ if fail:
+ assert False
def schedule(self, vector=False):
self.guard_early_exit = -1
@@ -463,6 +502,8 @@
def _unpack_from_vector(self, i, arg, sched_data, renamer):
(j, vbox) = sched_data.box_to_vbox.get(arg, (-1, None))
if vbox:
+ if vbox in sched_data.invariant_vector_vars:
+ return arg
arg_cloned = arg.clonebox()
renamer.start_renaming(arg, arg_cloned)
cj = ConstInt(j)
@@ -684,8 +725,6 @@
is not iterated when calling this method. """
pack_i = self.packs[i]
pack_j = self.packs[j]
- pack_i.clear()
- pack_j.clear()
operations = pack_i.operations
for op in pack_j.operations[1:]:
operations.append(op)
@@ -697,16 +736,19 @@
pack.accum = pack_i.accum
pack_i.accum = pack_j.accum = None
+ del self.packs[j]
+ return len(self.packs)
+ # OLD
# instead of deleting an item in the center of pack array,
# the last element is assigned to position j and
# the last slot is freed. Order of packs doesn't matter
- last_pos = len(self.packs) - 1
- if j == last_pos:
- del self.packs[j]
- else:
- self.packs[j] = self.packs[last_pos]
- del self.packs[last_pos]
- return last_pos
+ #last_pos = len(self.packs) - 1
+ #if j == last_pos:
+ # del self.packs[j]
+ #else:
+ # self.packs[j] = self.packs[last_pos]
+ # del self.packs[last_pos]
+ #return last_pos
def accumulates_pair(self, lnode, rnode, origin_pack):
# lnode and rnode are isomorphic and dependent
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit