Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r78324:313bcd2938c3 Date: 2015-06-26 16:34 +0200 http://bitbucket.org/pypy/pypy/changeset/313bcd2938c3/
Log: finishing up changes diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -361,6 +361,7 @@ continue box_pos, vbox = self.sched_data.getvector_of_box(arg) if not vbox: + import pdb; pdb.set_trace() # constant/variable expand this box vbox = self.expand(arg, i) self.sched_data.setvector_of_box(arg, 0, vbox) @@ -875,6 +876,11 @@ node.pack = None node.pack_position = -1 + def update_pack_of_nodes(self): + for i,node in enumerate(self.operations): + node.pack = self + node.pack_position = i + def rightmost_match_leftmost(self, other): assert isinstance(other, Pack) rightmost = self.operations[-1] @@ -889,7 +895,8 @@ return rightmost is leftmost and accum def __repr__(self): - return "Pack(%r)" % self.operations + opname = self.operations[0].getoperation().getopname() + return "Pack(%s,%r)" % (opname, self.operations) def is_accumulating(self): return self.accum is not None diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -747,8 +747,12 @@ self.assert_packset_empty(vopt.packset, len(loop.operations), [(6,12), (5,11), (7,13)]) - @pytest.mark.parametrize("descr,size", [('char',16),('float',2),('int',2),('singlefloat',4)]) - def test_packset_combine_simple(self,descr,size): + @pytest.mark.parametrize("descr,packs,packidx", + [('char',1, [(0,(1,3,5,7))]), + ('float',2, [(0,(1,3)),(1,(5,7))]), + ('int',2, [(0,(1,3)),(1,(5,7))]), + ('singlefloat',1,[(0,(1,3,5,7))])]) + def test_packset_combine_simple(self,descr,packs,packidx): ops = """ [p0,i0] i3 = getarrayitem_raw(p0, i0, descr={descr}arraydescr) @@ -758,12 +762,13 @@ loop = self.parse_loop(ops) vopt = self.combine_packset(loop,3) assert len(vopt.dependency_graph.memory_refs) == 4 - assert len(vopt.packset.packs) == 16 // size - self.assert_pack(vopt.packset.packs[0], (1,3,5,7)) + assert len(vopt.packset.packs) == packs + for i,t in packidx: + self.assert_pack(vopt.packset.packs[i], t) - @pytest.mark.parametrize("descr,stride", - [('char',1),('float',8),('int',8),('singlefloat',4)]) - def test_packset_combine_2_loads_in_trace(self, descr, stride): + @pytest.mark.parametrize("descr,stride,packs", + [('char',1,1),('float',8,4),('int',8,4),('singlefloat',4,2)]) + def test_packset_combine_2_loads_in_trace(self, descr, stride,packs): ops = """ [p0,i0] i3 = raw_load(p0, i0, descr={type}arraydescr) @@ -775,24 +780,7 @@ loop = self.parse_loop(ops) vopt = self.combine_packset(loop,3) assert len(vopt.dependency_graph.memory_refs) == 8 - assert len(vopt.packset.packs) == (16//stride) * 2 - self.assert_pack(vopt.packset.packs[0], (1,3,5,7,9,11,13,15)) - - def test_packset_combine_2_loads_one_redundant(self): - py.test.skip("apply redundant load elimination?") - ops = """ - [p0,i0] - i3 = getarrayitem_raw(p0, i0, descr=floatarraydescr) - i1 = int_add(i0,1) - i4 = getarrayitem_raw(p0, i1, descr=floatarraydescr) - jump(p0,i1) - """ - loop = self.parse_loop(ops) - vopt = self.combine_packset(loop,3) - assert len(vopt.dependency_graph.memory_refs) == 8 - assert len(vopt.packset.packs) == 2 - self.assert_pack(vopt.packset.packs[0], (1,5,9)) - self.assert_pack(vopt.packset.packs[1], (3,7,11)) + assert len(vopt.packset.packs) == packs def test_packset_combine_no_candidates_packset_empty(self): ops = """ @@ -847,7 +835,10 @@ loop = self.parse_loop(ops) vopt = self.combine_packset(loop,3) assert len(vopt.dependency_graph.memory_refs) == 12 - assert len(vopt.packset.packs) == 4 + if stride == 8: + assert len(vopt.packset.packs) == 8 + else: + assert len(vopt.packset.packs) == 4 for opindices in [(5,12,19,26),(6,13,20,27), (7,14,21,28),(8,15,22,29)]: @@ -859,7 +850,6 @@ ('float_mul','float',8), ('int_add','int',8), ('int_sub','int',8), - ('int_mul','int',8), ]) def test_schedule_vector_operation(self, op, descr, stride): ops = """ @@ -981,7 +971,7 @@ [p0,i0] guard_early_exit() [p0,i0] i1 = getarrayitem_raw(p0, i0, descr=floatarraydescr) - i4 = int_mul(i1, 42) + i4 = int_sub(i1, 42) i3 = int_add(i0,1) i5 = int_lt(i3, 10) guard_true(i5) [p0, i0] @@ -1000,7 +990,7 @@ i4 = int_add(i0, 2) i5 = int_lt(i2, 10) v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr) - v2 = vec_int_mul(v1, v3) + v2 = vec_int_sub(v1, v3) jump(p0,i2,v3) """ vopt = self.vectorize(self.parse_loop(ops),1) @@ -1011,7 +1001,7 @@ [p0,i0,f3] guard_early_exit() [p0,i0] f1 = getarrayitem_raw(p0, i0, descr=floatarraydescr) - f4 = int_mul(f1, f3) + f4 = int_add(f1, f3) i3 = int_add(i0,1) i5 = int_lt(i3, 10) guard_true(i5) [p0, i0] @@ -1030,7 +1020,7 @@ i4 = int_add(i0, 2) i5 = int_lt(i2, 10) v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr) - v2 = vec_int_mul(v1, v3) + v2 = vec_int_add(v1, v3) jump(p0,i2,f3,v3) """ vopt = self.vectorize(self.parse_loop(ops),1) @@ -1157,8 +1147,8 @@ i7 = int_add(i1, 4) i14 = int_ge(i50, 36) v17 = vec_getarrayitem_raw(p0, i1, 2, descr=floatarraydescr) + v19 = vec_cast_float_to_singlefloat(v17) v18 = vec_getarrayitem_raw(p0, i5, 2, descr=floatarraydescr) - v19 = vec_cast_float_to_singlefloat(v17) v20 = vec_cast_float_to_singlefloat(v18) v21 = vec_float_pack(v19, v20, 2, 2) vec_setarrayitem_raw(p1, i1, v21, descr=singlefloatarraydescr) diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -374,12 +374,26 @@ self.packset.add_pack(pair) def combine_packset(self): + """ Combination is done iterating the packs that have + a sorted op index of the first operation (= left). + If a pack is marked as 'full', the next pack that is + encountered having the full_pack.right == pack.left, + the pack is removed. This is because the packs have + intersecting edges. + """ if len(self.packset.packs) == 0: raise NotAVectorizeableLoop() + packsort(self.packset.packs).sort() + if not we_are_translated(): + # ensure we are really sorted! + x = 0 + for i,pack in enumerate(self.packset.packs): + assert x <= pack.left.getindex() + x = pack.left.getindex() i = 0 j = 0 - packsort(self.packset.packs) end_ij = len(self.packset.packs) + remove_left = {} while True: len_before = len(self.packset.packs) i = 0 @@ -389,14 +403,29 @@ j += 1 continue pack1 = self.packset.packs[i] + pack2 = self.packset.packs[j] + # remove intermediate + left = pack1.operations[0] + if left in remove_left: + remove_left[left] = pack1 + del self.packset.packs[i] + end_ij -= 1 + continue + # check if the pack is already full if pack1.is_full(self.cpu.vector_register_size): + pack1.update_pack_of_nodes() + right = pack1.operations[-1] + remove_left[right] = None break - pack2 = self.packset.packs[j] if pack1.rightmost_match_leftmost(pack2): end_ij = self.packset.combine(i,j) - elif pack2.rightmost_match_leftmost(pack1): - end_ij = self.packset.combine(j,i) - j += 1 + else: + # do not inc in rightmost_match_leftmost + # this could miss some pack + j += 1 + # set for each node to which pack it belongs + self.packset.packs[i].update_pack_of_nodes() + j = 0 i += 1 if len_before == len(self.packset.packs): @@ -406,7 +435,15 @@ # some test cases check the accumulation variables self.packset.accum_vars = {} print "packs:" + check = {} + fail = False for pack in self.packset.packs: + left = pack.operations[0] + right = pack.operations[-1] + if left in check or right in check: + fail = True + check[left] = None + check[right] = None accum = pack.accum if accum: self.packset.accum_vars[accum.var] = accum.pos @@ -414,6 +451,8 @@ print " %dx %s (accum? %d) " % (len(pack.operations), pack.operations[0].op.getopname(), accum is not None) + if fail: + assert False def schedule(self, vector=False): self.guard_early_exit = -1 @@ -463,6 +502,8 @@ def _unpack_from_vector(self, i, arg, sched_data, renamer): (j, vbox) = sched_data.box_to_vbox.get(arg, (-1, None)) if vbox: + if vbox in sched_data.invariant_vector_vars: + return arg arg_cloned = arg.clonebox() renamer.start_renaming(arg, arg_cloned) cj = ConstInt(j) @@ -684,8 +725,6 @@ is not iterated when calling this method. """ pack_i = self.packs[i] pack_j = self.packs[j] - pack_i.clear() - pack_j.clear() operations = pack_i.operations for op in pack_j.operations[1:]: operations.append(op) @@ -697,16 +736,19 @@ pack.accum = pack_i.accum pack_i.accum = pack_j.accum = None + del self.packs[j] + return len(self.packs) + # OLD # instead of deleting an item in the center of pack array, # the last element is assigned to position j and # the last slot is freed. Order of packs doesn't matter - last_pos = len(self.packs) - 1 - if j == last_pos: - del self.packs[j] - else: - self.packs[j] = self.packs[last_pos] - del self.packs[last_pos] - return last_pos + #last_pos = len(self.packs) - 1 + #if j == last_pos: + # del self.packs[j] + #else: + # self.packs[j] = self.packs[last_pos] + # del self.packs[last_pos] + #return last_pos def accumulates_pair(self, lnode, rnode, origin_pack): # lnode and rnode are isomorphic and dependent _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit