Author: Richard Plangger <r...@pasra.at> Branch: vecopt-merge Changeset: r79320:5a31e0903f8d Date: 2015-08-31 15:06 +0200 http://bitbucket.org/pypy/pypy/changeset/5a31e0903f8d/
Log: fixed the scheduling tests, two of them were wrong and execute vector instructions on half-filled vector registers diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -272,21 +272,12 @@ self.sched_data = sched_data self.vecops = oplist self.costmodel = sched_data.costmodel - # self.input_type = pack.input_type self.output_type = pack.output_type # self.check_if_pack_supported(pack) - - # - if self.must_be_full_but_is_not(pack): - for op in pack.operations: - operation = op.getoperation() - self.sched_data.unpack_from_vector(operation, scheduler) - self.vecops.append(operation) - else: - self.pack = pack - self.transform_pack() + self.pack = pack + self.transform_pack() # self.pack = None self.costmodel = None @@ -295,9 +286,6 @@ self.input_type = None self.output_type = None - def must_be_full_but_is_not(self, pack): - return False - def before_argument_transform(self, args): pass @@ -1008,6 +996,8 @@ packlist.append(newpack) else: newpack.clear() + newpack.operations = [] + break def slice_operations(self, vec_reg_size): count = opcount_filling_vector_register(self, vec_reg_size) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py --- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py @@ -24,8 +24,9 @@ I16 = PackType('i',2,True,8) class FakePackSet(PackSet): - def __init__(self): - self.packs = None + def __init__(self, packs): + self.packs = packs + self.vec_reg_size = 16 class FakeDependencyGraph(DependencyGraph): """ A dependency graph that is able to emit every instruction @@ -115,8 +116,7 @@ pair = Pair(o1,o2,pack.input_type,pack.output_type) pairs.append(pair) - opt.packset = FakePackSet() - opt.packset.packs = pairs + opt.packset = 
FakePackSet(pairs) if not prepend_invariant: def pio(oplist, labels): @@ -149,7 +149,8 @@ loop2 = self.schedule(loop1, [pack1]) loop3 = self.parse(""" v10[i32|4] = vec_raw_load(p0, i0, 4, descr=float) - v11[i32|2] = vec_raw_load(p0, i4, 2, descr=float) + f10 = raw_load(p0, i4, descr=float) + f11 = raw_load(p0, i5, descr=float) """, False) self.assert_equal(loop2, loop3) @@ -379,17 +380,19 @@ loop1 = self.parse(""" i10 = raw_load(p0, i1, descr=float) i11 = raw_load(p0, i2, descr=float) + i12 = raw_load(p0, i3, descr=float) + i13 = raw_load(p0, i4, descr=float) raw_store(p0, i3, i10, descr=float) raw_store(p0, i4, i11, descr=float) """) - pack1 = self.pack(loop1, 0, 2, None, I32_2) - pack2 = self.pack(loop1, 2, 4, I32_2, None) + pack1 = self.pack(loop1, 0, 4, None, I32) + pack2 = self.pack(loop1, 4, 6, I32_2, None) loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True) loop3 = self.parse(""" - v1[i32|2] = vec_raw_load(p0, i1, 2, descr=float) - i10 = vec_int_unpack(v1[i32|2], 0, 1) + v1[i32|4] = vec_raw_load(p0, i1, 4, descr=float) + i10 = vec_int_unpack(v1[i32|4], 0, 1) raw_store(p0, i3, i10, descr=float) - i11 = vec_int_unpack(v1[i32|2], 1, 1) + i11 = vec_int_unpack(v1[i32|4], 1, 1) raw_store(p0, i4, i11, descr=float) """, False) # unfortunate ui32 is the type for float32... 
the unsigned u is for @@ -466,5 +469,13 @@ packs.append(pack) assert len(packs) == 2 + def test_combine_packset_nearly_empty_pack(self): + trace = self.parse(""" + i10 = int_add(i1, i3) + i11 = int_add(i2, i3) + """) + pack = self.pack(trace, 0, 2, I16, I16) + packset = FakePackSet([pack]) + packset.split_overloaded_packs() + assert len(packset.packs) == 0 - diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -317,9 +317,7 @@ loop = self.loop operations = loop.operations - self.packset = PackSet(self.dependency_graph, operations, - self.unroll_count, self.smallest_type_bytes, - self.cpu) + self.packset = PackSet(self.cpu.vector_register_size) graph = self.dependency_graph memory_refs = graph.memory_refs.items() # initialize the pack set @@ -422,14 +420,8 @@ j = 0 if len_before == len(self.packset.packs): break - newpacks = [] - vec_reg_size = self.cpu.vector_register_size - for pack in self.packset.packs: - if pack.pack_load(vec_reg_size) > Pack.FULL: - pack.split(newpacks, vec_reg_size) - continue - pack.update_pack_of_nodes() - self.packset.packs.extend(newpacks) + + self.packset.split_overloaded_packs() if not we_are_translated(): # some test cases check the accumulation variables @@ -700,15 +692,10 @@ return False class PackSet(object): - def __init__(self, dependency_graph, operations, unroll_count, - smallest_type_bytes, cpu): + _attrs_ = ('packs', 'vec_reg_size') + def __init__(self, vec_reg_size): self.packs = [] - self.dependency_graph = dependency_graph - self.operations = operations - self.unroll_count = unroll_count - self.smallest_type_bytes = smallest_type_bytes - self.cpu = cpu - self.vec_reg_size = self.cpu.vector_register_size + self.vec_reg_size = vec_reg_size def pack_count(self): return len(self.packs) @@ -898,3 +885,17 @@ sched_data.setvector_of_box(accum.getoriginalbox(), 0, 
result) # prevent it from expansion renamer.start_renaming(accum.getoriginalbox(), result) + def split_overloaded_packs(self): + newpacks = [] + for i,pack in enumerate(self.packs): + load = pack.pack_load(self.vec_reg_size) + if load > Pack.FULL: + pack.split(newpacks, self.vec_reg_size) + continue + if load < Pack.FULL: + pack.clear() + self.packs[i] = None + continue + pack.update_pack_of_nodes() + self.packs = [pack for pack in self.packs + newpacks if pack] + _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit