Author: Richard Plangger <planri...@gmail.com> Branch: vecopt-merge Changeset: r79658:fe1eb22de735 Date: 2015-09-16 18:06 +0200 http://bitbucket.org/pypy/pypy/changeset/fe1eb22de735/
Log: forcing memo of variables while parsing to the same memo at runtime (only jitviewer and the test suite affected) poking costmodel tests, accumulator in the algorithm missing diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -356,7 +356,7 @@ pack = '' if self.pack: pack = "p: %d" % self.pack.numops() - return "Node(%s,%s i: %d)" % (self.op.getopname(), pack, self.opidx) + return "Node(%s,%s i: %d)" % (self.op, pack, self.opidx) def __ne__(self, other): return not self.__eq__(other) diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -7,6 +7,7 @@ from rpython.jit.metainterp.optimizeopt.renamer import Renamer from rpython.rlib.objectmodel import we_are_translated from rpython.jit.metainterp.jitexc import NotAProfitableLoop +from rpython.rlib.objectmodel import specialize class SchedulerState(object): @@ -78,7 +79,7 @@ state.renamer.rename(op) if unpack: state.ensure_args_unpacked(op) - node.position = len(state.oplist) + node.vector=Trueposition = len(state.oplist) worklist = state.worklist for dep in node.provides()[:]: # COPY to = dep.to @@ -131,115 +132,6 @@ for node in state.graph.nodes: assert node.emitted -#UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT, -# rop.UINT_LT, rop.UINT_LE, -# rop.UINT_GT, rop.UINT_GE) - -#class Type(object): -# """ The type of one operation. Saves type, size and sign. 
""" -# @staticmethod -# def of(op): -# descr = op.getdescr() -# if descr: -# type = INT -# if descr.is_array_of_floats() or descr.concrete_type == FLOAT: -# type = FLOAT -# size = descr.get_item_size_in_bytes() -# sign = descr.is_item_signed() -# return Type(type, size, sign) -# else: -# size = 8 -# sign = True -# if op.type == 'f' or op.getopnum() in UNSIGNED_OPS: -# sign = False -# return Type(op.type, size, sign) -# -# def __init__(self, type, size, signed): -# assert type in (FLOAT, INT) -# self.type = type -# self.size = size -# self.signed = signed -# -# def bytecount(self): -# return self.size -# -# def clone(self): -# return Type(self.type, self.size, self.signed) -# -# def __repr__(self): -# sign = '-' -# if not self.signed: -# sign = '+' -# return 'Type(%s%s, %d)' % (sign, self.type, self.size) -# - #UNKNOWN_TYPE = '-' - - #@staticmethod - #def of(box, count=-1): - # assert box.type == 'V' - # if count == -1: - # count = box.getcount() - # return Type(box.gettype(), box.getsize(), box.getsigned(), count) - - #@staticmethod - #def by_descr(descr, vec_reg_size): - # _t = INT - # signed = descr.is_item_signed() - # if descr.is_array_of_floats() or descr.concrete_type == FLOAT: - # _t = FLOAT - # signed = False - # size = descr.get_item_size_in_bytes() - # pt = Type(_t, size, signed, vec_reg_size // size) - # return pt - - #def clone(self): - # return Type(self.type, self.size, self.signed, self.count) - - #def new_vector_box(self, count = -1): - # if count == -1: - # count = self.count - # assert count > 1 - # assert self.type in ('i','f') - # assert self.size > 0 - # xxx - # return BoxVector(self.type, count, self.size, self.signed) - - #def combine(self, other): - # """ nothing to be done here """ - # if not we_are_translated(): - # assert self.type == other.type - # assert self.signed == other.signed - - - #def byte_size(self): - # return self.count * self.size - - #def setsize(self, size): - # self.size = size - - #def setcount(self, count): - # 
self.count = count - - #def gettype(self): - # return self.type - - #def getsize(self): - # return self.size - - #def getcount(self): - # return self.count - - - -class TypeOutput(object): - def __init__(self, type, count): - self.type = type - self.count = count - - - def bytecount(self): - return self.count * self.type.bytecount() - class TypeRestrict(object): ANY_TYPE = -1 ANY_SIZE = -1 @@ -273,13 +165,6 @@ TR_LONG = TypeRestrict(INT, 8, 2) TR_INT_2 = TypeRestrict(INT, 4, 2) - #INT = OpToVectorOp((TR_ANY_INTEGER, TR_ANY_INTEGER), DT_PASS) - #FLOAT = OpToVectorOp((TR_ANY_FLOAT, TR_ANY_FLOAT), DT_PASS) - #FLOAT_UNARY = OpToVectorOp((TR_ANY_FLOAT,), DT_PASS) - #LOAD = LoadToVectorLoad() - #STORE = StoreToVectorStore() - #GUARD = PassThroughOp((TR_ANY_INTEGER,)) - # note that the following definition is x86 arch specific MAPPING = { rop.VEC_INT_ADD: [TR_ANY_INTEGER, TR_ANY_INTEGER], @@ -318,11 +203,6 @@ rop.VEC_INT_IS_TRUE: [TR_ANY_INTEGER,TR_ANY_INTEGER], } - # TODO? - UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT, - rop.UINT_LT, rop.UINT_LE, - rop.UINT_GT, rop.UINT_GE) - def turn_into_vector(state, pack): """ Turn a pack into a vector instruction """ # @@ -412,39 +292,6 @@ # self.input_type.getsize() != vecop.getsize(): # vecop = self.extend(vecop, self.input_type) - # use the input as an indicator for the pack type - #packable = vecop.maximum_numops() - #packed = vecop.count - #assert packed >= 0 - #assert packable >= 0 - #if packed > packable: - # # the argument has more items than the operation is able to process! 
- # # pos == 0 then it is already at the right place - # if pos != 0: - # args[i] = self.unpack(vecop, pos, packed - pos, self.input_type) - # state.remember_args_in_vector(i, args[i]) - # #self.update_input_output(self.pack) - # continue - # else: - # assert vecop is not None - # args[i] = vecop - # continue - #vboxes = self.vector_boxes_for_args(i) - #if packed < packable and len(vboxes) > 1: - # # the argument is scattered along different vector boxes - # args[i] = self.gather(vboxes, packable) - # state.remember_args_in_vector(i, args[i]) - # continue - #if pos != 0: - # # The vector box is at a position != 0 but it - # # is required to be at position 0. Unpack it! - # args[i] = self.unpack(vecop, pos, packed - pos, self.input_type) - # state.remember_args_in_vector(i, args[i]) - # continue - ## - #assert vecop is not None - #args[i] = vecop - def check_if_pack_supported(self, pack): op0 = pack.operations[0].getoperation() if self.input_type is None: @@ -461,25 +308,6 @@ # see assembler for comment why raise NotAProfitableLoop -def extend(self, vbox, newtype): - assert vbox.gettype() == newtype.gettype() - if vbox.gettype() == INT: - return self.extend_int(vbox, newtype) - else: - raise NotImplementedError("cannot yet extend float") - -def extend_int(self, vbox, newtype): - vbox_cloned = newtype.new_vector_box(vbox.getcount()) - self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize()) - newsize = newtype.getsize() - assert newsize > 0 - op = ResOperation(rop.VEC_INT_SIGNEXT, - [vbox, ConstInt(newsize)], - vbox_cloned) - self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), vbox.getcount()) - self.vecops.append(op) - return vbox_cloned - def unpack_from_vector(state, arg, index, count): """ Extract parts of the vector box into another vector box """ print "unpack i", index, "c", count, "v", arg @@ -556,9 +384,6 @@ if variables is not None: variables.append(vecop) state.expand([arg], vecop) - #expanded_map.setdefault(arg,[]).append((vecop, 
-1)) - #for i in range(vecop.count): - # state.setvector_of_box(arg, i, vecop) args[index] = vecop return vecop @@ -642,7 +467,7 @@ SchedulerState.post_schedule(self) # add accumulation info to the descriptor - #for version in self.loop.versions: + # TODO for version in self.loop.versions: # # this needs to be done for renamed (accum arguments) # version.renamed_inputargs = [ renamer.rename_map.get(arg,arg) for arg in version.inputargs ] #self.appended_arg_count = len(sched_data.invariant_vector_vars) @@ -717,7 +542,7 @@ if argument and not argument.is_constant(): arg = self.ensure_unpacked(i, argument) if argument is not arg: - fail_arguments[i] = arg + fail_args[i] = arg def ensure_unpacked(self, index, arg): if arg in self.seen or arg.is_vector(): @@ -756,9 +581,8 @@ break self.setvector_of_box(arg, i, box) - def opcount_filling_vector_register(pack, vec_reg_size): - """ how many operations of that kind can one execute + """ How many operations of that kind can one execute with a machine instruction of register size X? """ op = pack.leftmost() @@ -790,10 +614,16 @@ def numops(self): return len(self.operations) - def leftmost(self): + @specialize.arg(1) + def leftmost(self, node=False): + if node: + return self.operations[0] return self.operations[0].getoperation() - def rightmost(self): + @specialize.arg(1) + def rightmost(self, node=False): + if node: + return self.operations[-1] return self.operations[-1].getoperation() def pack_type(self): @@ -933,7 +763,7 @@ def __repr__(self): if len(self.operations) == 0: return "Pack(empty)" - return "Pack(%dx %s)" % (self.numops(), self.operations[0]) + return "Pack(%dx %s)" % (self.numops(), self.operations) def is_accumulating(self): return self.accum is not None @@ -943,14 +773,11 @@ cloned.accum = self.accum return cloned - class Pair(Pack): """ A special Pack object with only two statements. 
""" def __init__(self, left, right): assert isinstance(left, Node) assert isinstance(right, Node) - self.left = left - self.right = right Pack.__init__(self, [left, right]) def __eq__(self, other): @@ -960,246 +787,28 @@ class AccumPair(Pair): """ A pair that keeps track of an accumulation value """ - def __init__(self, left, right, input_type, output_type, accum): + def __init__(self, left, right, accum): assert isinstance(left, Node) assert isinstance(right, Node) - Pair.__init__(self, left, right, input_type, output_type) - self.left = left - self.right = right + Pair.__init__(self, left, right) self.accum = accum -#class OpToVectorOp(object): -# def __init__(self): #, restrictargs, typeoutput): -# pass -# #self.args = list(restrictargs) # do not use a tuple. rpython cannot union -# #self.out = typeoutput +#def extend(self, vbox, newtype): +# assert vbox.gettype() == newtype.gettype() +# if vbox.gettype() == INT: +# return self.extend_int(vbox, newtype) +# else: +# raise NotImplementedError("cannot yet extend float") # -#class OpToVectorOpConv(OpToVectorOp): -# def __init__(self, intype, outtype): -# #self.from_size = intype.getsize() -# #self.to_size = outtype.getsize() -# #OpToVectorOp.__init__(self, (intype, ), outtype) -# pass -# -# def new_result_vector_box(self): -# type = self.output_type.gettype() -# size = self.to_size -# count = self.output_type.getcount() -# vec_reg_size = self.sched_data.vec_reg_size -# if count * size > vec_reg_size: -# count = vec_reg_size // size -# signed = self.output_type.signed -# assert type in ('i','f') -# assert size > 0 -# assert count > 1 -# return BoxVector(type, count, size, signed) -# -# def get_output_type_given(self, input_type, op): -# return self.result_ptype -# -# def get_input_type_given(self, output_type, op): -# return self.arg_ptypes[0] -# -# def force_input(self, ptype): -# return self.arg_ptypes[0] -# -#class SignExtToVectorOp(OpToVectorOp): -# def __init__(self, intype, outtype): -# 
OpToVectorOp.__init__(self, intype, outtype) -# self.size = -1 -# -# def before_argument_transform(self, args): -# sizearg = args[1] -# assert isinstance(sizearg, ConstInt) -# self.size = sizearg.value -# -# def new_result_vector_box(self): -# type = self.output_type.gettype() -# count = self.input_type.getcount() -# vec_reg_size = self.sched_data.vec_reg_size -# if count * self.size > vec_reg_size: -# count = vec_reg_size // self.size -# signed = self.input_type.signed -# assert type in ('i','f') -# assert self.size > 0 -# assert count > 1 -# return BoxVector(type, count, self.size, signed) -# -# def get_output_type_given(self, input_type, op): -# sizearg = op.getarg(1) -# assert isinstance(sizearg, ConstInt) -# output_type = input_type.clone() -# output_type.setsize(sizearg.value) -# return output_type -# -# def get_input_type_given(self, output_type, op): -# raise AssertionError("can never infer input type!") -# -#class LoadToVectorLoad(OpToVectorOp): -# def __init__(self): -# OpToVectorOp.__init__(self, (), TypeRestrict()) -# -# # OLD def before_argument_transform(self, args): -# #count = min(self.output_type.getcount(), len(self.getoperations())) -# #args.append(ConstInt(count)) -# -# def get_output_type_given(self, input_type, op): -# return xxx#Type.by_descr(op.getdescr(), self.sched_data.vec_reg_size) -# -# def get_input_type_given(self, output_type, op): -# return None -# -#class StoreToVectorStore(OpToVectorOp): -# """ Storing operations are special because they are not allowed -# to store to memory if the vector is not fully filled. -# Thus a modified split_pack function. 
-# """ -# def __init__(self): -# OpToVectorOp.__init__(self, (None, None, TypeRestrict()), None) -# self.has_descr = True -# -# def must_be_full_but_is_not(self, pack): -# vrs = self.sched_data.vec_reg_size -# it = pack.input_type -# return it.getsize() * it.getcount() < vrs -# -# def get_output_type_given(self, input_type, op): -# return None -# -# def get_input_type_given(self, output_type, op): -# return xxx#Type.by_descr(op.getdescr(), self.sched_data.vec_reg_size) -# -#class PassThroughOp(OpToVectorOp): -# """ This pass through is only applicable if the target -# operation is capable of handling vector operations. -# Guard true/false is such an example. -# """ -# def __init__(self, args): -# OpToVectorOp.__init__(self, args, None) -# -# def get_output_type_given(self, input_type, op): -# return None -# -# def get_input_type_given(self, output_type, op): -# raise AssertionError("cannot infer input type from output type") -# -# -# -##def determine_input_output_types(pack, node, forward): -## """ This function is two fold. If moving forward, it -## gets an input type from the packs output type and returns -## the transformed packtype. 
-## -## Moving backward, the origins pack input type is the output -## type and the transformation of the packtype (in reverse direction) -## is the input -## """ -## op = node.getoperation() -## op2vecop = determine_trans(op) -## if forward: -## input_type = op2vecop.force_input(pack.output_type) -## output_type = op2vecop.get_output_type_given(input_type, op) -## if output_type: -## output_type = output_type.clone() -## else: -## # going backwards, things are not that easy anymore -## output_type = pack.input_type -## input_type = op2vecop.get_input_type_given(output_type, op) -## if input_type: -## input_type = input_type.clone() -## -## return input_type, output_type -# -#def determine_trans(op): -# op2vecop = trans.MAPPING.get(op.vector, None) -# if op2vecop is None: -# raise NotImplementedError("missing vecop for '%s'" % (op.getopname(),)) -# return op2vecop +#def extend_int(self, vbox, newtype): +# vbox_cloned = newtype.new_vector_box(vbox.getcount()) +# self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize()) +# newsize = newtype.getsize() +# assert newsize > 0 +# op = ResOperation(rop.VEC_INT_SIGNEXT, +# [vbox, ConstInt(newsize)], +# vbox_cloned) +# self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), vbox.getcount()) +# self.vecops.append(op) +# return vbox_cloned - -#def before_argument_transform(self, args): -# pass - -#def transform_result(self, result): -# if result is None: -# return None -# vbox = self.new_result_vector_box() -# # -# # mark the position and the vbox in the hash -# for i, node in enumerate(self.getoperations()): -# if i >= vbox.getcount(): -# break -# op = node.getoperation() -# self.sched_data.setvector_of_box(op, i, vbox) -# return vbox - -#def new_result_vector_box(self): -# type = self.output_type.gettype() -# size = self.output_type.getsize() -# count = min(self.output_type.getcount(), len(self.pack.operations)) -# signed = self.output_type.signed -# return BoxVector(type, count, size, signed) - -#def 
getoperations(self): -# return self.pack.operations - -#def transform_arguments(self, args): -# """ Transforming one argument to a vector box argument -# The following cases can occur: -# 1) argument is present in the box_to_vbox map. -# a) vector can be reused immediatly (simple case) -# b) vector is to big -# c) vector is to small -# 2) argument is not known to reside in a vector -# a) expand vars/consts before the label and add as argument -# b) expand vars created in the loop body -# """ -# for i,arg in enumerate(args): -# if arg.returns_vector(): -# continue -# if not self.is_vector_arg(i): -# continue -# box_pos, vbox = self.sched_data.getvector_of_box(arg) -# if not vbox: -# # constant/variable expand this box -# vbox = self.expand(arg, i) -# self.sched_data.setvector_of_box(arg, 0, vbox) -# box_pos = 0 -# # convert size i64 -> i32, i32 -> i64, ... -# if self.input_type.getsize() > 0 and \ -# self.input_type.getsize() != vbox.getsize(): -# vbox = self.extend(vbox, self.input_type) - -# # use the input as an indicator for the pack type -# packable = self.input_type.getcount() -# packed = vbox.getcount() -# assert packed >= 0 -# assert packable >= 0 -# if packed > packable: -# # the argument has more items than the operation is able to process! -# # box_pos == 0 then it is already at the right place -# if box_pos != 0: -# args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type) -# remember_args_in_vector(i, args[i]) -# #self.update_input_output(self.pack) -# continue -# else: -# assert vbox is not None -# args[i] = vbox -# continue -# vboxes = self.vector_boxes_for_args(i) -# if packed < packable and len(vboxes) > 1: -# # the argument is scattered along different vector boxes -# args[i] = self.gather(vboxes, packable) -# remember_args_in_vector(i, args[i]) -# continue -# if box_pos != 0: -# # The vector box is at a position != 0 but it -# # is required to be at position 0. Unpack it! 
-# args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type) -# remember_args_in_vector(i, args[i]) -# continue -# #self.update_input_output(self.pack) -# # -# assert vbox is not None -# args[i] = vbox diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py --- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py @@ -2,14 +2,15 @@ from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop from rpython.jit.metainterp.optimizeopt.util import equaloplists -from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData, - Pack, NotAProfitableLoop, VectorizingOptimizer) +from rpython.jit.metainterp.optimizeopt.vector import (Pack, X86_CostModel, + NotAProfitableLoop, VectorizingOptimizer) +from rpython.jit.metainterp.optimizeopt.schedule import VecScheduleState from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin from rpython.jit.metainterp.optimizeopt.test.test_schedule import SchedulerBaseTest -from rpython.jit.metainterp.optimizeopt.test.test_vectorize import (FakeMetaInterpStaticData, +from rpython.jit.metainterp.optimizeopt.test.test_vecopt import (FakeMetaInterpStaticData, FakeJitDriverStaticData) -from rpython.jit.metainterp.resoperation import rop, ResOperation +from rpython.jit.metainterp.resoperation import rop, ResOperation, AbstractValue from rpython.jit.tool.oparser import parse as opparse from rpython.jit.tool.oparser_model import get_model @@ -18,7 +19,7 @@ self.index_var = iv self.array = array - def is_adjacent_to(self, other): + def is_adjacent_after(self, other): if self.array is not other.array: return False iv = self.index_var @@ -28,36 +29,39 @@ # i1 and i0 ... # but not i0, i2 # ... 
- return abs(val) == 1 + print iv, 'is after', ov, "?", val == 1 + return val == 1 class CostModelBaseTest(SchedulerBaseTest): + def savings(self, loop): metainterp_sd = FakeMetaInterpStaticData(self.cpu) jitdriver_sd = FakeJitDriverStaticData() - opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, []) - label_index = loop.find_first_index(rop.LABEL) - opt.orig_label_args = loop.operations[label_index].getarglist()[:] + opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 0) + opt.orig_label_args = loop.label.getarglist()[:] graph = opt.dependency_graph = DependencyGraph(loop) + self.show_dot_graph(graph, 'costmodel') for k,m in graph.memory_refs.items(): graph.memory_refs[k] = FakeMemoryRef(m.array, m.index_var) - opt.find_adjacent_memory_refs() + opt.find_adjacent_memory_refs(graph) opt.extend_packset() opt.combine_packset() for pack in opt.packset.packs: print "pack: \n ", print '\n '.join([str(op.getoperation()) for op in pack.operations]) print - opt.costmodel.reset_savings() - opt.schedule(True) - return opt.costmodel.savings + costmodel = X86_CostModel(self.cpu, 0) + state = VecScheduleState(graph, opt.packset, self.cpu, costmodel) + opt.schedule(state) + return costmodel.savings def assert_operations_match(self, loop_a, loop_b): assert equaloplists(loop_a.operations, loop_b.operations) def test_load_2_unpack(self): - loop1 = self.parse(""" - f10 = raw_load(p0, i0, descr=double) - f11 = raw_load(p0, i1, descr=double) + loop1 = self.parse_trace(""" + f10 = raw_load_f(p0, i0, descr=double) + f11 = raw_load_f(p0, i1, descr=double) guard_true(i0) [f10] guard_true(i1) [f11] """) @@ -68,11 +72,11 @@ assert savings == -2 def test_load_4_unpack(self): - loop1 = self.parse(""" - i10 = raw_load(p0, i0, descr=float) - i11 = raw_load(p0, i1, descr=float) - i12 = raw_load(p0, i2, descr=float) - i13 = raw_load(p0, i3, descr=float) + loop1 = self.parse_trace(""" + i10 = raw_load_i(p0, i0, descr=float) + i11 = raw_load_i(p0, i1, descr=float) + i12 = 
raw_load_i(p0, i2, descr=float) + i13 = raw_load_i(p0, i3, descr=float) guard_true(i0) [i10] guard_true(i1) [i11] guard_true(i2) [i12] @@ -82,29 +86,29 @@ assert savings == -1 def test_load_2_unpack_1(self): - loop1 = self.parse(""" - f10 = raw_load(p0, i0, descr=double) - f11 = raw_load(p0, i1, descr=double) + loop1 = self.parse_trace(""" + f10 = raw_load_f(p0, i0, descr=double) + f11 = raw_load_f(p0, i1, descr=double) guard_true(i0) [f10] """) savings = self.savings(loop1) assert savings == 0 def test_load_2_unpack_1_index1(self): - loop1 = self.parse(""" - f10 = raw_load(p0, i0, descr=double) - f11 = raw_load(p0, i1, descr=double) + loop1 = self.parse_trace(""" + f10 = raw_load_f(p0, i0, descr=double) + f11 = raw_load_f(p0, i1, descr=double) guard_true(i0) [f11] """) savings = self.savings(loop1) assert savings == -1 - def test_load_arith(self): - loop1 = self.parse(""" - i10 = raw_load(p0, i0, descr=int) - i11 = raw_load(p0, i1, descr=int) - i12 = raw_load(p0, i2, descr=int) - i13 = raw_load(p0, i3, descr=int) + def test_load_arith1(self): + loop1 = self.parse_trace(""" + i10 = raw_load_i(p0, i0, descr=int) + i11 = raw_load_i(p0, i1, descr=int) + i12 = raw_load_i(p0, i2, descr=int) + i13 = raw_load_i(p0, i3, descr=int) i15 = int_add(i10, 1) i16 = int_add(i11, 1) i17 = int_add(i12, 1) @@ -114,9 +118,9 @@ assert savings == 6 def test_load_arith_store(self): - loop1 = self.parse(""" - f10 = raw_load(p0, i0, descr=double) - f11 = raw_load(p0, i1, descr=double) + loop1 = self.parse_trace(""" + f10 = raw_load_f(p0, i0, descr=double) + f11 = raw_load_f(p0, i1, descr=double) i20 = cast_float_to_int(f10) i21 = cast_float_to_int(f11) i30 = int_signext(i20, 4) @@ -128,9 +132,9 @@ assert savings >= 0 def test_sum(self): - loop1 = self.parse(""" - f10 = raw_load(p0, i0, descr=double) - f11 = raw_load(p0, i1, descr=double) + loop1 = self.parse_trace(""" + f10 = raw_load_f(p0, i0, descr=double) + f11 = raw_load_f(p0, i1, descr=double) f12 = float_add(f1, f10) f13 = 
float_add(f12, f11) """) @@ -139,9 +143,9 @@ @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,0)]) def test_sum_float_to_int(self, bytes, s): - loop1 = self.parse(""" - f10 = raw_load(p0, i0, descr=double) - f11 = raw_load(p0, i1, descr=double) + loop1 = self.parse_trace(""" + f10 = raw_load_f(p0, i0, descr=double) + f11 = raw_load_f(p0, i1, descr=double) i10 = cast_float_to_int(f10) i11 = cast_float_to_int(f11) i12 = int_signext(i10, {c}) @@ -166,20 +170,20 @@ py.test.fail("must not fail") def test_cast(self): - loop1 = self.parse(""" - i100 = raw_load(p0, i1, descr=float) - i101 = raw_load(p0, i2, descr=float) - i102 = raw_load(p0, i3, descr=float) - i103 = raw_load(p0, i4, descr=float) + loop1 = self.parse_trace(""" + i100 = raw_load_i(p0, i1, descr=float) + i101 = raw_load_i(p0, i2, descr=float) + i102 = raw_load_i(p0, i3, descr=float) + i103 = raw_load_i(p0, i4, descr=float) # - i104 = raw_load(p1, i1, descr=short) - i105 = raw_load(p1, i2, descr=short) - i106 = raw_load(p1, i3, descr=short) - i107 = raw_load(p1, i4, descr=short) - i108 = raw_load(p1, i5, descr=short) - i109 = raw_load(p1, i6, descr=short) - i110 = raw_load(p1, i7, descr=short) - i111 = raw_load(p1, i8, descr=short) + i104 = raw_load_i(p1, i1, descr=short) + i105 = raw_load_i(p1, i2, descr=short) + i106 = raw_load_i(p1, i3, descr=short) + i107 = raw_load_i(p1, i4, descr=short) + i108 = raw_load_i(p1, i5, descr=short) + i109 = raw_load_i(p1, i6, descr=short) + i110 = raw_load_i(p1, i7, descr=short) + i111 = raw_load_i(p1, i8, descr=short) # f100 = cast_int_to_float(i104) f101 = cast_int_to_float(i105) @@ -192,7 +196,7 @@ """) try: self.savings(loop1) - py.test.fail("must not profitable!") + py.test.fail("must not be profitable!") except NotAProfitableLoop: pass diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py --- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py +++ 
b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py @@ -6,6 +6,7 @@ from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, Dependency, IndexVar, MemoryRef, Node) +from rpython.jit.metainterp.compile import ResumeAtLoopHeaderDescr from rpython.jit.metainterp.optimizeopt.vector import VectorLoop from rpython.jit.metainterp.resoperation import rop, ResOperation from rpython.jit.backend.llgraph.runner import ArrayDescr @@ -54,7 +55,7 @@ loop.jump.setdescr(token) for op in loop.operations: if op.getopnum() == rop.GUARD_EARLY_EXIT and op.getdescr() is None: - op.setdescr(compile.ResumeAtLoopHeaderDescr()) + op.setdescr(ResumeAtLoopHeaderDescr()) return loop def assert_edges(self, graph, edge_list, exceptions): diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py --- a/rpython/jit/metainterp/optimizeopt/vector.py +++ b/rpython/jit/metainterp/optimizeopt/vector.py @@ -29,6 +29,8 @@ from rpython.rlib.debug import debug_print, debug_start, debug_stop from rpython.rlib.jit import Counters from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.jit.backend.llsupport.symbolic import (WORD as INT_WORD, + SIZEOF_FLOAT as FLOAT_WORD) class VectorLoop(object): def __init__(self, label, oplist, jump): @@ -188,7 +190,7 @@ # vectorize graph = DependencyGraph(loop) - self.find_adjacent_memory_refs() + self.find_adjacent_memory_refs(graph) self.extend_packset() self.combine_packset() # TODO move cost model to CPU @@ -256,7 +258,7 @@ if op.getopnum() in prohibit_opnums: continue # do not unroll this operation twice copied_op = op.clone() - if copied_op.result is not None: + if not copied_op.returns_void(): # every result assigns a new box, thus creates an entry # to the rename map. 
new_assigned_box = copied_op.result.clonebox() @@ -323,7 +325,7 @@ They are represented as a linear combination: i*c/d + e, i is a variable, all others are integers that are calculated in reverse direction """ - loop = self.loop + loop = graph.loop operations = loop.operations self.packset = PackSet(self.cpu.vector_register_size) @@ -338,8 +340,10 @@ # exclue a_opidx == b_opidx only consider the ones # that point forward: if memref_a.is_adjacent_after(memref_b): + print node_a.getindex(), "is after", node_b.getindex() pair = self.packset.can_be_packed(node_a, node_b, None, False) if pair: + print "creating mem pair", pair self.packset.add_pack(pair) def extend_packset(self): @@ -348,26 +352,33 @@ """ pack_count = self.packset.pack_count() while True: - for pack in self.packset.packs: + i = 0 + packs = self.packset.packs + while i < len(packs): + pack = packs[i] self.follow_def_uses(pack) + i += 1 if pack_count == self.packset.pack_count(): pack_count = self.packset.pack_count() - for pack in self.packset.packs: + i = 0 + while i < len(packs): + pack = packs[i] self.follow_use_defs(pack) + i += 1 if pack_count == self.packset.pack_count(): break pack_count = self.packset.pack_count() def follow_use_defs(self, pack): assert isinstance(pack, Pair) - for ldep in pack.left.depends(): - for rdep in pack.right.depends(): + for ldep in pack.leftmost(True).depends(): + for rdep in pack.rightmost(True).depends(): lnode = ldep.to rnode = rdep.to - # only valid if the result of the left is in args of pack left - result = lnode.getoperation().result - args = pack.left.getoperation().getarglist() - if result is None or result not in args: + # only valid if left is in args of pack left + left = lnode.getoperation() + args = pack.leftmost().getarglist() + if left is None or left not in args: continue isomorph = isomorphic(lnode.getoperation(), rnode.getoperation()) if isomorph and lnode.is_before(rnode): @@ -377,19 +388,25 @@ def follow_def_uses(self, pack): assert 
isinstance(pack, Pair) - for ldep in pack.left.provides(): - for rdep in pack.right.provides(): + print "lprov", pack.leftmost(node=True).provides_count(), + print "rprov", pack.rightmost(node=True).provides_count() + for ldep in pack.leftmost(node=True).provides(): + for rdep in pack.rightmost(node=True).provides(): lnode = ldep.to rnode = rdep.to - result = pack.left.getoperation().result + print "trying", lnode.getindex(), rnode.getindex(), lnode, rnode + left = pack.leftmost() args = lnode.getoperation().getarglist() - if result is None or result not in args: + if left is None or left not in args: continue isomorph = isomorphic(lnode.getoperation(), rnode.getoperation()) if isomorph and lnode.is_before(rnode): pair = self.packset.can_be_packed(lnode, rnode, pack, True) if pair: + print "creating pair" , pair, pair.operations[0].op, pair.operations[1].op self.packset.add_pack(pair) + else: + print "!!!creating pair" , lnode, rnode def combine_packset(self): """ Combination is done iterating the packs that have @@ -404,7 +421,6 @@ i = 0 j = 0 end_ij = len(self.packset.packs) - orphan = {} while True: len_before = len(self.packset.packs) i = 0 @@ -616,6 +632,7 @@ cost, benefit_factor = self.cb_signext(pack) # self.savings += benefit_factor * times - cost + print "$$$ recording", benefit_factor, "*", times, "-", cost, "now:", self.savings def cb_signext(self, pack): left = pack.leftmost() @@ -627,13 +644,16 @@ def record_cast_int(self, fromsize, tosize, count): # for each move there is 1 instruction self.savings += -count + print "$$$ cast", -count, "now", self.savings def record_vector_pack(self, src, index, count): if src.datatype == FLOAT: if index == 1 and count == 1: self.savings -= 2 + print "$$$ vector pack -2 now:", self.savings return self.savings -= count + print "$$$ vector pack ", count, "now", self.savings def record_vector_unpack(self, src, index, count): self.record_vector_pack(src, index, count) @@ -680,6 +700,7 @@ if self.profitable_pack(lnode, 
rnode, origin_pack, forward): return Pair(lnode, rnode) else: + print "dependent" if self.contains_pair(lnode, rnode): return None if origin_pack is not None: @@ -688,24 +709,18 @@ def contains_pair(self, lnode, rnode): for pack in self.packs: - if pack.left is lnode or pack.right is rnode: + if pack.leftmost(node=True) is lnode or \ + pack.rightmost(node=True) is rnode: return True return False def profitable_pack(self, lnode, rnode, origin_pack, forward): - lpacknode = origin_pack.left - if self.prohibit_packing(origin_pack, - lpacknode.getoperation(), - lnode.getoperation(), - forward): + if self.prohibit_packing(origin_pack, origin_pack.leftmost(), + lnode.getoperation(), forward): return False - rpacknode = origin_pack.right - if self.prohibit_packing(origin_pack, - rpacknode.getoperation(), - rnode.getoperation(), - forward): + if self.prohibit_packing(origin_pack, origin_pack.rightmost(), + rnode.getoperation(), forward): return False - return True def prohibit_packing(self, pack, packed, inquestion, forward): @@ -713,7 +728,7 @@ if inquestion.vector == -1: return True if packed.is_primitive_array_access(): - if packed.getarg(1) == inquestion.result: + if packed.getarg(1) is inquestion: return True if not forward and inquestion.getopnum() == rop.INT_SIGNEXT: # prohibit the packing of signext in backwards direction @@ -742,37 +757,37 @@ def accumulates_pair(self, lnode, rnode, origin_pack): # lnode and rnode are isomorphic and dependent assert isinstance(origin_pack, Pair) - lop = lnode.getoperation() - opnum = lop.getopnum() + left = lnode.getoperation() + opnum = left.getopnum() if opnum in (rop.FLOAT_ADD, rop.INT_ADD, rop.FLOAT_MUL): - roper = rnode.getoperation() - assert lop.numargs() == 2 and lop.result is not None - accum_var, accum_pos = self.getaccumulator_variable(lop, roper, origin_pack) + right = rnode.getoperation() + assert left.numargs() == 2 and not left.returns_void() + accum_var, accum_pos = self.getaccumulator_variable(left, right, 
origin_pack) if not accum_var: return None - # the dependency exists only because of the result of lnode + # the dependency exists only because of the left? for dep in lnode.provides(): if dep.to is rnode: if not dep.because_of(accum_var): # not quite ... this is not handlable return None # get the original variable - accum_var = lop.getarg(accum_pos) + accum_var = left.getarg(accum_pos) # in either of the two cases the arguments are mixed, # which is not handled currently var_pos = (accum_pos + 1) % 2 - plop = origin_pack.left.getoperation() - if lop.getarg(var_pos) is not plop.result: + if left.getarg(var_pos) is not origin_pack.leftmost(): return None - prop = origin_pack.right.getoperation() - if roper.getarg(var_pos) is not prop.result: + if right.getarg(var_pos) is not origin_pack.rightmost(): return None # this can be handled by accumulation - ptype = origin_pack.output_type - if ptype.getsize() != 8: + size = INT_WORD + if left.type == 'f': + size = FLOAT_WORD + if left.bytesize == right.bytesize and left.bytesize == size: # do not support if if the type size is smaller # than the cpu word size. # WHY? @@ -781,16 +796,14 @@ # considered. => tree pattern matching problem. 
return None accum = Accum(opnum, accum_var, accum_pos) - return AccumPair(lnode, rnode, ptype, ptype, accum) + return AccumPair(lnode, rnode, accum) return None - def getaccumulator_variable(self, lop, rop, origin_pack): - args = rop.getarglist() - for i, arg in enumerate(args): - if arg is lop.result: + def getaccumulator_variable(self, left, right, origin_pack): + for i, arg in enumerate(right.getarglist()): + if arg is left: return arg, i - # return None, -1 def accumulate_prepare(self, state): diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -147,6 +147,11 @@ i += 1 arg = self.getarg(i) if arg.is_constant(): + if arg.type == 'i': + self.setdatatype('i', INT_WORD, True) + else: + assert arg.type == 'f' + self.setdatatype('f', FLOAT_WORD, False) return self.setdatatype(arg.datatype, arg.bytesize, arg.signed) assert self.datatype != '\x00' diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py --- a/rpython/jit/tool/oparser.py +++ b/rpython/jit/tool/oparser.py @@ -198,6 +198,8 @@ from rpython.rtyper.lltypesystem import lltype, llmemory assert elem.startswith('p') v = InputArgRef(lltype.nullptr(llmemory.GCREF.TO)) + # ensure that the variable gets the proper naming + self.update_memo(v, elem) self.vars[elem] = v return v @@ -353,9 +355,24 @@ raise ParseError("Double assign to var %s in line: %s" % (res, line)) resop = self.create_op(opnum, args, res, descr, fail_args) res = self.update_vector(resop, res) + self.update_memo(resop, res) self.vars[res] = resop return resop + def update_memo(self, val, name): + """ This updates the id of the operation or inputarg. + Internally you will see the same variable names as + in the trace as string. 
+ """ + regex = re.compile("[prif](\d+)") + match = regex.match(name) + if match: + counter = int(match.group(1)) + countdict = val._repr_memo + countdict._d[val] = counter + if countdict.counter < counter: + countdict.counter = counter + def update_vector(self, resop, var): pattern = re.compile('.*\[(\d+)x(u?)(i|f)(\d+)\]') match = pattern.match(var) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit