Author: Richard Plangger <planri...@gmail.com> Branch: vecopt-merge Changeset: r79673:32891e533aab Date: 2015-09-17 18:50 +0200 http://bitbucket.org/pypy/pypy/changeset/32891e533aab/
Log: reanimated vecopt integration tests that use the assembler backend. not quite sure yet how to come along supports_gc_type diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py --- a/rpython/jit/backend/llsupport/llmodel.py +++ b/rpython/jit/backend/llsupport/llmodel.py @@ -52,6 +52,10 @@ else: translator = None self.gc_ll_descr = get_ll_description(gcdescr, translator, rtyper) + # support_guard_gc_type indicates if a gc type of an object can be read. + # In some states (boehm or x86 untranslated) the type is not known just yet, + # because there are cases where it is not guarded. The precise place where it's not + # is while inlining short preamble. self.supports_guard_gc_type = self.gc_ll_descr.supports_guard_gc_type if translator and translator.config.translation.gcremovetypeptr: self.vtable_offset = None diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -648,7 +648,7 @@ startpos = self.mc.get_relative_pos() self.store_info_on_descr(startpos, tok) else: - regalloc.position = tok.position + # TODO regalloc.position = tok.position tok.pos_recovery_stub = self.generate_quick_failure(tok, regalloc) if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0: self.error_trampoline_64 = self.generate_propagate_error_64() @@ -1654,27 +1654,27 @@ self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value) def genop_guard_guard_true(self, guard_op, guard_token, locs, resloc): - loc = locs[0] - if isinstance(loc, RegLoc): - if loc.is_xmm: - self._guard_vector_true(guard_op, loc) - # XXX - self.implement_guard(guard_token, 'NZ') - return - self.mc.TEST(loc, loc) + #loc = locs[0] + #if isinstance(loc, RegLoc): + # if loc.is_xmm: + # self._guard_vector_true(guard_op, loc) + # # XXX + # self.implement_guard(guard_token, 'NZ') + # return + #self.mc.TEST(loc, loc) self.implement_guard(guard_token) genop_guard_guard_nonnull = genop_guard_guard_true def genop_guard_guard_false(self, guard_op, guard_token, locs, resloc): self.guard_success_cc = rx86.invert_condition(self.guard_success_cc) - loc = locs[0] - if isinstance(loc, RegLoc): - if loc.is_xmm: - self._guard_vector_false(guard_op, loc) - # XXX - self.implement_guard(guard_token, 'NZ') - return - self.mc.TEST(loc, loc) + # TODO loc = locs[0] + #if isinstance(loc, RegLoc): + # if loc.is_xmm: + # self._guard_vector_false(guard_op, loc) + # # XXX + # self.implement_guard(guard_token, 'NZ') + # return + #self.mc.TEST(loc, loc) self.implement_guard(guard_token) genop_guard_guard_isnull = genop_guard_guard_false @@ -1884,7 +1884,7 @@ self.mc.JMP(imm(self.propagate_exception_path)) return startpos - def generate_quick_failure(self, guardtok): + def generate_quick_failure(self, guardtok, regalloc): """ Gather information about failure """ self.mc.force_frame_size(DEFAULT_FRAME_BYTES) diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -323,8 +323,7 @@ if arg is None: faillocs.append(None) continue - accum = arg.getaccum() - if accum: + if arg.is_vector() and arg.getaccum(): # for an accumulator store the position of the original # box and in llsupport/assembler save restore information # on the descriptor diff --git a/rpython/jit/backend/x86/test/test_x86vector.py b/rpython/jit/backend/x86/test/test_x86vector.py --- a/rpython/jit/backend/x86/test/test_x86vector.py +++ b/rpython/jit/backend/x86/test/test_x86vector.py @@ -11,7 +11,7 @@ from rpython.rtyper.lltypesystem import lltype -class TestBasic(test_vector.VectorizeLLtypeTests, test_basic.Jit386Mixin): +class TestBasic(test_basic.Jit386Mixin, test_vector.VectorizeTests): # for the individual tests see # ====> ../../../metainterp/test/test_basic.py enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll' diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py --- a/rpython/jit/metainterp/optimizeopt/__init__.py +++ b/rpython/jit/metainterp/optimizeopt/__init__.py @@ -32,9 +32,9 @@ def build_opt_chain(metainterp_sd, enable_opts): optimizations = [] unroll = 'unroll' in enable_opts # 'enable_opts' is normally a dict - if (metainterp_sd.cpu is not None and - not metainterp_sd.cpu.supports_guard_gc_type): - unroll = False + #if (metainterp_sd.cpu is not None and + # not metainterp_sd.cpu.supports_guard_gc_type): + # unroll = False for name, opt in unroll_all_opts: if name in enable_opts: if opt is not None: diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -132,6 +132,9 @@ self.guard_bool_bool_node = None self._stack = False + def is_imaginary(self): + return False + def getoperation(self): return self.op def getindex(self): @@ -186,8 +189,8 @@ isinstance(descr, compile.CompileLoopVersionDescr) return False - def is_guard_early_exit(self): - return self.op.getopnum() == rop.GUARD_EARLY_EXIT + # TODO def is_guard_early_exit(self): + # return self.op.getopnum() == rop.GUARD_EARLY_EXIT def loads_from_complex_object(self): return rop._ALWAYS_PURE_LAST <= self.op.getopnum() < rop._MALLOC_FIRST @@ -286,11 +289,14 @@ return True def iterate_paths(self, to, backwards=False, path_max_len=-1, blacklist=False): - """ yield all nodes from self leading to 'to'. backwards determines - the iteration direction and blacklist marks nodes that have already been visited. - blacklist comes in handy if a property must hold for every path. not *every* possible - instance must be iterated, but trees that have already been visited can be ignored - after the have been visited + """ Yield all nodes from self leading to 'to'. + + backwards: Determines the iteration direction. + blacklist: Marks nodes that have already been visited. + It comes in handy if a property must hold for every path. + Not *every* possible instance must be iterated, but trees + that have already been visited can be ignored after the + first visit. """ if self is to: return @@ -304,6 +310,8 @@ else: iterdir = node.provides() if index >= len(iterdir): + if to is None and index == 0: + yield Path(path.path[:]) if blacklist: blacklist_visit[node] = None continue @@ -322,7 +330,8 @@ continue pathlen += 1 - if next_node is to or (path_max_len > 0 and pathlen >= path_max_len): + if next_node is to or \ + (path_max_len > 0 and pathlen >= path_max_len): yield Path(path.path[:]) # note that the destiantion node ``to'' is never blacklisted #if blacklist: @@ -334,14 +343,14 @@ i = 0 while i < len(self.adjacent_list): dep = self.adjacent_list[i] - if dep.to == node: + if dep.to is node: del self.adjacent_list[i] break i += 1 i = 0 while i < len(node.adjacent_list_back): dep = node.adjacent_list_back[i] - if dep.to == self: + if dep.to is self: del node.adjacent_list_back[i] break i += 1 @@ -358,15 +367,30 @@ pack = "p: %d" % self.pack.numops() return "Node(%s,%s i: %d)" % (self.op, pack, self.opidx) - def __ne__(self, other): - return not self.__eq__(other) + def getdotlabel(self): + """ NOT_RPTYHON """ + op_str = str(self.op) + if self.op.is_guard(): + args_str = [str(arg) for arg in self.op.getfailargs()] + op_str += " " + ','.join(args_str) + return "[%d] %s" % (self.opidx, op_str) - def __eq__(self, other): - if other is None: - return False - assert isinstance(other, Node) - return self.opidx == other.opidx +class ImaginaryNode(Node): + _index = 987654321 # big enough? :) + def __init__(self, label): + index = -1 + if not we_are_translated(): + self.dotlabel = label + index = ImaginaryNode._index + ImaginaryNode._index += 1 + Node.__init__(self, None, index) + def is_imaginary(self): + return True + + def getdotlabel(self): + """ NOT_RPTYHON """ + return self.dotlabel class Dependency(object): def __init__(self, at, to, arg, failarg=False): @@ -385,6 +409,12 @@ return True return False + def target_node(self): + return self.to + + def origin_node(self): + return self.at + def to_index(self): return self.to.getindex() def at_index(self): @@ -509,7 +539,8 @@ def __init__(self, loop): self.loop = loop self.label = Node(loop.label, 0) - self.nodes = [ Node(op,i+1) for i,op in enumerate(loop.operations) ] + self.nodes = [ Node(op,i+1) for i,op in enumerate(loop.operations) if not op.is_debug() ] + self.inodes = [] # imaginary nodes self.jump = Node(loop.jump, len(self.nodes)+1) self.invariant_vars = {} self.update_invariant_vars() @@ -523,6 +554,11 @@ def getnode(self, i): return self.nodes[i] + def imaginary_node(self, label): + node = ImaginaryNode(label) + self.inodes.append(node) + return node + def update_invariant_vars(self): label_op = self.label.getoperation() jump_op = self.jump.getoperation() @@ -559,18 +595,20 @@ node.setpriority(2) # the label operation defines all operations at the # beginning of the loop - if op.getopnum() == rop.LABEL and i != jump_pos: - node.setpriority(100) - label_pos = i - for arg in op.getarglist(): - tracker.define(arg, node) - continue # prevent adding edge to the label itself - elif node.is_guard_early_exit(): - label_node = self.nodes[label_pos] - label_node.edge_to(node,None,label='L->EE') - for arg in label_node.getoperation().getarglist(): - tracker.define(arg, node) - continue + + # TODO if op.getopnum() == rop.LABEL and i != jump_pos: + # node.setpriority(100) + # label_pos = i + # for arg in op.getarglist(): + # tracker.define(arg, node) + # continue # prevent adding edge to the label itself + #elif node.is_guard_early_exit(): + # label_node = self.nodes[label_pos] + # label_node.edge_to(node,None,label='L->EE') + # for arg in label_node.getoperation().getarglist(): + # tracker.define(arg, node) + # continue + intformod.inspect_operation(op,node) # definition of a new variable if op.type != 'v': @@ -774,20 +812,22 @@ graph += "\n" return graph + " ])" + def view(self): + """ NOT_RPYTHON """ + from rpython.translator.tool.graphpage import GraphPage + page = GraphPage() + page.source = self.as_dot() + page.links = [] + page.display() + def as_dot(self): """ NOT_RPTYHON """ if not we_are_translated(): dot = "digraph dep_graph {\n" - for node in self.nodes: - op = node.getoperation() - if op.getopnum() == rop.DEBUG_MERGE_POINT: - continue - op_str = str(op) - if op.is_guard(): - op_str += " " + ','.join([str(arg) for arg in op.getfailargs()]) - dot += " n%d [label=\"[%d]: %s\"];\n" % (node.getindex(),node.getindex(),op_str) + for node in self.nodes + self.inodes: + dot += " n%d [label=\"%s\"];\n" % (node.getindex(),node.getdotlabel()) dot += "\n" - for node in self.nodes: + for node in self.nodes + self.inodes: for dep in node.provides(): label = '' if getattr(dep, 'label', None): diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -18,6 +18,7 @@ self.worklist = [] self.invariant_oplist = [] self.invariant_vector_vars = [] + self.seen = {} def post_schedule(self): loop = self.graph.loop @@ -32,17 +33,30 @@ loop.prefix_label = loop.label.copy_and_change(opnum, args) def profitable(self): - return self.costmodel.profitable() + return True def prepare(self): - pass + for node in self.graph.nodes: + if node.depends_count() == 0: + self.worklist.insert(0, node) - def delay(self): + def emit(self, node, scheduler): + # implement me in subclass. e.g. as in VecScheduleState + return False + + def delay(self, node): return False def has_more(self): return len(self.worklist) > 0 + def ensure_args_unpacked(self, op): + pass + + def post_emit(self, op): + pass + + class Scheduler(object): """ Create an instance of this class to (re)schedule a vector trace. """ def __init__(self): @@ -75,11 +89,6 @@ """ An operation has been emitted, adds new operations to the worklist whenever their dependency count drops to zero. Keeps worklist sorted (see priority) """ - op = node.getoperation() - state.renamer.rename(op) - if unpack: - state.ensure_args_unpacked(op) - node.vector=Trueposition = len(state.oplist) worklist = state.worklist for dep in node.provides()[:]: # COPY to = dep.to @@ -104,20 +113,28 @@ worklist.insert(0, to) node.clear_dependencies() node.emitted = True + if not node.is_imaginary(): + op = node.getoperation() + state.renamer.rename(op) + if unpack: + state.ensure_args_unpacked(op) + state.post_emit(node.getoperation()) def walk_and_emit(self, state): """ Emit all the operations into the oplist parameter. Initiates the scheduling. """ assert isinstance(state, SchedulerState) + import pdb; pdb.set_trace() while state.has_more(): node = self.next(state) if node: if not state.emit(node, self): if not node.emitted: - op = node.getoperation() self.mark_emitted(node, state) - state.seen[op] = None - state.oplist.append(op) + if not node.is_imaginary(): + op = node.getoperation() + state.seen[op] = None + state.oplist.append(op) continue # it happens that packs can emit many nodes that have been @@ -246,6 +263,10 @@ assert isinstance(vecop, GuardResOp) vecop.setfailargs(op.getfailargs()) vecop.rd_snapshot = op.rd_snapshot + if pack.is_accumulating(): + for i,node in enumerate(pack.operations): + op = node.getoperation() + state.accumulation[op] = pack def prepare_arguments(state, pack, args): @@ -456,7 +477,7 @@ self.packset = packset for arg in graph.loop.inputargs: self.inputargs[arg] = None - self.seen = {} + self.accumulation = {} def expand(self, args, vecop): index = 0 @@ -496,39 +517,33 @@ return vecop return None + def post_emit(self, op): + if op.is_guard(): + # add accumulation info to the descriptor + # TODO for version in self.loop.versions: + # # this needs to be done for renamed (accum arguments) + # version.renamed_inputargs = [ renamer.rename_map.get(arg,arg) for arg in version.inputargs ] + #self.appendedvar_pos_arg_count = len(sched_data.invariant_vector_vars) + failargs = op.getfailargs() + descr = op.getdescr() + for i,arg in enumerate(failargs): + if arg is None: + continue + accum = state.accumulation.get(arg, None) + if accum: + assert isinstance(accum, AccumPack) + accum.attach_accum_info(descr.rd_accum_list, i) + def post_schedule(self): loop = self.graph.loop self.ensure_args_unpacked(loop.jump) SchedulerState.post_schedule(self) - # add accumulation info to the descriptor - # TODO for version in self.loop.versions: - # # this needs to be done for renamed (accum arguments) - # version.renamed_inputargs = [ renamer.rename_map.get(arg,arg) for arg in version.inputargs ] - #self.appended_arg_count = len(sched_data.invariant_vector_vars) - ##for guard_node in graph.guards: - ## op = guard_node.getoperation() - ## failargs = op.getfailargs() - ## for i,arg in enumerate(failargs): - ## if arg is None: - ## continue - ## accum = arg.getaccum() - ## if accum: - ## pass - ## #accum.save_to_descr(op.getdescr(),i) - #self.has_two_labels = len(sched_data.invariant_oplist) > 0 - #self.loop.operations = self.prepend_invariant_operations(sched_data) - - def profitable(self): return self.costmodel.profitable() def prepare(self): SchedulerState.prepare(self) - for node in self.graph.nodes: - if node.depends_count() == 0: - self.worklist.insert(0, node) - self.packset.accumulate_prepare(self) for arg in self.graph.loop.label.getarglist(): self.seen[arg] = None @@ -640,10 +655,14 @@ * independent """ FULL = 0 + _attrs_ = ('operations', 'accumulator', 'operator', 'position') + + operator = '\x00' + position = -1 + accumulator = None def __init__(self, ops): self.operations = ops - self.accum = None self.update_pack_of_nodes() def numops(self): @@ -776,13 +795,12 @@ rightmost = self.operations[-1] leftmost = other.operations[0] # if it is not accumulating it is valid - accum = True if self.is_accumulating(): if not other.is_accumulating(): - accum = False - elif self.accum.pos != other.accum.pos: - accum = False - return rightmost is leftmost and accum + return False + elif self.position != other.position: + return False + return rightmost is leftmost def argument_vectors(self, state, pack, index, pack_args_index): vectors = [] @@ -800,12 +818,10 @@ return "Pack(%dx %s)" % (self.numops(), self.operations) def is_accumulating(self): - return self.accum is not None + return False def clone(self, oplist): - cloned = Pack(oplist) - cloned.accum = self.accum - return cloned + return Pack(oplist) class Pair(Pack): """ A special Pack object with only two statements. """ @@ -819,10 +835,37 @@ return self.left is other.left and \ self.right is other.right -class AccumPair(Pair): - """ A pair that keeps track of an accumulation value """ - def __init__(self, left, right, accum): - assert isinstance(left, Node) - assert isinstance(right, Node) - Pair.__init__(self, left, right) - self.accum = accum +class AccumPack(Pack): + SUPPORTED = { rop.FLOAT_ADD: '+', + rop.INT_ADD: '+', + rop.FLOAT_MUL: '*', + } + + def __init__(self, nodes, operator, accum, position): + Pack.__init__(self, [left, right]) + self.accumulator = accum + self.operator = operator + self.position = position + + def getdatatype(self): + return self.accumulator.datatype + + def getbytesize(self): + return self.accumulator.bytesize + + def getseed(self): + """ The accumulatoriable holding the seed value """ + return self.accumulator + + def attach_accum_info(self, descr, position, scalar): + descr.rd_accum_list = AccumInfo(descr.rd_accum_list, + position, self.operator, + self.scalar, None) + + def is_accumulating(self): + return True + + def clone(self): + return AccumPack(operations, self.operator, + self.accumulator, self.position) + diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py --- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py @@ -58,6 +58,32 @@ op.setdescr(ResumeAtLoopHeaderDescr()) return loop + def parse_trace(self, source, inc_label_jump=True, pargs=2, iargs=10, + fargs=6, additional_args=None, replace_args=None): + args = [] + for prefix, rang in [('p',range(pargs)), + ('i',range(iargs)), + ('f',range(fargs))]: + for i in rang: + args.append(prefix + str(i)) + + assert additional_args is None or isinstance(additional_args,list) + for arg in additional_args or []: + args.append(arg) + for k,v in (replace_args or {}).items(): + for i,_ in enumerate(args): + if k == args[i]: + args[i] = v + break + indent = " " + joinedargs = ','.join(args) + fmt = (indent, joinedargs, source, indent, joinedargs) + src = "%s[%s]\n%s\n%sjump(%s)" % fmt + loop = self.parse_loop(src) + loop.graph = FakeDependencyGraph(loop) + return loop + + def assert_edges(self, graph, edge_list, exceptions): """ Check if all dependencies are met. for complex cases adding None instead of a list of integers skips the test. diff --git a/rpython/jit/metainterp/optimizeopt/test/test_guard.py b/rpython/jit/metainterp/optimizeopt/test/test_guard.py --- a/rpython/jit/metainterp/optimizeopt/test/test_guard.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_guard.py @@ -2,17 +2,17 @@ from rpython.jit.metainterp import compile from rpython.jit.metainterp.history import (TargetToken, JitCellToken, - TreeLoop, Box, Const) + TreeLoop, Const) from rpython.jit.metainterp.optimizeopt.util import equaloplists -from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData, - Pack, NotAProfitableLoop, VectorizingOptimizer) +from rpython.jit.metainterp.optimizeopt.vector import (Pack, + NotAProfitableLoop, VectorizingOptimizer) from rpython.jit.metainterp.optimizeopt.dependency import (Node, DependencyGraph, IndexVar) from rpython.jit.metainterp.optimizeopt.guard import (GuardStrengthenOpt, Guard) from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin from rpython.jit.metainterp.optimizeopt.test.test_schedule import SchedulerBaseTest -from rpython.jit.metainterp.optimizeopt.test.test_vectorize import (FakeMetaInterpStaticData, +from rpython.jit.metainterp.optimizeopt.test.test_vecopt import (FakeMetaInterpStaticData, FakeJitDriverStaticData) from rpython.jit.metainterp.resoperation import rop, ResOperation from rpython.jit.tool.oparser_model import get_model @@ -57,7 +57,7 @@ return self.opnum def box(value): - return Box._new(value) + return InputArgInt(value) def const(value): return Const._new(value) @@ -80,12 +80,13 @@ class GuardBaseTest(SchedulerBaseTest): def optguards(self, loop, user_code=False): - loop.snapshot() + #loop.snapshot() for op in loop.operations: if op.is_guard(): op.setdescr(compile.CompileLoopVersionDescr()) dep = DependencyGraph(loop) opt = GuardStrengthenOpt(dep.index_vars, False) + xxx opt.propagate_all_forward(loop, user_code) return opt @@ -159,7 +160,7 @@ assert j == len(operations), self.debug_print_operations(loop) def test_basic(self): - loop1 = self.parse(""" + loop1 = self.parse_trace(""" i10 = int_lt(i1, 42) guard_true(i10) [] i11 = int_add(i1, 1) @@ -177,7 +178,7 @@ """) def test_basic_sub(self): - loop1 = self.parse(""" + loop1 = self.parse_trace(""" i10 = int_gt(i1, 42) guard_true(i10) [] i11 = int_sub(i1, 1) @@ -195,7 +196,7 @@ """) def test_basic_mul(self): - loop1 = self.parse(""" + loop1 = self.parse_trace(""" i10 = int_mul(i1, 4) i20 = int_lt(i10, 42) guard_true(i20) [] @@ -310,7 +311,7 @@ assert not g2.implies(g1) def test_collapse(self): - loop1 = self.parse(""" + loop1 = self.parse_trace(""" i10 = int_gt(i1, 42) guard_true(i10) [] i11 = int_add(i1, 1) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py --- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py @@ -9,8 +9,7 @@ from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph from rpython.jit.metainterp.optimizeopt.schedule import Scheduler from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin -from rpython.jit.metainterp.optimizeopt.test.test_dependency import (DependencyBaseTest, - FakeDependencyGraph) +from rpython.jit.metainterp.optimizeopt.test.test_dependency import (DependencyBaseTest) from rpython.jit.metainterp.optimizeopt.test.test_vecopt import (FakeMetaInterpStaticData, FakeJitDriverStaticData) from rpython.jit.metainterp.resoperation import rop, ResOperation @@ -39,31 +38,6 @@ 'char': self.chararraydescr, } - def parse_trace(self, source, inc_label_jump=True, pargs=2, iargs=10, - fargs=6, additional_args=None, replace_args=None): - args = [] - for prefix, rang in [('p',range(pargs)), - ('i',range(iargs)), - ('f',range(fargs))]: - for i in rang: - args.append(prefix + str(i)) - - assert additional_args is None or isinstance(additional_args,list) - for arg in additional_args or []: - args.append(arg) - for k,v in (replace_args or {}).items(): - for i,_ in enumerate(args): - if k == args[i]: - args[i] = v - break - indent = " " - joinedargs = ','.join(args) - fmt = (indent, joinedargs, source, indent, joinedargs) - src = "%s[%s]\n%s\n%sjump(%s)" % fmt - loop = self.parse_loop(src) - loop.graph = FakeDependencyGraph(loop) - return loop - def pack(self, loop, l, r, input_type=None, output_type=None): return Pack(loop.graph.nodes[l:r]) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py @@ -14,6 +14,7 @@ from rpython.jit.metainterp.optimizeopt.vector import (VectorizingOptimizer, MemoryRef, isomorphic, Pair, NotAVectorizeableLoop, NotAProfitableLoop, GuardStrengthenOpt, CostModel, VectorLoop) +from rpython.jit.metainterp.optimizeopt.schedule import (Scheduler, SchedulerState) from rpython.jit.metainterp.optimize import InvalidLoop from rpython.jit.metainterp import compile from rpython.jit.metainterp.resoperation import rop, ResOperation @@ -42,17 +43,24 @@ jitdriver_sd = FakeJitDriverStaticData() def assert_vectorize(self, loop, expected_loop, call_pure_results=None): - self._do_optimize_loop(loop, call_pure_results, export_state=True) + self._do_optimize_loop(loop) self.assert_equal(loop, expected_loop) def vectoroptimizer(self, loop): metainterp_sd = FakeMetaInterpStaticData(self.cpu) jitdriver_sd = FakeJitDriverStaticData() opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 0) - label_index = loop.find_first_index(rop.LABEL) - opt.orig_label_args = loop.operations[label_index].getarglist()[:] + opt.orig_label_args = loop.label.getarglist()[:] return opt + def earlyexit(self, loop): + opt = self.vectoroptimizer(loop) + graph = opt.analyse_index_calculations(loop) + graph.view() + state = SchedulerState(graph) + opt.schedule(state) + return graph.loop + def vectoroptimizer_unrolled(self, loop, unroll_factor = -1): loop.snapshot() opt = self.vectoroptimizer(loop) @@ -185,6 +193,19 @@ class BaseTestVectorize(VecTestHelper): + def test_move_guard_first(self): + trace = self.parse_trace(""" + i10 = int_add(i0, i1) + # + i11 = int_add(i0, i1) + guard_true(i11) [] + """) + add = trace.operations[1] + guard = trace.operations[2] + trace = self.earlyexit(trace) + assert trace.operations[0] is add + assert trace.operations[1] is guard + def test_vectorize_skip(self): ops = """ [p0,i0] @@ -757,7 +778,7 @@ @pytest.mark.parametrize("descr,stride,packs,suffix", [('char',1,1,'_i'),('float',8,4,'_f'),('int',8,4,'_i'),('float32',4,2,'_i')]) - def test_packset_combine_2_loads_in_trace(self, descr, stride,packs): + def test_packset_combine_2_loads_in_trace(self, descr, stride, packs, suffix): ops = """ [p0,i0] i3 = raw_load{suffix}(p0, i0, descr={type}arraydescr) diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py --- a/rpython/jit/metainterp/optimizeopt/vector.py +++ b/rpython/jit/metainterp/optimizeopt/vector.py @@ -21,10 +21,10 @@ MemoryRef, Node, IndexVar) from rpython.jit.metainterp.optimizeopt.version import LoopVersionInfo from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleState, - Scheduler, Pack, Pair, AccumPair) + SchedulerState, Scheduler, Pack, Pair, AccumPack) from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp, - Accum, OpHelpers, VecOperation) + OpHelpers, VecOperation) from rpython.rlib import listsort from rpython.rlib.objectmodel import we_are_translated from rpython.rlib.debug import debug_print, debug_start, debug_stop @@ -60,7 +60,7 @@ # the original loop (output of optimize_unroll) info = LoopVersionInfo(loop_info) version = info.snapshot(loop_ops, info.label_op) - loop = VectorLoop(loop_info.label_op, loop_ops[:-1], loop_ops[-1]) + loop = VectorLoop(loop_info.label_op, loop_ops[1:-1], loop_ops[-1]) try: debug_start("vec-opt-loop") metainterp_sd.logger_noopt.log_loop([], loop.operation_list(), -2, None, None, "pre vectorize") @@ -160,21 +160,23 @@ self.has_two_labels = False def propagate_all_forward(self, info, loop): - label = loop.label - jump = loop.jump - self.orig_label_args = label.getarglist_copy() - if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \ - label.getopnum() != rop.LABEL: - raise NotAVectorizeableLoop() - if jump.numargs() != label.numargs(): - raise NotAVectorizeableLoop() - + #label = loop.label + #jump = loop.jump + #if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \ + # label.getopnum() != rop.LABEL: + # import pdb; pdb. set_trace() + # raise NotAVectorizeableLoop() + #if jump.numargs() != label.numargs(): + # import pdb; pdb. set_trace() + # raise NotAVectorizeableLoop() + self.orig_label_args = loop.label.getarglist_copy() self.linear_find_smallest_type(loop) byte_count = self.smallest_type_bytes vsize = self.cpu.vector_register_size - if vsize == 0 or byte_count == 0 or label.getopnum() != rop.LABEL: + if vsize == 0 or byte_count == 0 or loop.label.getopnum() != rop.LABEL: # stop, there is no chance to vectorize this trace # we cannot optimize normal traces (if there is no label) + import pdb; pdb. set_trace() raise NotAVectorizeableLoop() # find index guards and move to the earliest position @@ -186,7 +188,7 @@ # unroll self.unroll_count = self.get_unroll_count(vsize) self.unroll_loop_iterations(loop, self.unroll_count) - self.loop.operations = self.get_newoperations(); + loop.operations = self.get_newoperations() self.clear_newoperations(); # vectorize @@ -207,29 +209,26 @@ def unroll_loop_iterations(self, loop, unroll_count): """ Unroll the loop X times. unroll_count + 1 = unroll_factor """ - op_count = len(loop.operations) - - label_op = loop.operations[0].clone() - assert label_op.getopnum() == rop.LABEL - jump_op = loop.operations[op_count-1] - assert jump_op.getopnum() in (rop.LABEL, rop.JUMP) + numops = len(loop.operations) + label_op = loop.label + jump_op = loop.jump # use the target token of the label - target_token = label_op.getdescr() - if not we_are_translated(): - target_token.assumed_classes = {} - if jump_op.getopnum() == rop.LABEL: - jump_op = ResOperation(rop.JUMP, jump_op.getarglist(), target_token) - else: - jump_op = jump_op.clone() - jump_op.setdescr(target_token) - assert jump_op.is_final() + #target_token = label_op.getdescr() + #if not we_are_translated(): + # target_token.assumed_classes = {} + #if jump_op.getopnum() == rop.LABEL: + # jump_op = ResOperation(rop.JUMP, jump_op.getarglist(), target_token) + #else: + # jump_op = jump_op.clone() + # jump_op.setdescr(target_token) + #assert jump_op.is_final() self.emit_unrolled_operation(label_op) renamer = Renamer() operations = [] - for i in range(1,op_count-1): - op = loop.operations[i].clone() + for i in range(1,numops-1): + op = loop.operations[i].copy() if op.is_guard(): assert isinstance(op, GuardResOp) failargs = renamer.rename_failargs(op, clone=True) @@ -258,13 +257,11 @@ for i, op in enumerate(operations): if op.getopnum() in prohibit_opnums: continue # do not unroll this operation twice - copied_op = op.clone() + copied_op = op.copy() if not copied_op.returns_void(): # every result assigns a new box, thus creates an entry # to the rename map. - new_assigned_box = copied_op.result.clonebox() - renamer.start_renaming(copied_op.result, new_assigned_box) - copied_op.result = new_assigned_box + renamer.start_renaming(op, copied_op) # args = copied_op.getarglist() for a, arg in enumerate(args): @@ -518,14 +515,14 @@ step vectorization would not be possible! """ graph = DependencyGraph(loop) - ee_guard_node = graph.getnode(0) - if ee_guard_node.getopnum() != rop.GUARD_EARLY_EXIT: - raise NotAVectorizeableLoop() - label_node = graph.getnode(0) + zero_deps = {} + for node in graph.nodes: + if node.depends_count() == 0: + zero_deps[node] = 0 + earlyexit = graph.imaginary_node("early exit") guards = graph.guards + one_valid = False for guard_node in guards: - if guard_node is ee_guard_node: - continue modify_later = [] last_prev_node = None valid = True @@ -537,34 +534,35 @@ # 2) non pure operation points to this guard. # but if this guard only depends on pure operations, it can be checked # at an earlier position, the non pure op can execute later! - modify_later.append((prev_node, guard_node)) + modify_later.append(prev_node) else: - for path in prev_node.iterate_paths(ee_guard_node, backwards=True, blacklist=True): - if path.is_always_pure(exclude_first=True, exclude_last=True): - path.set_schedule_priority(10) - if path.last() is ee_guard_node: - modify_later.append((path.last_but_one(), None)) - else: - # transformation is invalid. - # exit and do not enter else branch! + for path in prev_node.iterate_paths(None, backwards=True, blacklist=True): + if not path.is_always_pure(exclude_first=True): + path.set_schedule_priority(90) valid = False + if path.last() in zero_deps: + del zero_deps[path.last()] if not valid: break if valid: # transformation is valid, modify the graph and execute # this guard earlier - for a,b in modify_later: - if b is not None: - a.remove_edge_to(b) - else: - last_but_one = a - if last_but_one is ee_guard_node: - continue - ee_guard_node.remove_edge_to(last_but_one) - #label_node.edge_to(last_but_one, label='pullup') - # only the last guard needs a connection - guard_node.edge_to(ee_guard_node, label='pullup-last-guard') - self.relax_guard_to(guard_node, ee_guard_node) + one_valid = True + for node in modify_later: + node.remove_edge_to(guard_node) + # every edge that starts in the guard, the early exit + # inherts the edge and guard then provides to early exit + for dep in guard_node.provides()[:]: + earlyexit.edge_to(dep.target_node()) + guard_node.remove_edge_to(dep.target_node()) + guard_node.edge_to(earlyexit) + + for node in zero_deps.keys(): + earlyexit.edge_to(node) + # TODO self.relax_guard_to(guard_node, ee_guard_node) + if one_valid: + return graph + return None def relax_guard_to(self, guard_node, other_node): """ Relaxes a guard operation to an earlier guard. """ @@ -686,9 +684,10 @@ """ if isomorphic(lnode.getoperation(), rnode.getoperation()): if lnode.independent(rnode): - if forward and isinstance(origin_pack, AccumPair): + if forward and origin_pack.is_accumulating(): # in this case the splitted accumulator must # be combined. This case is not supported + import pdb; pdb. set_trace() raise NotAVectorizeableLoop() # if self.contains_pair(lnode, rnode): @@ -739,20 +738,15 @@ return False def combine(self, i, j): - """ Combine two packs. it is assumed that the attribute self.packs + """ Combine two packs. It is assumed that the attribute self.packs is not iterated when calling this method. """ - pack_i = self.packs[i] - pack_j = self.packs[j] - operations = pack_i.operations - for op in pack_j.operations[1:]: + pkg_a = self.packs[i] + pkg_b = self.packs[j] + operations = pkg_a.operations + for op in pkg_b.operations[1:]: operations.append(op) - pack = Pack(operations) - self.packs[i] = pack - # preserve the accum variable (if present) - pack.accum = pack_i.accum - pack_i.accum = pack_j.accum = None - + self.packs[i] = pkg_a.clone(operations) del self.packs[j] return len(self.packs) @@ -762,27 +756,27 @@ left = lnode.getoperation() opnum = left.getopnum() - if opnum in (rop.FLOAT_ADD, rop.INT_ADD, rop.FLOAT_MUL): + if opnum in AccumPack.SUPPORTED: right = rnode.getoperation() assert left.numargs() == 2 and not left.returns_void() - accum_var, accum_pos = self.getaccumulator_variable(left, right, origin_pack) - if not accum_var: + scalar, index = self.getaccumulator_variable(left, right, origin_pack) + if not scalar: return None # the dependency exists only because of the left? for dep in lnode.provides(): if dep.to is rnode: - if not dep.because_of(accum_var): + if not dep.because_of(scalar): # not quite ... this is not handlable return None # get the original variable - accum_var = left.getarg(accum_pos) + scalar = left.getarg(index) # in either of the two cases the arguments are mixed, # which is not handled currently - var_pos = (accum_pos + 1) % 2 - if left.getarg(var_pos) is not origin_pack.leftmost(): + other_index = (index + 1) % 2 + if left.getarg(other_index) is not origin_pack.leftmost(): return None - if right.getarg(var_pos) is not origin_pack.rightmost(): + if right.getarg(other_index) is not origin_pack.rightmost(): return None # this can be handled by accumulation @@ -797,8 +791,8 @@ # of leading/preceding signext/floatcast instructions needs to be # considered. => tree pattern matching problem. return None - accum = Accum(opnum, accum_var, accum_pos) - return AccumPair(lnode, rnode, accum) + operator = AccumPack.SUPPORTED[opnum] + return AccumPack(lnode, rnode, operator, scalar, index) return None diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -256,6 +256,9 @@ # common methods # -------------- + def copy(self): + return self.copy_and_change(self.opnum) + def copy_and_change(self, opnum, args=None, descr=None): "shallow copy: the returned operation is meant to be used in place of self" # XXX specialize @@ -419,6 +422,9 @@ def is_raw_array_access(self): return self.is_raw_load() or self.is_raw_store() + def is_debug(self): + return rop._DEBUG_FIRST <= self.getopnum() <= rop._DEBUG_LAST + def is_primitive_array_access(self): """ Indicates that this operations loads/stores a primitive type (int,float) """ @@ -626,27 +632,6 @@ from rpython.jit.metainterp import history return history.ConstPtr(self.getref_base()) -class Accum(object): - PLUS = '+' - MULTIPLY = '*' - - def __init__(self, opnum, var, pos): - self.var = var - self.pos = pos - self.operator = Accum.PLUS - if opnum == rop.FLOAT_MUL: - self.operator = Accum.MULTIPLY - - def getdatatype(self): - return self.var.datatype - - def getbytesize(self): - return self.var.bytesize - - def getseed(self): - """ The variable holding the seed value """ - return self.var - class CastOp(object): _mixin_ = True @@ -726,9 +711,6 @@ return False return True - def getaccum(self): - return self.accum - class AbstractInputArg(AbstractResOpOrInputArg): def set_forwarded(self, forwarded_to): self._forwarded = forwarded_to @@ -1114,6 +1096,13 @@ # must be forced, however we need to execute it anyway '_NOSIDEEFFECT_LAST', # ----- end of no_side_effect operations ----- + '_DEBUG_FIRST', + 'DEBUG_MERGE_POINT/*/n', # debugging only + 'ENTER_PORTAL_FRAME/2/n', # debugging only + 'LEAVE_PORTAL_FRAME/1/n', # debugging only + 'JIT_DEBUG/*/n', # debugging only + '_DEBUG_LAST', + 'INCREMENT_DEBUG_COUNTER/1/n', '_RAW_STORE_FIRST', 'SETARRAYITEM_GC/3d/n', @@ -1135,10 +1124,6 @@ 'UNICODESETITEM/3/n', 'COND_CALL_GC_WB/1d/n', # [objptr] (for the write barrier) 'COND_CALL_GC_WB_ARRAY/2d/n', # [objptr, arrayindex] (write barr. for array) - 'DEBUG_MERGE_POINT/*/n', # debugging only - 'ENTER_PORTAL_FRAME/2/n', # debugging only - 'LEAVE_PORTAL_FRAME/1/n', # debugging only - 'JIT_DEBUG/*/n', # debugging only 'VIRTUAL_REF_FINISH/2/n', # removed before it's passed to the backend 'COPYSTRCONTENT/5/n', # src, dst, srcstart, dststart, length 'COPYUNICODECONTENT/5/n', diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py --- a/rpython/jit/metainterp/resume.py +++ b/rpython/jit/metainterp/resume.py @@ -48,7 +48,8 @@ self.pc = pc class AccumInfo(object): - __slots__ = ('prev', 'accum_operation', 'scalar_position', 'scalar_box', 'vector_loc') + _attrs_ = ('prev', 'accum_operation', 'scalar_position', 'scalar_box', 'vector_loc') + def __init__(self, prev, position, operation, box, loc): self.prev = prev self.accum_operation = operation diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py --- a/rpython/jit/metainterp/test/test_vector.py +++ b/rpython/jit/metainterp/test/test_vector.py @@ -342,8 +342,5 @@ res = self.meta_interp(f, [size], vec_all=True) assert res == f(size) -class VectorizeLLtypeTests(VectorizeTests): +class TestLLtype(LLJitMixin, VectorizeTests): pass - -class TestLLtype(VectorizeLLtypeTests, LLJitMixin): - pass diff --git a/rpython/jit/metainterp/warmspot.py b/rpython/jit/metainterp/warmspot.py --- a/rpython/jit/metainterp/warmspot.py +++ b/rpython/jit/metainterp/warmspot.py @@ -71,7 +71,7 @@ backendopt=False, trace_limit=sys.maxint, inline=False, loop_longevity=0, retrace_limit=5, function_threshold=4, enable_opts=ALL_OPTS_NAMES, max_retrace_guards=15, - max_unroll_recursion=7, vec=0, vec_all=0, vec_cost=0, + max_unroll_recursion=7, vec=1, vec_all=0, vec_cost=0, vec_length=60, vec_ratio=2, vec_guard_ratio=3, **kwds): from rpython.config.config import ConfigError translator = interp.typer.annotator.translator _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit