Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r90588:c05892e069b0 Date: 2017-03-08 14:48 +0100 http://bitbucket.org/pypy/pypy/changeset/c05892e069b0/
Log: hg merge shadowstack-perf-2 Two changes that together bring the performance of shadowstack close to asmgcc---close enough that we can now make shadowstack the default even on Linux. Yay! The changes are: * Compile with "gcc -flto" or "clang -flto". Gives a speed-up, and cannot be used with asmgcc. * Add complicated logic in shadowcolor.py to optimize the placement of the roots in the shadow stack. Long code, but should be ok because it is (hopefully) well-tested and independent. diff too long, truncating to 2000 out of 2343 lines diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -17,10 +17,10 @@ DEFL_GC = "incminimark" # XXX DEFL_ROOTFINDER_WITHJIT = "shadowstack" -if sys.platform.startswith("linux"): - _mach = os.popen('uname -m', 'r').read().strip() - if _mach.startswith('x86') or _mach in ['i386', 'i486', 'i586', 'i686']: - DEFL_ROOTFINDER_WITHJIT = "asmgcc" # only for Linux on x86 / x86-64 +## if sys.platform.startswith("linux"): +## _mach = os.popen('uname -m', 'r').read().strip() +## if _mach.startswith('x86') or _mach in ['i386', 'i486', 'i586', 'i686']: +## DEFL_ROOTFINDER_WITHJIT = "asmgcc" # only for Linux on x86 / x86-64 IS_64_BITS = sys.maxint > 2147483647 diff --git a/rpython/flowspace/model.py b/rpython/flowspace/model.py --- a/rpython/flowspace/model.py +++ b/rpython/flowspace/model.py @@ -96,6 +96,13 @@ from rpython.translator.tool.graphpage import FlowGraphPage FlowGraphPage(t, [self]).display() + def showbg(self, t=None): + import os + self.show(t) + if os.fork() == 0: + self.show(t) + os._exit(0) + view = show @@ -191,6 +198,11 @@ txt = "raise block" else: txt = "codeless block" + if len(self.inputargs) > 0: + if len(self.inputargs) > 1: + txt += '[%s...]' % (self.inputargs[0],) + else: + txt += '[%s]' % (self.inputargs[0],) return txt def __repr__(self): diff --git a/rpython/memory/gctransform/asmgcroot.py b/rpython/memory/gctransform/asmgcroot.py --- a/rpython/memory/gctransform/asmgcroot.py +++ b/rpython/memory/gctransform/asmgcroot.py @@ -341,6 +341,9 @@ # called first, to initialize self.belongs_to_current_thread. assert not hasattr(self, 'gc_detach_callback_pieces_ptr') + def postprocess_graph(self, gct, graph, any_inlining): + pass + def walk_stack_roots(self, collect_stack_root, is_minor=False): gcdata = self.gcdata gcdata._gc_collect_stack_root = collect_stack_root diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -205,6 +205,9 @@ if minimal_transform: self.need_minimal_transform(graph) if inline: + assert minimal_transform, ( + "%r has both inline=True and minimal_transform=False" + % (graph,)) self.graphs_to_inline[graph] = True return annhelper.graph2const(graph) @@ -410,7 +413,7 @@ self.identityhash_ptr = getfn(GCClass.identityhash.im_func, [s_gc, s_gcref], annmodel.SomeInteger(), - minimal_transform=False, inline=True) + minimal_transform=False) if getattr(GCClass, 'obtain_free_space', False): self.obtainfreespace_ptr = getfn(GCClass.obtain_free_space.im_func, [s_gc, annmodel.SomeInteger()], @@ -419,7 +422,6 @@ if GCClass.moving_gc: self.id_ptr = getfn(GCClass.id.im_func, [s_gc, s_gcref], annmodel.SomeInteger(), - inline = True, minimal_transform = False) else: self.id_ptr = None @@ -600,6 +602,9 @@ "the custom trace hook %r for %r can cause " "the GC to be called!" % (func, TP)) + def postprocess_graph(self, graph, any_inlining): + self.root_walker.postprocess_graph(self, graph, any_inlining) + def consider_constant(self, TYPE, value): self.layoutbuilder.consider_constant(TYPE, value, self.gcdata.gc) diff --git a/rpython/memory/gctransform/shadowcolor.py b/rpython/memory/gctransform/shadowcolor.py new file mode 100644 --- /dev/null +++ b/rpython/memory/gctransform/shadowcolor.py @@ -0,0 +1,803 @@ +from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.flowspace.model import mkentrymap, checkgraph, Block, Link +from rpython.flowspace.model import Variable, Constant, SpaceOperation +from rpython.tool.algo.regalloc import perform_register_allocation +from rpython.tool.algo.unionfind import UnionFind +from rpython.translator.unsimplify import varoftype, insert_empty_block +from rpython.translator.unsimplify import insert_empty_startblock, split_block +from rpython.translator.simplify import join_blocks +from rpython.rlib.rarithmetic import intmask +from collections import defaultdict + + +def is_trivial_rewrite(op): + return (op.opname in ('same_as', 'cast_pointer', 'cast_opaque_ptr') + and isinstance(op.args[0], Variable)) + + +def find_predecessors(graph, pending_pred): + """Return the set of variables whose content can end up inside one + of the 'pending_pred', which is a list of (block, var) tuples. + """ + entrymap = mkentrymap(graph) + if len(entrymap[graph.startblock]) != 1: + insert_empty_startblock(graph) + entrymap = mkentrymap(graph) + + pred = set([v for block, v in pending_pred]) + + def add(block, v): + if isinstance(v, Variable): + if v not in pred: + pending_pred.append((block, v)) + pred.add(v) + + while pending_pred: + block, v = pending_pred.pop() + if v in block.inputargs: + var_index = block.inputargs.index(v) + for link in entrymap[block]: + prevblock = link.prevblock + if prevblock is not None: + add(prevblock, link.args[var_index]) + else: + for op in block.operations: + if op.result is v: + if is_trivial_rewrite(op): + add(block, op.args[0]) + break + return pred + + +def find_successors(graph, pending_succ): + """Return the set of variables where one of the 'pending_succ' can + end up. 'block_succ' is a list of (block, var) tuples. + """ + succ = set([v for block, v in pending_succ]) + + def add(block, v): + if isinstance(v, Variable): + if v not in succ: + pending_succ.append((block, v)) + succ.add(v) + + while pending_succ: + block, v = pending_succ.pop() + for op in block.operations: + if op.args and v is op.args[0] and is_trivial_rewrite(op): + add(block, op.result) + for link in block.exits: + for i, v1 in enumerate(link.args): + if v1 is v: + add(link.target, link.target.inputargs[i]) + return succ + + +def find_interesting_variables(graph): + # Decide which variables are "interesting" or not. Interesting + # variables contain at least the ones that appear in gc_push_roots + # and gc_pop_roots. + pending_pred = [] + pending_succ = [] + interesting_vars = set() + for block in graph.iterblocks(): + for op in block.operations: + if op.opname == 'gc_push_roots': + for v in op.args: + if not isinstance(v, Variable): + continue + interesting_vars.add(v) + pending_pred.append((block, v)) + elif op.opname == 'gc_pop_roots': + for v in op.args: + if not isinstance(v, Variable): + continue + assert v in interesting_vars # must be pushed just above + pending_succ.append((block, v)) + if not interesting_vars: + return None + + # If there is a path from a gc_pop_roots(v) to a subsequent + # gc_push_roots(w) where w contains the same value as v along that + # path, then we consider all intermediate blocks along that path + # which contain a copy of the same value, and add these variables + # as "interesting", too. Formally, a variable in a block is + # "interesting" if it is both a "predecessor" and a "successor", + # where predecessors are variables which (sometimes) end in a + # gc_push_roots, and successors are variables which (sometimes) + # come from a gc_pop_roots. + pred = find_predecessors(graph, pending_pred) + succ = find_successors(graph, pending_succ) + interesting_vars |= (pred & succ) + + return interesting_vars + + +def allocate_registers(graph): + interesting_vars = find_interesting_variables(graph) + if not interesting_vars: + return None + regalloc = perform_register_allocation(graph, interesting_vars.__contains__) + assert regalloc.graph is graph + regalloc.find_num_colors() + return regalloc + + +def _gc_save_root(index, var): + c_index = Constant(index, lltype.Signed) + return SpaceOperation('gc_save_root', [c_index, var], + varoftype(lltype.Void)) + +def _gc_restore_root(index, var): + c_index = Constant(index, lltype.Signed) + return SpaceOperation('gc_restore_root', [c_index, var], + varoftype(lltype.Void)) + +def make_bitmask(filled, graph='?'): + n = filled.count(False) + if n == 0: + return (None, None) + bitmask = 0 + last_index = 0 + for i in range(len(filled)): + if not filled[i]: + bitmask <<= (i - last_index) + last_index = i + bitmask |= 1 + assert bitmask & 1 + if bitmask != intmask(bitmask): + raise GCBitmaskTooLong("the graph %r is too complex: cannot create " + "a bitmask telling than more than 31/63 " + "shadowstack entries are unused" % (graph,)) + # the mask is always a positive value, but it is replaced by a + # negative value during a minor collection root walking. Then, + # if the next minor collection finds an already-negative value, + # we know we can stop. So that's why we don't include here an + # optimization to not re-write a same-valued mask: it is important + # to re-write the value, to turn it from potentially negative back + # to positive, in order to mark this shadow frame as modified. + assert bitmask > 0 + return (last_index, bitmask) + + +def expand_one_push_roots(regalloc, args): + if regalloc is None: + assert len(args) == 0 + else: + filled = [False] * regalloc.numcolors + for v in args: + index = regalloc.getcolor(v) + assert not filled[index] + filled[index] = True + yield _gc_save_root(index, v) + bitmask_index, bitmask = make_bitmask(filled, regalloc.graph) + if bitmask_index is not None: + # xxx we might in some cases avoid this gc_save_root + # entirely, if we know we're after another gc_push/gc_pop + # that wrote exactly the same mask at the same index + bitmask_c = Constant(bitmask, lltype.Signed) + yield _gc_save_root(bitmask_index, bitmask_c) + +def expand_one_pop_roots(regalloc, args): + if regalloc is None: + assert len(args) == 0 + else: + for v in args: + index = regalloc.getcolor(v) + yield _gc_restore_root(index, v) + + +def expand_push_roots(graph, regalloc): + """Expand gc_push_roots into a series of gc_save_root, including + writing a bitmask tag to mark some entries as not-in-use. + (If regalloc is None, it will still remove empty gc_push_roots.) + """ + for block in graph.iterblocks(): + any_change = False + newops = [] + for op in block.operations: + if op.opname == 'gc_push_roots': + args = [v for v in op.args if isinstance(v, Variable)] + newops += expand_one_push_roots(regalloc, args) + any_change = True + else: + newops.append(op) + if any_change: + block.operations = newops + + +def move_pushes_earlier(graph, regalloc): + """gc_push_roots and gc_pop_roots are pushes/pops to the shadowstack, + immediately enclosing the operation that needs them (typically a call). + Here, we try to move individual pushes earlier. + + Should run after expand_push_roots(), but before expand_pop_roots(), + so that it sees individual 'gc_save_root' operations but bulk + 'gc_pop_roots' operations. + """ + # Concrete example (assembler tested on x86-64 gcc 5.3 and clang 3.7): + # + # ----original---- ----move_pushes_earlier---- + # + # while (a > 10) { *foo = b; + # *foo = b; while (a > 10) { + # a = g(a); a = g(a); + # b = *foo; b = *foo; + # // *foo = b; + # } } + # return b; return b; + # + # => the store and the => the store is before, and gcc/clang + # load are in the loop, moves the load after the loop + # even in the assembler (the commented-out '*foo=b' is removed + # here, but gcc/clang would also remove it) + + # Draft of the algorithm: see shadowcolor.txt + + if not regalloc: + return + + entrymap = mkentrymap(graph) + assert len(entrymap[graph.startblock]) == 1 + + inputvars = {} # {inputvar: (its block, its index in inputargs)} + for block in graph.iterblocks(): + for i, v in enumerate(block.inputargs): + inputvars[v] = (block, i) + + Plist = [] + + for index in range(regalloc.numcolors): + U = UnionFind() + + S = set() + for block in graph.iterblocks(): + for op in reversed(block.operations): + if op.opname == 'gc_pop_roots': + break + else: + continue # no gc_pop_roots in this block + for v in op.args: + if isinstance(v, Variable) and regalloc.checkcolor(v, index): + break + else: + continue # no variable goes into index i + + succ = set() + pending_succ = [(block, v)] + while pending_succ: + block1, v1 = pending_succ.pop() + assert regalloc.checkcolor(v1, index) + for op1 in block1.operations: + if is_trivial_rewrite(op1) and op1.args[0] is v1: + if regalloc.checkcolor(op1.result, index): + pending_succ.append((block1, op1.result)) + for link1 in block1.exits: + for i2, v2 in enumerate(link1.args): + if v2 is not v1: + continue + block2 = link1.target + w2 = block2.inputargs[i2] + if w2 in succ or not regalloc.checkcolor(w2, index): + continue + succ.add(w2) + for op2 in block2.operations: + if op2.opname in ('gc_save_root', 'gc_pop_roots'): + break + else: + pending_succ.append((block2, w2)) + U.union_list(list(succ)) + S.update(succ) + + G = defaultdict(set) + for block in graph.iterblocks(): + found = False + for opindex, op in enumerate(block.operations): + if op.opname == 'gc_save_root': + if (isinstance(op.args[1], Constant) and + op.args[1].concretetype == lltype.Signed): + break + elif op.args[0].value == index: + found = True + break + if not found or not isinstance(op.args[1], Variable): + continue # no matching gc_save_root in this block + + key = (block, op) + pred = set() + pending_pred = [(block, op.args[1], opindex)] + while pending_pred: + block1, v1, opindex1 = pending_pred.pop() + assert regalloc.getcolor(v1) == index + for i in range(opindex1-1, -1, -1): + op1 = block1.operations[i] + if op1.opname == 'gc_pop_roots': + break # stop + if op1.result is v1: + if not is_trivial_rewrite(op1): + break # stop + if not regalloc.checkcolor(op1.args[0], index): + break # stop + v1 = op1.args[0] + else: + varindex = block1.inputargs.index(v1) + if v1 in pred: + continue # already done + pred.add(v1) + for link1 in entrymap[block1]: + prevblock1 = link1.prevblock + if prevblock1 is not None: + w1 = link1.args[varindex] + if isinstance(w1, Variable) and w1 not in pred: + if regalloc.checkcolor(w1, index): + pending_pred.append((prevblock1, w1, + len(prevblock1.operations))) + U.union_list(list(pred)) + for v1 in pred: + G[v1].add(key) + + M = S.intersection(G) + + parts_target = {} + for v in M: + vp = U.find_rep(v) + if vp not in parts_target: + new_part = (index, set(), set()) + # (index, + # subset P of variables, + # set of (block, gc_save_root)) + Plist.append(new_part) + parts_target[vp] = new_part + part = parts_target[vp] + part[1].add(v) + part[2].update(G[v]) + + # Sort P so that it prefers places that would avoid multiple + # gcsaveroots (smaller 'heuristic' result, so first in sorted + # order); but also prefers smaller overall pieces, because it + # might be possible to remove several small-scale pieces instead + # of one big-scale one. + def heuristic((index, P, gcsaveroots)): + return float(len(P)) / len(gcsaveroots) + Plist.sort(key=heuristic) + + variables_along_changes = {} + live_at_start_of_block = set() # set of (block, index) + insert_gc_push_root = defaultdict(list) + + for index, P, gcsaveroots in Plist: + # if this Plist entry is not valid any more because of changes + # done by the previous entries, drop it + if any((inputvars[v][0], index) in live_at_start_of_block for v in P): + continue + if any(op not in block.operations for block, op in gcsaveroots): + continue + for v in P: + assert regalloc.getcolor(v) == index + assert v not in variables_along_changes + + success_count = 0 + mark = [] + + for v in P: + block, varindex = inputvars[v] + for link in entrymap[block]: + w = link.args[varindex] + if link.prevblock is not None: + prevoperations = link.prevblock.operations + else: + prevoperations = [] + for op in reversed(prevoperations): + if op.opname == 'gc_pop_roots': + # it is possible to have gc_pop_roots() without + # w in the args, if w is the result of the call + # that comes just before. + if (isinstance(w, Variable) and + w in op.args and + regalloc.checkcolor(w, index)): + success_count += 1 + else: + mark.append((index, link, varindex)) + break + if op.result is w: + if is_trivial_rewrite(op) and ( + regalloc.checkcolor(op.args[0], index)): + w = op.args[0] + else: + mark.append((index, link, varindex)) + break + else: + if not isinstance(w, Variable) or w not in P: + mark.append((index, link, varindex)) + + if success_count > 0: + for block, op in gcsaveroots: + newops = list(block.operations) + newops.remove(op) + block.operations = newops + for index, link, varindex in mark: + insert_gc_push_root[link].append((index, link.args[varindex])) + for v in P: + block, varindex = inputvars[v] + variables_along_changes[v] = block, index + live_at_start_of_block.add((block, index)) + + for link in insert_gc_push_root: + newops = [_gc_save_root(index, v) + for index, v in sorted(insert_gc_push_root[link])] + insert_empty_block(link, newops=newops) + + +def expand_pop_roots(graph, regalloc): + """gc_pop_roots => series of gc_restore_root; this is done after + move_pushes_earlier() because that one doesn't work correctly if + a completely-empty gc_pop_roots is removed. + + Also notice in-block code sequences like gc_pop_roots(v) followed + by a gc_save_root(v), and drop the gc_save_root. + """ + drop = {} + for block in graph.iterblocks(): + any_change = False + newops = [] + for op in block.operations: + if op.opname == 'gc_pop_roots': + args = [v for v in op.args if isinstance(v, Variable)] + expanded = list(expand_one_pop_roots(regalloc, args)) + drop = {} + for op1 in expanded: + if isinstance(op1.args[1], Variable): + drop[op1.args[1]] = op1.args[0].value + newops += expanded + any_change = True + elif (op.opname == 'gc_save_root' and + drop.get(op.args[1]) == op.args[0].value): + any_change = True # kill the operation + else: + newops.append(op) + if any_change: + block.operations = newops + + +def add_enter_leave_roots_frame(graph, regalloc, c_gcdata): + # put 'gc_enter_roots_frame' as late as possible, but before the + # first 'gc_save_root' is reached. + # + # put the 'gc_leave_roots_frame' operations as early as possible, + # that is, just after the last 'gc_restore_root' reached. This is + # done by putting it along a link, such that the previous block + # contains a 'gc_restore_root' and from the next block it is not + # possible to reach any extra 'gc_restore_root'; then, as doing + # this is not as precise as we'd like, we first break every block + # just after their last 'gc_restore_root'. + if regalloc is None: + return + + # break blocks after their last 'gc_restore_root', unless they + # are already at the last position + for block in graph.iterblocks(): + ops = block.operations + for i in range(len(ops)-1, -1, -1): + if ops[i].opname == 'gc_restore_root': + if i < len(ops) - 1: + split_block(block, i + 1) + break + # done + + insert_empty_startblock(graph) + entrymap = mkentrymap(graph) + + # helpers + + def is_interesting_op(op): + if op.opname == 'gc_restore_root': + return True + if op.opname == 'gc_save_root': + # ignore saves that say "everything is free" + return not (isinstance(op.args[1], Constant) and + isinstance(op.args[1].value, int) and + op.args[1].value == bitmask_all_free) + return False + bitmask_all_free = (1 << regalloc.numcolors) - 1 + + def insert_along_link(link, opname, args, cache): + b2 = link.target + if b2 not in cache: + newblock = Block([v.copy() for v in b2.inputargs]) + newblock.operations.append( + SpaceOperation(opname, args, varoftype(lltype.Void))) + newblock.closeblock(Link(list(newblock.inputargs), b2)) + cache[b2] = newblock + link.target = cache[b2] + + # make a list of blocks with gc_save_root/gc_restore_root in them + interesting_blocks = [] + for block in graph.iterblocks(): + for op in block.operations: + if is_interesting_op(op): + assert block is not graph.startblock + assert block is not graph.returnblock + interesting_blocks.append(block) + break # interrupt this block, go to the next one + + # compute the blocks such that 'gc_save_root/gc_restore_root' + # exist anywhere before the start of this block + before_blocks = set() + pending = list(interesting_blocks) + seen = set(pending) + while pending: + block = pending.pop() + for link in block.exits: + before_blocks.add(link.target) + if link.target not in seen: + seen.add(link.target) + pending.append(link.target) + assert graph.startblock not in before_blocks + + # compute the blocks such that 'gc_save_root/gc_restore_root' + # exist anywhere after the start of this block + after_blocks = set(interesting_blocks) + pending = list(interesting_blocks) + while pending: + block = pending.pop() + for link in entrymap[block]: + if link.prevblock is not None: + if link.prevblock not in after_blocks: + after_blocks.add(link.prevblock) + pending.append(link.prevblock) + assert graph.returnblock not in after_blocks + + # this is the set of blocks such that, at the start of the block, + # we're "in frame", i.e. there are 'gc_save_root/gc_restore_root' + # both before and after the start of the block. + inside_blocks = before_blocks & after_blocks + inside_or_interesting_blocks = set(interesting_blocks) | inside_blocks + + # if a block contains gc_save_root/gc_restore_root but is not + # an "inside_block", then add gc_enter_roots_frame where needed + c_num = Constant(regalloc.numcolors, lltype.Signed) + for block in interesting_blocks: + if block not in inside_blocks: + i = 0 + while not is_interesting_op(block.operations[i]): + i += 1 + block.operations.insert(i, + SpaceOperation('gc_enter_roots_frame', [c_gcdata, c_num], + varoftype(lltype.Void))) + + # If a link goes from a "non-inside, non-interesting block" + # straight to an "inside_block", insert a gc_enter_roots_frame + # along the link. Similarly, if a block is a "inside-or- + # interesting_block" and exits with a link going to a + # "non-inside_block", then insert a gc_leave_roots_frame along the + # link. + cache1 = {} + cache2 = {} + for block in list(graph.iterblocks()): + if block not in inside_or_interesting_blocks: + for link in block.exits: + if link.target in inside_blocks: + insert_along_link(link, 'gc_enter_roots_frame', + [c_gcdata, c_num], cache1) + else: + for link in block.exits: + if link.target not in inside_blocks: + insert_along_link(link, 'gc_leave_roots_frame', + [], cache2) + + # check all blocks not in "inside_block": they might contain a + # gc_save_root() that writes the bitmask meaning "everything is + # free". Look only before gc_enter_roots_frame, if there is one + # in that block. Remove these out-of-frame gc_save_root(). + for block in graph.iterblocks(): + if block not in inside_blocks: + newops = [] + for i, op in enumerate(block.operations): + if op.opname == 'gc_enter_roots_frame': + newops.extend(block.operations[i:]) + break + if op.opname == 'gc_save_root' and not is_interesting_op(op): + pass # don't add in newops + else: + newops.append(op) + if len(newops) < len(block.operations): + block.operations = newops + + join_blocks(graph) # for the extra new blocks made in this function + + +class GCBitmaskTooLong(Exception): + pass + +class PostProcessCheckError(Exception): + pass + +def postprocess_double_check(graph): + # Debugging only: double-check that the placement is correct. + # Assumes that every gc_restore_root() indicates that the variable + # must be saved at the given position in the shadowstack frame (in + # practice it may have moved because of the GC, but in theory it + # is still the "same" object). So we build the set of all known + # valid-in-all-paths saved locations, and check that. + + saved = {} # {var-from-inputargs: location} where location is: + # <unset>: we haven't seen this variable so far + # set-of-indexes: says where the variable is always + # saved at the start of this block + # empty-set: same as above, so: "saved nowhere" + + in_frame = {} # {block: bool}, tells if, at the start of this block, + # we're in status "frame entered" or not + + in_frame[graph.startblock] = False + pending = set([graph.startblock]) + while pending: + block = pending.pop() + locsaved = {} + currently_in_frame = in_frame[block] + if currently_in_frame: + for v in block.inputargs: + locsaved[v] = saved[v] + for op in block.operations: + if op.opname == 'gc_restore_root': + if not currently_in_frame: + raise PostProcessCheckError(graph, block, op, 'no frame!') + if isinstance(op.args[1], Constant): + continue + num = op.args[0].value + if num not in locsaved[op.args[1]]: + raise PostProcessCheckError(graph, block, op, num, locsaved) + elif op.opname == 'gc_save_root': + if not currently_in_frame: + raise PostProcessCheckError(graph, block, op, 'no frame!') + num = op.args[0].value + # first, cancel any other variable that would be saved in 'num' + for v in locsaved: + locsaved[v] = locsaved[v].difference([num]) + # + v = op.args[1] + if isinstance(v, Variable): + locsaved[v] = locsaved[v].union([num]) + else: + if v.concretetype != lltype.Signed: + locsaved[v] = locsaved.get(v, frozenset()).union([num]) + continue + bitmask = v.value + if bitmask != 1: + # cancel any variable that would be saved in any + # position shown by the bitmask, not just 'num' + assert bitmask & 1 + assert 1 < bitmask < (2<<num) + nummask = [i for i in range(num+1) + if bitmask & (1<<(num-i))] + assert nummask[-1] == num + for v in locsaved: + locsaved[v] = locsaved[v].difference(nummask) + elif op.opname == 'gc_enter_roots_frame': + if currently_in_frame: + raise PostProcessCheckError(graph, block, op,'double enter') + currently_in_frame = True + # initialize all local variables so far with "not seen anywhere" + # (already done, apart from block.inputargs) + for v in block.inputargs: + locsaved[v] = frozenset() + elif op.opname == 'gc_leave_roots_frame': + if not currently_in_frame: + raise PostProcessCheckError(graph, block, op, 'not entered') + currently_in_frame = False + elif is_trivial_rewrite(op) and currently_in_frame: + locsaved[op.result] = locsaved[op.args[0]] + else: + locsaved[op.result] = frozenset() + for link in block.exits: + changed = False + if link.target not in in_frame: + in_frame[link.target] = currently_in_frame + changed = True + else: + if in_frame[link.target] != currently_in_frame: + raise PostProcessCheckError(graph, link.target, + 'inconsistent in_frame') + if currently_in_frame: + for i, v in enumerate(link.args): + try: + loc = locsaved[v] + except KeyError: + assert isinstance(v, Constant) + loc = frozenset() + w = link.target.inputargs[i] + if w in saved: + if loc == saved[w]: + continue # already up-to-date + loc = loc.intersection(saved[w]) + saved[w] = loc + changed = True + if changed: + pending.add(link.target) + + if in_frame.get(graph.returnblock, False): + raise PostProcessCheckError(graph, 'missing gc_leave_roots_frame') + assert graph.getreturnvar() not in saved # missing gc_leave_roots_frame? + + +def postprocess_graph(graph, c_gcdata): + """Collect information about the gc_push_roots and gc_pop_roots + added in this complete graph, and replace them with real operations. + """ + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + add_enter_leave_roots_frame(graph, regalloc, c_gcdata) + checkgraph(graph) + postprocess_double_check(graph) + return (regalloc is not None) + + +def postprocess_inlining(graph): + """We first write calls to GC functions with gc_push_roots(...) and + gc_pop_roots(...) around. Then we inline some of these functions. + As a result, the gc_push_roots and gc_pop_roots are no longer in + the same block. Fix that by moving the gc_push_roots/gc_pop_roots + inside the inlined portion of the graph, around every call. + + We could also get a correct result by doing things in a different + order, e.g. first postprocess_graph() and then inlining. However, + this order brings an important benefit: if the inlined graph has a + fast-path, like malloc_fixedsize(), then there are no gc_push_roots + and gc_pop_roots left along the fast-path. + """ + for block in graph.iterblocks(): + for i in range(len(block.operations)-1, -1, -1): + op = block.operations[i] + if op.opname == 'gc_pop_roots': + break + if op.opname == 'gc_push_roots': + _fix_graph_after_inlining(graph, block, i) + break + checkgraph(graph) + +def _fix_graph_after_inlining(graph, initial_block, initial_index): + op = initial_block.operations.pop(initial_index) + assert op.opname == 'gc_push_roots' + seen = set() + pending = [(initial_block, initial_index, op.args)] + while pending: + block, start_index, track_args = pending.pop() + if block in seen: + continue + seen.add(block) + assert block.operations != () # did not find the gc_pop_roots? + new_operations = block.operations[:start_index] + stop = False + for i in range(start_index, len(block.operations)): + op = block.operations[i] + if op.opname == 'gc_push_roots': + raise Exception("%r: seems to have inlined inside it another " + "graph which also uses GC roots" % (graph,)) + if op.opname == 'gc_pop_roots': + # end of the inlined graph, drop gc_pop_roots, keep the tail + new_operations += block.operations[i + 1:] + stop = True + break + if op.opname in ('direct_call', 'indirect_call'): + new_operations.append(SpaceOperation('gc_push_roots', + track_args[:], + varoftype(lltype.Void))) + new_operations.append(op) + new_operations.append(SpaceOperation('gc_pop_roots', + track_args[:], + varoftype(lltype.Void))) + else: + new_operations.append(op) + block.operations = new_operations + if not stop: + for link in block.exits: + track_next = [] + for v in track_args: + if not isinstance(v, Variable): + continue + i = link.args.index(v) # should really be here + w = link.target.inputargs[i] + track_next.append(w) + pending.append((link.target, 0, track_next)) diff --git a/rpython/memory/gctransform/shadowcolor.txt b/rpython/memory/gctransform/shadowcolor.txt new file mode 100644 --- /dev/null +++ b/rpython/memory/gctransform/shadowcolor.txt @@ -0,0 +1,46 @@ + + +for every frame index i: + + S_i = { variable: non-empty-set-of-sources } + + source = a variable popped by gc_pop_roots from frame index 'i', + only looking at the last gc_pop_roots in each block + keys = variables that appear in inputargs, where a value from a + 'source' can come from + + G_i = { variable: non-empty-set-of-targets } + + target = a variable pushed by gc_push_roots into frame index 'i', + only looking at the first gc_push_roots in each block + keys = variables that appear in inputargs, where a value can + end up in a 'target' + + M_i = S_i intersection G_i + + "variable in M_i" <==> at the start of this block, this variable's + value is already saved in the frame index i (provided "success" below) + + split M_i into a partition of independent parts; add (i, P), (i, P'), ... + to the global list + + +for every (i, P), ideally in some suitable order: + + for every variable in P, for every link entering this block: + + if prevblock's corresponding variable is from the last gc_pop_roots + of that block, at index i: + + *success = True* + + elif prevblock's corresponding variable is not in P: + + mark the link + + if success: + + insert a new gc_save_root() along all links marked above; + remove the original gc_save_root + + for any P' after P that has any variables in common, kill that P' diff --git a/rpython/memory/gctransform/shadowstack.py b/rpython/memory/gctransform/shadowstack.py --- a/rpython/memory/gctransform/shadowstack.py +++ b/rpython/memory/gctransform/shadowstack.py @@ -3,6 +3,7 @@ from rpython.rlib.debug import ll_assert from rpython.rlib.nonconst import NonConstant from rpython.rlib import rgc +from rpython.rlib.objectmodel import specialize from rpython.rtyper import rmodel from rpython.rtyper.annlowlevel import llhelper from rpython.rtyper.lltypesystem import lltype, llmemory @@ -10,6 +11,7 @@ from rpython.memory.gctransform.framework import ( BaseFrameworkGCTransformer, BaseRootWalker, sizeofaddr) from rpython.rtyper.rbuiltin import gen_cast +from rpython.memory.gctransform.log import log class ShadowStackFrameworkGCTransformer(BaseFrameworkGCTransformer): @@ -29,30 +31,43 @@ def push_roots(self, hop, keep_current_args=False): livevars = self.get_livevars_for_roots(hop, keep_current_args) self.num_pushs += len(livevars) - if not livevars: - return [] - c_len = rmodel.inputconst(lltype.Signed, len(livevars) ) - base_addr = hop.genop("direct_call", [self.incr_stack_ptr, c_len ], - resulttype=llmemory.Address) - for k,var in enumerate(livevars): - c_k = rmodel.inputconst(lltype.Signed, k * sizeofaddr) - v_adr = gen_cast(hop.llops, llmemory.Address, var) - hop.genop("raw_store", [base_addr, c_k, v_adr]) + hop.genop("gc_push_roots", livevars) return livevars def pop_roots(self, hop, livevars): - if not livevars: - return - c_len = rmodel.inputconst(lltype.Signed, len(livevars) ) - base_addr = hop.genop("direct_call", [self.decr_stack_ptr, c_len ], - resulttype=llmemory.Address) - if self.gcdata.gc.moving_gc: - # for moving collectors, reload the roots into the local variables - for k,var in enumerate(livevars): - c_k = rmodel.inputconst(lltype.Signed, k * sizeofaddr) - v_newaddr = hop.genop("raw_load", [base_addr, c_k], - resulttype=llmemory.Address) - hop.genop("gc_reload_possibly_moved", [v_newaddr, var]) + hop.genop("gc_pop_roots", livevars) + # NB. we emit it even if len(livevars) == 0; this is needed for + # shadowcolor.move_pushes_earlier() + + +@specialize.call_location() +def walk_stack_root(invoke, arg0, arg1, start, addr, is_minor): + skip = 0 + while addr != start: + addr -= sizeofaddr + #XXX reintroduce support for tagged values? + #if gc.points_to_valid_gc_object(addr): + # callback(gc, addr) + + if skip & 1 == 0: + content = addr.address[0] + n = llmemory.cast_adr_to_int(content) + if n & 1 == 0: + if content: # non-0, non-odd: a regular ptr + invoke(arg0, arg1, addr) + else: + # odd number: a skip bitmask + if n > 0: # initially, an unmarked value + if is_minor: + newcontent = llmemory.cast_int_to_adr(-n) + addr.address[0] = newcontent # mark + skip = n + else: + # a marked value + if is_minor: + return + skip = -n + skip >>= 1 class ShadowStackRootWalker(BaseRootWalker): @@ -73,14 +88,8 @@ return top self.decr_stack = decr_stack - def walk_stack_root(callback, start, end): - gc = self.gc - addr = end - while addr != start: - addr -= sizeofaddr - if gc.points_to_valid_gc_object(addr): - callback(gc, addr) - self.rootstackhook = walk_stack_root + self.invoke_collect_stack_root = specialize.call_location()( + lambda arg0, arg1, addr: arg0(self.gc, addr)) self.shadow_stack_pool = ShadowStackPool(gcdata) rsd = gctransformer.root_stack_depth @@ -101,8 +110,9 @@ def walk_stack_roots(self, collect_stack_root, is_minor=False): gcdata = self.gcdata - self.rootstackhook(collect_stack_root, - gcdata.root_stack_base, gcdata.root_stack_top) + walk_stack_root(self.invoke_collect_stack_root, collect_stack_root, + None, gcdata.root_stack_base, gcdata.root_stack_top, + is_minor=is_minor) def need_thread_support(self, gctransformer, getfn): from rpython.rlib import rthread # xxx fish @@ -208,7 +218,7 @@ self.thread_setup = thread_setup self.thread_run_ptr = getfn(thread_run, [], annmodel.s_None, - inline=True, minimal_transform=False) + minimal_transform=False) self.thread_die_ptr = getfn(thread_die, [], annmodel.s_None, minimal_transform=False) # no thread_before_fork_ptr here @@ -222,6 +232,16 @@ from rpython.rlib import _stacklet_shadowstack _stacklet_shadowstack.complete_destrptr(gctransformer) + def postprocess_graph(self, gct, graph, any_inlining): + from rpython.memory.gctransform import shadowcolor + if any_inlining: + shadowcolor.postprocess_inlining(graph) + use_push_pop = shadowcolor.postprocess_graph(graph, gct.c_const_gcdata) + if use_push_pop and graph in gct.graphs_to_inline: + log.WARNING("%r is marked for later inlining, " + "but is using push/pop roots. Disabled" % (graph,)) + del gct.graphs_to_inline[graph] + # ____________________________________________________________ class ShadowStackPool(object): @@ -310,11 +330,8 @@ def customtrace(gc, obj, callback, arg): obj = llmemory.cast_adr_to_ptr(obj, SHADOWSTACKREFPTR) - addr = obj.top - start = obj.base - while addr != start: - addr -= sizeofaddr - gc._trace_callback(callback, arg, addr) + walk_stack_root(gc._trace_callback, callback, arg, obj.base, obj.top, + is_minor=False) # xxx optimize? gc = gctransformer.gcdata.gc assert not hasattr(gc, 'custom_trace_dispatcher') diff --git a/rpython/memory/gctransform/test/test_shadowcolor.py b/rpython/memory/gctransform/test/test_shadowcolor.py new file mode 100644 --- /dev/null +++ b/rpython/memory/gctransform/test/test_shadowcolor.py @@ -0,0 +1,791 @@ +from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rtyper.lltypesystem.lloperation import llop +from rpython.rtyper.test.test_llinterp import gengraph +from rpython.conftest import option +from rpython.memory.gctransform.shadowcolor import * +from rpython.flowspace import model as graphmodel +from rpython.translator.simplify import join_blocks, cleanup_graph +from hypothesis import given, strategies + + +def make_graph(f, argtypes): + t, rtyper, graph = gengraph(f, argtypes, viewbefore=False) + if getattr(option, 'view', False): + graph.show() + return graph + +def nameof(v): + return v._name.rstrip('_') + +def summary(interesting_vars): + result = {} + for v in interesting_vars: + name = nameof(v) + result[name] = result.get(name, 0) + 1 + return result + +def summary_regalloc(regalloc): + result = [] + for block in regalloc.graph.iterblocks(): + print block.inputargs + for op in block.operations: + print '\t', op + blockvars = block.inputargs + [op.result for op in block.operations] + for v in blockvars: + if regalloc.consider_var(v): + result.append((nameof(v), regalloc.getcolor(v))) + print '\t\t%s: %s' % (v, regalloc.getcolor(v)) + result.sort() + return result + + +def test_find_predecessors_1(): + def f(a, b): + c = a + b + return c + graph = make_graph(f, [int, int]) + pred = find_predecessors(graph, [(graph.returnblock, graph.getreturnvar())]) + assert summary(pred) == {'c': 1, 'v': 1} + +def test_find_predecessors_2(): + def f(a, b): + c = a + b + while a > 0: + a -= 2 + return c + graph = make_graph(f, [int, int]) + pred = find_predecessors(graph, [(graph.returnblock, graph.getreturnvar())]) + assert summary(pred) == {'c': 3, 'v': 1} + +def test_find_predecessors_3(): + def f(a, b): + while b > 100: + b -= 2 + if b > 10: + c = a + b # 'c' created in this block + else: + c = a - b # 'c' created in this block + return c # 'v' is the return var + graph = make_graph(f, [int, int]) + pred = find_predecessors(graph, [(graph.returnblock, graph.getreturnvar())]) + assert summary(pred) == {'c': 2, 'v': 1} + +def test_find_predecessors_4(): + def f(a, b): # 'a' in the input block + while b > 100: # 'a' in the loop header block + b -= 2 # 'a' in the loop body block + if b > 10: # 'a' in the condition block + while b > 5: # nothing + b -= 2 # nothing + c = a + b # 'c' created in this block + else: + c = a + return c # 'v' is the return var + graph = make_graph(f, [int, int]) + pred = find_predecessors(graph, [(graph.returnblock, graph.getreturnvar())]) + assert summary(pred) == {'a': 4, 'c': 1, 'v': 1} + +def test_find_predecessors_trivial_rewrite(): + def f(a, b): # 'b' in empty startblock + while a > 100: # 'b' + a -= 2 # 'b' + c = llop.same_as(lltype.Signed, b) # 'c', 'b' + while b > 10: # 'c' + b -= 2 # 'c' + d = llop.same_as(lltype.Signed, c) # 'd', 'c' + return d # 'v' is the return var + graph = make_graph(f, [int, int]) + pred = find_predecessors(graph, [(graph.returnblock, graph.getreturnvar())]) + assert summary(pred) == {'b': 4, 'c': 4, 'd': 1, 'v': 1} + +def test_find_successors_1(): + def f(a, b): + return a + b + graph = make_graph(f, [int, int]) + succ = find_successors(graph, [(graph.startblock, graph.getargs()[0])]) + assert summary(succ) == {'a': 1} + +def test_find_successors_2(): + def f(a, b): + if b > 10: + return a + b + else: + return a - b + graph = make_graph(f, [int, int]) + succ = find_successors(graph, [(graph.startblock, graph.getargs()[0])]) + assert summary(succ) == {'a': 3} + +def test_find_successors_3(): + def f(a, b): + if b > 10: # 'a' condition block + a = a + b # 'a' input + while b > 100: + b -= 2 + while b > 5: # 'a' in loop header + b -= 2 # 'a' in loop body + return a * b # 'a' in product + graph = make_graph(f, [int, int]) + succ = find_successors(graph, [(graph.startblock, graph.getargs()[0])]) + assert summary(succ) == {'a': 5} + +def test_find_successors_trivial_rewrite(): + def f(a, b): # 'b' in empty startblock + while a > 100: # 'b' + a -= 2 # 'b' + c = llop.same_as(lltype.Signed, b) # 'c', 'b' + while b > 10: # 'c', 'b' + b -= 2 # 'c', 'b' + d = llop.same_as(lltype.Signed, c) # 'd', 'c' + return d # 'v' is the return var + graph = make_graph(f, [int, int]) + pred = find_successors(graph, [(graph.startblock, graph.getargs()[1])]) + assert summary(pred) == {'b': 6, 'c': 4, 'd': 1, 'v': 1} + + +def test_interesting_vars_0(): + def f(a, b): + pass + graph = make_graph(f, [llmemory.GCREF, int]) + assert not find_interesting_variables(graph) + +def test_interesting_vars_1(): + def f(a, b): + llop.gc_push_roots(lltype.Void, a) + llop.gc_pop_roots(lltype.Void, a) + graph = make_graph(f, [llmemory.GCREF, int]) + assert summary(find_interesting_variables(graph)) == {'a': 1} + +def test_interesting_vars_2(): + def f(a, b, c): + llop.gc_push_roots(lltype.Void, a) + llop.gc_pop_roots(lltype.Void, a) + while b > 0: + b -= 5 + llop.gc_push_roots(lltype.Void, c) + llop.gc_pop_roots(lltype.Void, c) + graph = make_graph(f, [llmemory.GCREF, int, llmemory.GCREF]) + assert summary(find_interesting_variables(graph)) == {'a': 1, 'c': 1} + +def test_interesting_vars_3(): + def f(a, b): + llop.gc_push_roots(lltype.Void, a) + llop.gc_pop_roots(lltype.Void, a) + while b > 0: # 'a' remains interesting across the blocks of this loop + b -= 5 + llop.gc_push_roots(lltype.Void, a) + llop.gc_pop_roots(lltype.Void, a) + graph = make_graph(f, [llmemory.GCREF, int]) + assert summary(find_interesting_variables(graph)) == {'a': 4} + +def test_allocate_registers_1(): + def f(a, b): + llop.gc_push_roots(lltype.Void, a) + llop.gc_pop_roots(lltype.Void, a) + while b > 0: # 'a' remains interesting across the blocks of this loop + b -= 5 + llop.gc_push_roots(lltype.Void, a) + llop.gc_pop_roots(lltype.Void, a) + graph = make_graph(f, [llmemory.GCREF, int]) + regalloc = allocate_registers(graph) + assert summary_regalloc(regalloc) == [('a', 0)] * 4 + +def test_allocate_registers_2(): + def f(a, b, c): + llop.gc_push_roots(lltype.Void, a) + llop.gc_pop_roots(lltype.Void, a) + while b > 0: + b -= 5 + llop.gc_push_roots(lltype.Void, c) + llop.gc_pop_roots(lltype.Void, c) + graph = make_graph(f, [llmemory.GCREF, int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + assert summary_regalloc(regalloc) == [('a', 0), ('c', 0)] + +def test_allocate_registers_3(): + def f(a, b, c): + llop.gc_push_roots(lltype.Void, c, a) + llop.gc_pop_roots(lltype.Void, c, a) + while b > 0: + b -= 5 + llop.gc_push_roots(lltype.Void, a) + llop.gc_pop_roots(lltype.Void, a) + graph = make_graph(f, [llmemory.GCREF, int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + assert summary_regalloc(regalloc) == [('a', 1)] * 4 + [('c', 0)] + +def test_allocate_registers_4(): + def g(a, x): + return x # (or something different) + def f(a, b, c): + llop.gc_push_roots(lltype.Void, a, c) # 'a', 'c' + llop.gc_pop_roots(lltype.Void, a, c) + while b > 0: # 'a' only; 'c' not in push_roots + b -= 5 + llop.gc_push_roots(lltype.Void, a)# 'a' + d = g(a, c) + llop.gc_pop_roots(lltype.Void, a) + c = d + return c + graph = make_graph(f, [llmemory.GCREF, int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + assert summary_regalloc(regalloc) == [('a', 1)] * 3 + [('c', 0)] + +def test_allocate_registers_5(): + def g(a, x): + return x # (or something different) + def f(a, b, c): + while b > 0: # 'a', 'c' + b -= 5 + llop.gc_push_roots(lltype.Void, a, c) # 'a', 'c' + g(a, c) + llop.gc_pop_roots(lltype.Void, a, c) + while b < 10: + b += 2 + return c + graph = make_graph(f, [llmemory.GCREF, int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + assert summary_regalloc(regalloc) == [('a', 1)] * 2 + [('c', 0)] * 2 + +@given(strategies.lists(strategies.booleans())) +def test_make_bitmask(boollist): + index, bitmask = make_bitmask(boollist) + if index is None: + assert bitmask is None + else: + assert 0 <= index < len(boollist) + assert boollist[index] == False + assert bitmask >= 1 + while bitmask: + if bitmask & 1: + assert index >= 0 + assert boollist[index] == False + boollist[index] = True + bitmask >>= 1 + index -= 1 + assert boollist == [True] * len(boollist) + + +class FakeRegAlloc: + graph = '?' + + def __init__(self, expected_op, **colors): + self.expected_op = expected_op + self.numcolors = len(colors) + self.getcolor = colors.__getitem__ + + def check(self, got): + got = list(got) + result = [] + for spaceop in got: + assert spaceop.opname == self.expected_op + result.append((spaceop.args[0].value, spaceop.args[1])) + return result + +def test_expand_one_push_roots(): + regalloc = FakeRegAlloc('gc_save_root', a=0, b=1, c=2) + assert regalloc.check(expand_one_push_roots(regalloc, ['a', 'b', 'c'])) == [ + (0, 'a'), (1, 'b'), (2, 'c')] + assert regalloc.check(expand_one_push_roots(regalloc, ['a', 'c'])) == [ + (0, 'a'), (2, 'c'), (1, Constant(0x1, lltype.Signed))] + assert regalloc.check(expand_one_push_roots(regalloc, ['b'])) == [ + (1, 'b'), (2, Constant(0x5, lltype.Signed))] + assert regalloc.check(expand_one_push_roots(regalloc, ['a'])) == [ + (0, 'a'), (2, Constant(0x3, lltype.Signed))] + assert regalloc.check(expand_one_push_roots(regalloc, [])) == [ + (2, Constant(0x7, lltype.Signed))] + + assert list(expand_one_push_roots(None, [])) == [] + +def test_expand_one_pop_roots(): + regalloc = FakeRegAlloc('gc_restore_root', a=0, b=1, c=2) + assert regalloc.check(expand_one_pop_roots(regalloc, ['a', 'b', 'c'])) == [ + (0, 'a'), (1, 'b'), (2, 'c')] + assert regalloc.check(expand_one_pop_roots(regalloc, ['a', 'c'])) == [ + (0, 'a'), (2, 'c')] + assert regalloc.check(expand_one_pop_roots(regalloc, ['b'])) == [ + (1, 'b')] + assert regalloc.check(expand_one_pop_roots(regalloc, ['a'])) == [ + (0, 'a')] + assert regalloc.check(expand_one_pop_roots(regalloc, [])) == [] + + assert list(expand_one_pop_roots(None, [])) == [] + +def test_move_pushes_earlier_1(): + def g(a): + return a - 1 + def f(a, b): + a *= 2 + while a > 10: + llop.gc_push_roots(lltype.Void, b) + a = g(a) + llop.gc_pop_roots(lltype.Void, b) + return b + + graph = make_graph(f, [int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + assert graphmodel.summary(graph) == { + 'int_mul': 1, + 'gc_enter_roots_frame': 1, + 'gc_save_root': 1, + 'gc_restore_root': 1, + 'int_gt': 1, + 'direct_call': 1, + 'gc_leave_roots_frame': 1, + } + assert len(graph.startblock.operations) == 3 + assert graph.startblock.operations[0].opname == 'int_mul' + assert graph.startblock.operations[1].opname == 'gc_enter_roots_frame' + assert graph.startblock.operations[2].opname == 'gc_save_root' + assert graph.startblock.operations[2].args[0].value == 0 + postprocess_double_check(graph) + +def test_move_pushes_earlier_2(): + def g(a): + pass + def f(a, b): + llop.gc_push_roots(lltype.Void, b) + g(a) + llop.gc_pop_roots(lltype.Void, b) + while a > 10: + a -= 2 + llop.gc_push_roots(lltype.Void, b) + g(a) + llop.gc_pop_roots(lltype.Void, b) + return b + + graph = make_graph(f, [int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + assert graphmodel.summary(graph) == { + 'gc_save_root': 1, + 'gc_restore_root': 2, + 'int_gt': 1, + 'int_sub': 1, + 'direct_call': 2, + } + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + postprocess_double_check(graph) + +def test_remove_intrablock_push_roots(): + def g(a): + pass + def f(a, b): + llop.gc_push_roots(lltype.Void, b) + g(a) + llop.gc_pop_roots(lltype.Void, b) + llop.gc_push_roots(lltype.Void, b) + g(a) + llop.gc_pop_roots(lltype.Void, b) + return b + + graph = make_graph(f, [int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + expand_pop_roots(graph, regalloc) + assert graphmodel.summary(graph) == { + 'gc_save_root': 1, + 'gc_restore_root': 2, + 'direct_call': 2, + } + +PSTRUCT = lltype.Ptr(lltype.GcStruct('S')) + +def test_move_pushes_earlier_rename_1(): + def g(a): + pass + def f(a, b): + llop.gc_push_roots(lltype.Void, b) + g(a) + llop.gc_pop_roots(lltype.Void, b) + c = lltype.cast_opaque_ptr(PSTRUCT, b) + while a > 10: + a -= 2 + llop.gc_push_roots(lltype.Void, c) + g(a) + llop.gc_pop_roots(lltype.Void, c) + return c + + graph = make_graph(f, [int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + assert graphmodel.summary(graph) == { + 'gc_save_root': 1, + 'gc_restore_root': 2, + 'cast_opaque_ptr': 1, + 'int_gt': 1, + 'int_sub': 1, + 'direct_call': 2, + } + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + postprocess_double_check(graph) + +def test_move_pushes_earlier_rename_2(): + def g(a): + pass + def f(a, b): + llop.gc_push_roots(lltype.Void, b) + g(a) + llop.gc_pop_roots(lltype.Void, b) + while a > 10: + a -= 2 + c = lltype.cast_opaque_ptr(PSTRUCT, b) + llop.gc_push_roots(lltype.Void, c) + g(a) + llop.gc_pop_roots(lltype.Void, c) + return c + + graph = make_graph(f, [int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + assert graphmodel.summary(graph) == { + 'gc_save_root': 1, + 'gc_restore_root': 2, + 'cast_opaque_ptr': 1, + 'int_gt': 1, + 'int_sub': 1, + 'direct_call': 2, + } + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + postprocess_double_check(graph) + +def test_move_pushes_earlier_rename_3(): + def g(a): + pass + def f(a, b): + llop.gc_push_roots(lltype.Void, b) + g(a) + llop.gc_pop_roots(lltype.Void, b) + while a > 10: + a -= 2 + c = lltype.cast_opaque_ptr(PSTRUCT, b) + while a > 10: + a -= 2 + llop.gc_push_roots(lltype.Void, c) + g(a) + llop.gc_pop_roots(lltype.Void, c) + return c + + graph = make_graph(f, [int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + assert graphmodel.summary(graph) == { + 'gc_save_root': 1, + 'gc_restore_root': 2, + 'cast_opaque_ptr': 1, + 'int_gt': 2, + 'int_sub': 2, + 'direct_call': 2, + } + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + postprocess_double_check(graph) + +def test_move_pushes_earlier_rename_4(): + def g(a): + return a - 2 + def f(a, b): + while a > 10: + b1 = lltype.cast_opaque_ptr(PSTRUCT, b) + while a > 100: + a -= 3 + b2 = lltype.cast_opaque_ptr(llmemory.GCREF, b1) + llop.gc_push_roots(lltype.Void, b2) + a = g(a) + llop.gc_pop_roots(lltype.Void, b2) + b3 = lltype.cast_opaque_ptr(PSTRUCT, b2) + while a > 100: + a -= 4 + b4 = lltype.cast_opaque_ptr(llmemory.GCREF, b3) + llop.gc_push_roots(lltype.Void, b4) + a = g(a) + llop.gc_pop_roots(lltype.Void, b4) + b5 = lltype.cast_opaque_ptr(PSTRUCT, b4) + while a > 100: + a -= 5 + b = lltype.cast_opaque_ptr(llmemory.GCREF, b5) + return b + + graph = make_graph(f, [int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + assert graphmodel.summary(graph) == { + 'gc_save_root': 1, + 'gc_restore_root': 2, + 'cast_opaque_ptr': 6, + 'int_gt': 4, + 'int_sub': 3, + 'direct_call': 2, + } + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + postprocess_double_check(graph) + +def test_add_leave_roots_frame_1(): + def g(b): + pass + def f(a, b): + if a & 1: + llop.gc_push_roots(lltype.Void, b) + g(b) + llop.gc_pop_roots(lltype.Void, b) + a += 5 + else: + llop.gc_push_roots(lltype.Void, b) + g(b) + llop.gc_pop_roots(lltype.Void, b) + a += 6 + #...b forgotten here, even though it is pushed/popped above + while a > 100: + a -= 3 + return a + + graph = make_graph(f, [int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + assert len(graph.startblock.exits) == 2 + for link in graph.startblock.exits: + assert [op.opname for op in link.target.operations] == [ + 'gc_enter_roots_frame', + 'gc_save_root', + 'direct_call', + 'gc_restore_root', + 'gc_leave_roots_frame', + 'int_add'] + postprocess_double_check(graph) + +def test_add_leave_roots_frame_2(): + def g(b): + pass + def f(a, b): + llop.gc_push_roots(lltype.Void, b) + g(b) + llop.gc_pop_roots(lltype.Void, b) + #...b forgotten here; the next push/pop is empty + llop.gc_push_roots(lltype.Void) + g(b) + llop.gc_pop_roots(lltype.Void) + while a > 100: + a -= 3 + return a + + graph = make_graph(f, [int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + assert [op.opname for op in graph.startblock.operations] == [ + 'gc_enter_roots_frame', + 'gc_save_root', + 'direct_call', + 'gc_restore_root', + 'gc_leave_roots_frame', + 'direct_call'] + postprocess_double_check(graph) + +def test_bug_1(): + class W: + pass + def foo(a): + if a < 10: + return W() + else: + return None + def compare(w_a, w_b): + return W() + def fetch_compare(w_a, w_b): + return W() + def is_true(a, w_b): + return not a + def call_next(w_a): + return W() + + def f(a, w_tup): + llop.gc_push_roots(lltype.Void, w_tup) + w_key = foo(a) + llop.gc_pop_roots(lltype.Void, w_tup) + + llop.gc_push_roots(lltype.Void, w_key) + w_iter = foo(a) + llop.gc_pop_roots(lltype.Void, w_key) + + has_key = w_key is not None + hasit = False + w_maxit = None + w_max_val = None + + while True: + llop.gc_push_roots(lltype.Void, w_iter, w_key, w_maxit, w_max_val) + w_item = call_next(w_iter) + llop.gc_pop_roots(lltype.Void, w_iter, w_key, w_maxit, w_max_val) + + if has_key: + llop.gc_push_roots(lltype.Void, w_iter, w_key, + w_maxit, w_max_val, w_item) + w_compare_with = fetch_compare(w_key, w_item) + llop.gc_pop_roots(lltype.Void, w_iter, w_key, + w_maxit, w_max_val, w_item) + else: + w_compare_with = w_item + + if hasit: + llop.gc_push_roots(lltype.Void, w_iter, w_key, + w_maxit, w_max_val, w_item, w_compare_with) + w_bool = compare(w_compare_with, w_max_val) + llop.gc_pop_roots(lltype.Void, w_iter, w_key, + w_maxit, w_max_val, w_item, w_compare_with) + + llop.gc_push_roots(lltype.Void, w_iter, w_key, + w_maxit, w_max_val, w_item, w_compare_with) + condition = is_true(a, w_bool) + llop.gc_pop_roots(lltype.Void, w_iter, w_key, + w_maxit, w_max_val, w_item, w_compare_with) + else: + condition = True + + if condition: + hasit = True + w_maxit = w_item + w_max_val = w_compare_with + + graph = make_graph(f, [int, llmemory.GCREF]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + postprocess_double_check(graph) + +def test_bug_2(): + def f(w_tup): + while True: + llop.gc_push_roots(lltype.Void, w_tup) + llop.gc_pop_roots(lltype.Void, w_tup) + + graph = make_graph(f, [llmemory.GCREF]) + assert not graph.startblock.operations + # this test is about what occurs if the startblock of the graph + # is also reached from another block. None of the 'simplify' + # functions actually remove that, but the JIT transformation can... + graph.startblock = graph.startblock.exits[0].target + + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + postprocess_double_check(graph) + +def test_add_enter_roots_frame_remove_empty(): + class W: + pass + def g(): + return W() + def h(x): + pass + def k(): + pass + def f(): + llop.gc_push_roots(lltype.Void) + x = g() + llop.gc_pop_roots(lltype.Void) + llop.gc_push_roots(lltype.Void, x) + h(x) + llop.gc_pop_roots(lltype.Void, x) + llop.gc_push_roots(lltype.Void) + h(x) + llop.gc_pop_roots(lltype.Void) + llop.gc_push_roots(lltype.Void) + k() + llop.gc_pop_roots(lltype.Void) + + graph = make_graph(f, []) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + assert [op.opname for op in graph.startblock.operations] == [ + "direct_call", + "gc_enter_roots_frame", + "gc_save_root", + "direct_call", + "gc_restore_root", + "gc_leave_roots_frame", + "direct_call", + "direct_call", + ] + postprocess_double_check(graph) + +def test_add_enter_roots_frame_avoided(): + def g(x): + return x + def f(x, n): + if n > 100: + llop.gc_push_roots(lltype.Void, x) + g(x) + llop.gc_pop_roots(lltype.Void, x) + return x + + graph = make_graph(f, [llmemory.GCREF, int]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + assert [op.opname for op in graph.startblock.operations] == [ + 'int_gt', 'same_as'] + [fastpath, slowpath] = graph.startblock.exits + assert fastpath.target is graph.returnblock + block2 = slowpath.target + assert [op.opname for op in block2.operations] == [ + 'gc_enter_roots_frame', + 'gc_save_root', + 'direct_call', + 'gc_restore_root', + 'gc_leave_roots_frame'] + postprocess_double_check(graph) + +def test_fix_graph_after_inlining(): + # the graph of f looks like it inlined another graph, which itself + # would be "if x > 100: foobar()". The foobar() function is supposed + # to be the big slow-path. + def foobar(): + print 42 + def f(x): + llop.gc_push_roots(lltype.Void, x) + if x > 100: # slow-path + foobar() + llop.gc_pop_roots(lltype.Void, x) + return x + graph = make_graph(f, [int]) + postprocess_inlining(graph) + cleanup_graph(graph) + assert [op.opname for op in graph.startblock.operations] == [ + 'int_gt', 'same_as'] + [fastpath, slowpath] = graph.startblock.exits + assert fastpath.target is graph.returnblock + block2 = slowpath.target + [v] = block2.inputargs + assert block2.operations[0].opname == 'gc_push_roots' + assert block2.operations[0].args == [v] + assert block2.operations[1].opname == 'direct_call' # -> foobar + assert block2.operations[2].opname == 'gc_pop_roots' + assert block2.operations[2].args == [v] + assert len(block2.exits) == 1 + assert block2.exits[0].target is graph.returnblock diff --git a/rpython/memory/gctransform/transform.py b/rpython/memory/gctransform/transform.py --- a/rpython/memory/gctransform/transform.py +++ b/rpython/memory/gctransform/transform.py @@ -97,6 +97,7 @@ self.inline = inline if translator and inline: self.lltype_to_classdef = translator.rtyper.lltype_to_classdef_mapping() + self.raise_analyzer = RaiseAnalyzer(translator) self.graphs_to_inline = {} self.graph_dependencies = {} self.ll_finalizers_ptrs = [] @@ -113,28 +114,36 @@ self.seen_graphs.add(graph) self.minimal_transform.add(graph) - def inline_helpers(self, graphs): + def inline_helpers_into(self, graph): from rpython.translator.backendopt.inline import iter_callsites - raise_analyzer = RaiseAnalyzer(self.translator) + to_enum = [] + for called, block, i in iter_callsites(graph, None): + if called in self.graphs_to_inline: + to_enum.append(called) + any_inlining = False + for inline_graph in to_enum: + try: + inline.inline_function(self.translator, inline_graph, graph, + self.lltype_to_classdef, + self.raise_analyzer, + cleanup=False) + any_inlining = True + except inline.CannotInline as e: + print 'CANNOT INLINE:', e + print '\t%s into %s' % (inline_graph, graph) + raise # for now, make it a fatal error + cleanup_graph(graph) + if any_inlining: + constant_fold_graph(graph) + return any_inlining + + def inline_helpers_and_postprocess(self, graphs): for graph in graphs: - to_enum = [] - for called, block, i in iter_callsites(graph, None): - if called in self.graphs_to_inline: - to_enum.append(called) - must_constfold = False - for inline_graph in to_enum: - try: - inline.inline_function(self.translator, inline_graph, graph, - self.lltype_to_classdef, - raise_analyzer, - cleanup=False) - must_constfold = True - except inline.CannotInline as e: - print 'CANNOT INLINE:', e - print '\t%s into %s' % (inline_graph, graph) - cleanup_graph(graph) - if must_constfold: - constant_fold_graph(graph) + any_inlining = self.inline and self.inline_helpers_into(graph) + self.postprocess_graph(graph, any_inlining) + + def postprocess_graph(self, graph, any_inlining): + pass def compute_borrowed_vars(self, graph): # the input args are borrowed, and stay borrowed for as long as they diff --git a/rpython/rlib/_stacklet_shadowstack.py b/rpython/rlib/_stacklet_shadowstack.py --- a/rpython/rlib/_stacklet_shadowstack.py +++ b/rpython/rlib/_stacklet_shadowstack.py @@ -47,14 +47,15 @@ SIZEADDR = llmemory.sizeof(llmemory.Address) def customtrace(gc, obj, callback, arg): + from rpython.memory.gctransform.shadowstack import walk_stack_root + stacklet = llmemory.cast_adr_to_ptr(obj, STACKLET_PTR) sscopy = stacklet.s_sscopy if sscopy: length_bytes = sscopy.signed[0] - while length_bytes > 0: - addr = sscopy + length_bytes - gc._trace_callback(callback, arg, addr) - length_bytes -= SIZEADDR + walk_stack_root(gc._trace_callback, callback, arg, + sscopy + SIZEADDR, sscopy + SIZEADDR + length_bytes, + is_minor=False) lambda_customtrace = lambda: customtrace def sscopy_detach_shadow_stack(): diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit