Author: Armin Rigo <ar...@tunes.org> Branch: shadowstack-perf-2 Changeset: r84818:c35fab8d27db Date: 2016-05-29 19:10 +0200 http://bitbucket.org/pypy/pypy/changeset/c35fab8d27db/
Log: Finally found a way to handle mallocs correctly in RPython code, i.e. without the two sources of nonsense overhead: * only push/pop roots around the slow-path * as a result, gcc can further optimize the fast-path: the address that we grab from the nursery is not a NULL, no need to check that again diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -609,8 +609,8 @@ "the custom trace hook %r for %r can cause " "the GC to be called!" % (func, TP)) - def postprocess_graph(self, graph): - self.root_walker.postprocess_graph(self, graph) + def postprocess_graph(self, graph, any_inlining): + self.root_walker.postprocess_graph(self, graph, any_inlining) def consider_constant(self, TYPE, value): self.layoutbuilder.consider_constant(TYPE, value, self.gcdata.gc) diff --git a/rpython/memory/gctransform/shadowcolor.py b/rpython/memory/gctransform/shadowcolor.py --- a/rpython/memory/gctransform/shadowcolor.py +++ b/rpython/memory/gctransform/shadowcolor.py @@ -664,3 +664,69 @@ checkgraph(graph) postprocess_double_check(graph) return (regalloc is not None) + + +def postprocess_inlining(graph): + """We first write calls to GC functions with gc_push_roots(...) and + gc_pop_roots(...) around. Then we inline some of these functions. + As a result, the gc_push_roots and gc_pop_roots are no longer in + the same block. Fix that by moving the gc_push_roots/gc_pop_roots + inside the inlined portion of the graph, around every call. + + We could also get a correct result by doing things in a different + order, e.g. first postprocess_graph() and then inlining. However, + this order brings an important benefit: if the inlined graph has a + fast-path, like malloc_fixedsize(), then there are no gc_push_roots + and gc_pop_roots left along the fast-path. + """ + for block in graph.iterblocks(): + for i in range(len(block.operations)-1, -1, -1): + op = block.operations[i] + if op.opname == 'gc_pop_roots': + break + if op.opname == 'gc_push_roots': + _fix_graph_after_inlining(graph, block, i) + break + +def _fix_graph_after_inlining(graph, initial_block, initial_index): + op = initial_block.operations.pop(initial_index) + assert op.opname == 'gc_push_roots' + seen = set() + pending = [(initial_block, initial_index, op.args)] + while pending: + block, start_index, track_args = pending.pop() + if block in seen: + continue + seen.add(block) + assert block.operations != () # did not find the gc_pop_roots? + new_operations = block.operations[:start_index] + stop = False + for i in range(start_index, len(block.operations)): + op = block.operations[i] + if op.opname == 'gc_push_roots': + raise Exception("%r: seems to have inlined another graph " + "which also uses GC roots" % (graph,)) + if op.opname == 'gc_pop_roots': + # end of the inlined graph, drop gc_pop_roots, keep the tail + new_operations += block.operations[i + 1:] + stop = True + break + if op.opname in ('direct_call', 'indirect_call'): + new_operations.append(SpaceOperation('gc_push_roots', + track_args[:], + varoftype(lltype.Void))) + new_operations.append(op) + new_operations.append(SpaceOperation('gc_pop_roots', + track_args[:], + varoftype(lltype.Void))) + else: + new_operations.append(op) + block.operations = new_operations + if not stop: + for link in block.exits: + track_next = [] + for v in track_args: + i = link.args.index(v) # should really be here + w = link.target.inputargs[i] + track_next.append(w) + pending.append((link.target, 0, track_next)) diff --git a/rpython/memory/gctransform/shadowstack.py b/rpython/memory/gctransform/shadowstack.py --- a/rpython/memory/gctransform/shadowstack.py +++ b/rpython/memory/gctransform/shadowstack.py @@ -224,8 +224,10 @@ from rpython.rlib import _stacklet_shadowstack _stacklet_shadowstack.complete_destrptr(gctransformer) - def postprocess_graph(self, gct, graph): + def postprocess_graph(self, gct, graph, any_inlining): from rpython.memory.gctransform import shadowcolor + if any_inlining: + shadowcolor.postprocess_inlining(graph) use_push_pop = shadowcolor.postprocess_graph(graph, gct.c_const_gcdata) if use_push_pop and graph in gct.graphs_to_inline: log.WARNING("%r is marked for later inlining, " diff --git a/rpython/memory/gctransform/transform.py b/rpython/memory/gctransform/transform.py --- a/rpython/memory/gctransform/transform.py +++ b/rpython/memory/gctransform/transform.py @@ -97,6 +97,7 @@ self.inline = inline if translator and inline: self.lltype_to_classdef = translator.rtyper.lltype_to_classdef_mapping() + self.raise_analyzer = RaiseAnalyzer(translator) self.graphs_to_inline = {} self.graph_dependencies = {} self.ll_finalizers_ptrs = [] @@ -113,28 +114,33 @@ self.seen_graphs.add(graph) self.minimal_transform.add(graph) + def inline_helpers_into(self, graph): + from rpython.translator.backendopt.inline import iter_callsites + to_enum = [] + for called, block, i in iter_callsites(graph, None): + if called in self.graphs_to_inline: + to_enum.append(called) + any_inlining = False + for inline_graph in to_enum: + try: + inline.inline_function(self.translator, inline_graph, graph, + self.lltype_to_classdef, + self.raise_analyzer, + cleanup=False) + any_inlining = True + except inline.CannotInline as e: + print 'CANNOT INLINE:', e + print '\t%s into %s' % (inline_graph, graph) + raise # for now, make it a fatal error + cleanup_graph(graph) + if any_inlining: + constant_fold_graph(graph) + return any_inlining + def inline_helpers(self, graphs): - from rpython.translator.backendopt.inline import iter_callsites - raise_analyzer = RaiseAnalyzer(self.translator) for graph in graphs: - to_enum = [] - for called, block, i in iter_callsites(graph, None): - if called in self.graphs_to_inline: - to_enum.append(called) - must_constfold = False - for inline_graph in to_enum: - try: - inline.inline_function(self.translator, inline_graph, graph, - self.lltype_to_classdef, - raise_analyzer, - cleanup=False) - must_constfold = True - except inline.CannotInline as e: - print 'CANNOT INLINE:', e - print '\t%s into %s' % (inline_graph, graph) - cleanup_graph(graph) - if must_constfold: - constant_fold_graph(graph) + any_inlining = self.inline and self.inline_helpers_into(graph) + self.postprocess_graph(graph, any_inlining) def compute_borrowed_vars(self, graph): # the input args are borrowed, and stay borrowed for as long as they @@ -236,8 +242,6 @@ else: insert_empty_block(link, llops) - self.postprocess_graph(graph) - # remove the empty block at the start of the graph, which should # still be empty (but let's check) if starts_with_empty_block(graph) and inserted_empty_startblock: @@ -254,9 +258,6 @@ graph.exc_cleanup = (v, list(llops)) return is_borrowed # xxx for tests only - def postprocess_graph(self, graph): - pass - def annotate_helper(self, ll_helper, ll_args, ll_result, inline=False): assert not self.finished_helpers args_s = map(lltype_to_annotation, ll_args) diff --git a/rpython/translator/c/database.py b/rpython/translator/c/database.py --- a/rpython/translator/c/database.py +++ b/rpython/translator/c/database.py @@ -348,6 +348,7 @@ assert not self.delayedfunctionptrs self.completed = True if self.gctransformer is not None and self.gctransformer.inline: + log.database("Inlining GC helpers and postprocessing") self.gctransformer.inline_helpers(self.all_graphs()) if show_progress: dump() _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit