Author: Armin Rigo <ar...@tunes.org>
Branch: shadowstack-perf-2
Changeset: r84818:c35fab8d27db
Date: 2016-05-29 19:10 +0200
http://bitbucket.org/pypy/pypy/changeset/c35fab8d27db/

Log:    Finally found a way to handle mallocs correctly in RPython code,
        i.e. without the two sources of nonsense overhead:

        * only push/pop roots around the slow-path

        * as a result, gcc can further optimize the fast-path: the address
        that we grab from the nursery is not a NULL, no need to check that
        again

diff --git a/rpython/memory/gctransform/framework.py 
b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -609,8 +609,8 @@
                     "the custom trace hook %r for %r can cause "
                     "the GC to be called!" % (func, TP))
 
-    def postprocess_graph(self, graph):
-        self.root_walker.postprocess_graph(self, graph)
+    def postprocess_graph(self, graph, any_inlining):
+        self.root_walker.postprocess_graph(self, graph, any_inlining)
 
     def consider_constant(self, TYPE, value):
         self.layoutbuilder.consider_constant(TYPE, value, self.gcdata.gc)
diff --git a/rpython/memory/gctransform/shadowcolor.py 
b/rpython/memory/gctransform/shadowcolor.py
--- a/rpython/memory/gctransform/shadowcolor.py
+++ b/rpython/memory/gctransform/shadowcolor.py
@@ -664,3 +664,69 @@
     checkgraph(graph)
     postprocess_double_check(graph)
     return (regalloc is not None)
+
+
+def postprocess_inlining(graph):
+    """We first write calls to GC functions with gc_push_roots(...) and
+    gc_pop_roots(...) around.  Then we inline some of these functions.
+    As a result, the gc_push_roots and gc_pop_roots are no longer in
+    the same block.  Fix that by moving the gc_push_roots/gc_pop_roots
+    inside the inlined portion of the graph, around every call.
+
+    We could also get a correct result by doing things in a different
+    order, e.g. first postprocess_graph() and then inlining.  However,
+    this order brings an important benefit: if the inlined graph has a
+    fast-path, like malloc_fixedsize(), then there are no gc_push_roots
+    and gc_pop_roots left along the fast-path.
+    """
+    for block in graph.iterblocks():
+        for i in range(len(block.operations)-1, -1, -1):
+            op = block.operations[i]
+            if op.opname == 'gc_pop_roots':
+                break
+            if op.opname == 'gc_push_roots':
+                _fix_graph_after_inlining(graph, block, i)
+                break
+
+def _fix_graph_after_inlining(graph, initial_block, initial_index):
+    op = initial_block.operations.pop(initial_index)
+    assert op.opname == 'gc_push_roots'
+    seen = set()
+    pending = [(initial_block, initial_index, op.args)]
+    while pending:
+        block, start_index, track_args = pending.pop()
+        if block in seen:
+            continue
+        seen.add(block)
+        assert block.operations != ()     # did not find the gc_pop_roots?
+        new_operations = block.operations[:start_index]
+        stop = False
+        for i in range(start_index, len(block.operations)):
+            op = block.operations[i]
+            if op.opname == 'gc_push_roots':
+                raise Exception("%r: seems to have inlined another graph "
+                                "which also uses GC roots" % (graph,))
+            if op.opname == 'gc_pop_roots':
+                # end of the inlined graph, drop gc_pop_roots, keep the tail
+                new_operations += block.operations[i + 1:]
+                stop = True
+                break
+            if op.opname in ('direct_call', 'indirect_call'):
+                new_operations.append(SpaceOperation('gc_push_roots',
+                                                     track_args[:],
+                                                     varoftype(lltype.Void)))
+                new_operations.append(op)
+                new_operations.append(SpaceOperation('gc_pop_roots',
+                                                     track_args[:],
+                                                     varoftype(lltype.Void)))
+            else:
+                new_operations.append(op)
+        block.operations = new_operations
+        if not stop:
+            for link in block.exits:
+                track_next = []
+                for v in track_args:
+                    i = link.args.index(v)   # should really be here
+                    w = link.target.inputargs[i]
+                    track_next.append(w)
+                pending.append((link.target, 0, track_next))
diff --git a/rpython/memory/gctransform/shadowstack.py 
b/rpython/memory/gctransform/shadowstack.py
--- a/rpython/memory/gctransform/shadowstack.py
+++ b/rpython/memory/gctransform/shadowstack.py
@@ -224,8 +224,10 @@
         from rpython.rlib import _stacklet_shadowstack
         _stacklet_shadowstack.complete_destrptr(gctransformer)
 
-    def postprocess_graph(self, gct, graph):
+    def postprocess_graph(self, gct, graph, any_inlining):
         from rpython.memory.gctransform import shadowcolor
+        if any_inlining:
+            shadowcolor.postprocess_inlining(graph)
         use_push_pop = shadowcolor.postprocess_graph(graph, gct.c_const_gcdata)
         if use_push_pop and graph in gct.graphs_to_inline:
             log.WARNING("%r is marked for later inlining, "
diff --git a/rpython/memory/gctransform/transform.py 
b/rpython/memory/gctransform/transform.py
--- a/rpython/memory/gctransform/transform.py
+++ b/rpython/memory/gctransform/transform.py
@@ -97,6 +97,7 @@
         self.inline = inline
         if translator and inline:
             self.lltype_to_classdef = 
translator.rtyper.lltype_to_classdef_mapping()
+            self.raise_analyzer = RaiseAnalyzer(translator)
         self.graphs_to_inline = {}
         self.graph_dependencies = {}
         self.ll_finalizers_ptrs = []
@@ -113,28 +114,33 @@
         self.seen_graphs.add(graph)
         self.minimal_transform.add(graph)
 
+    def inline_helpers_into(self, graph):
+        from rpython.translator.backendopt.inline import iter_callsites
+        to_enum = []
+        for called, block, i in iter_callsites(graph, None):
+            if called in self.graphs_to_inline:
+                to_enum.append(called)
+        any_inlining = False
+        for inline_graph in to_enum:
+            try:
+                inline.inline_function(self.translator, inline_graph, graph,
+                                       self.lltype_to_classdef,
+                                       self.raise_analyzer,
+                                       cleanup=False)
+                any_inlining = True
+            except inline.CannotInline as e:
+                print 'CANNOT INLINE:', e
+                print '\t%s into %s' % (inline_graph, graph)
+                raise      # for now, make it a fatal error
+        cleanup_graph(graph)
+        if any_inlining:
+            constant_fold_graph(graph)
+        return any_inlining
+
     def inline_helpers(self, graphs):
-        from rpython.translator.backendopt.inline import iter_callsites
-        raise_analyzer = RaiseAnalyzer(self.translator)
         for graph in graphs:
-            to_enum = []
-            for called, block, i in iter_callsites(graph, None):
-                if called in self.graphs_to_inline:
-                    to_enum.append(called)
-            must_constfold = False
-            for inline_graph in to_enum:
-                try:
-                    inline.inline_function(self.translator, inline_graph, 
graph,
-                                           self.lltype_to_classdef,
-                                           raise_analyzer,
-                                           cleanup=False)
-                    must_constfold = True
-                except inline.CannotInline as e:
-                    print 'CANNOT INLINE:', e
-                    print '\t%s into %s' % (inline_graph, graph)
-            cleanup_graph(graph)
-            if must_constfold:
-                constant_fold_graph(graph)
+            any_inlining = self.inline and self.inline_helpers_into(graph)
+            self.postprocess_graph(graph, any_inlining)
 
     def compute_borrowed_vars(self, graph):
         # the input args are borrowed, and stay borrowed for as long as they
@@ -236,8 +242,6 @@
                 else:
                     insert_empty_block(link, llops)
 
-        self.postprocess_graph(graph)
-
         # remove the empty block at the start of the graph, which should
         # still be empty (but let's check)
         if starts_with_empty_block(graph) and inserted_empty_startblock:
@@ -254,9 +258,6 @@
         graph.exc_cleanup = (v, list(llops))
         return is_borrowed    # xxx for tests only
 
-    def postprocess_graph(self, graph):
-        pass
-
     def annotate_helper(self, ll_helper, ll_args, ll_result, inline=False):
         assert not self.finished_helpers
         args_s = map(lltype_to_annotation, ll_args)
diff --git a/rpython/translator/c/database.py b/rpython/translator/c/database.py
--- a/rpython/translator/c/database.py
+++ b/rpython/translator/c/database.py
@@ -348,6 +348,7 @@
         assert not self.delayedfunctionptrs
         self.completed = True
         if self.gctransformer is not None and self.gctransformer.inline:
+            log.database("Inlining GC helpers and postprocessing")
             self.gctransformer.inline_helpers(self.all_graphs())
         if show_progress:
             dump()
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to