Author: Armin Rigo <[email protected]>
Branch: shadowstack-perf-2
Changeset: r84818:c35fab8d27db
Date: 2016-05-29 19:10 +0200
http://bitbucket.org/pypy/pypy/changeset/c35fab8d27db/
Log: Finally found a way to handle mallocs correctly in RPython code,
i.e. without the two sources of nonsense overhead we had so far:
* the roots are now pushed/popped only around the slow-path, so no
gc_push_roots/gc_pop_roots are left along the fast-path
* as a result, gcc can further optimize the fast-path: the address
that we grab from the nursery is known not to be NULL, so there is no
need to check that again
diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -609,8 +609,8 @@
"the custom trace hook %r for %r can cause "
"the GC to be called!" % (func, TP))
- def postprocess_graph(self, graph):
- self.root_walker.postprocess_graph(self, graph)
+ def postprocess_graph(self, graph, any_inlining):
+ self.root_walker.postprocess_graph(self, graph, any_inlining)
def consider_constant(self, TYPE, value):
self.layoutbuilder.consider_constant(TYPE, value, self.gcdata.gc)
diff --git a/rpython/memory/gctransform/shadowcolor.py b/rpython/memory/gctransform/shadowcolor.py
--- a/rpython/memory/gctransform/shadowcolor.py
+++ b/rpython/memory/gctransform/shadowcolor.py
@@ -664,3 +664,69 @@
checkgraph(graph)
postprocess_double_check(graph)
return (regalloc is not None)
+
+
+def postprocess_inlining(graph):
+ """We first write calls to GC functions with gc_push_roots(...) and
+ gc_pop_roots(...) around. Then we inline some of these functions.
+ As a result, the gc_push_roots and gc_pop_roots are no longer in
+ the same block. Fix that by moving the gc_push_roots/gc_pop_roots
+ inside the inlined portion of the graph, around every call.
+
+ We could also get a correct result by doing things in a different
+ order, e.g. first postprocess_graph() and then inlining. However,
+ this order brings an important benefit: if the inlined graph has a
+ fast-path, like malloc_fixedsize(), then there are no gc_push_roots
+ and gc_pop_roots left along the fast-path.
+ """
+ for block in graph.iterblocks():
+ for i in range(len(block.operations)-1, -1, -1):
+ op = block.operations[i]
+ if op.opname == 'gc_pop_roots':
+ break
+ if op.opname == 'gc_push_roots':
+ _fix_graph_after_inlining(graph, block, i)
+ break
+
+def _fix_graph_after_inlining(graph, initial_block, initial_index):
+ op = initial_block.operations.pop(initial_index)
+ assert op.opname == 'gc_push_roots'
+ seen = set()
+ pending = [(initial_block, initial_index, op.args)]
+ while pending:
+ block, start_index, track_args = pending.pop()
+ if block in seen:
+ continue
+ seen.add(block)
+ assert block.operations != () # did not find the gc_pop_roots?
+ new_operations = block.operations[:start_index]
+ stop = False
+ for i in range(start_index, len(block.operations)):
+ op = block.operations[i]
+ if op.opname == 'gc_push_roots':
+ raise Exception("%r: seems to have inlined another graph "
+ "which also uses GC roots" % (graph,))
+ if op.opname == 'gc_pop_roots':
+ # end of the inlined graph, drop gc_pop_roots, keep the tail
+ new_operations += block.operations[i + 1:]
+ stop = True
+ break
+ if op.opname in ('direct_call', 'indirect_call'):
+ new_operations.append(SpaceOperation('gc_push_roots',
+ track_args[:],
+ varoftype(lltype.Void)))
+ new_operations.append(op)
+ new_operations.append(SpaceOperation('gc_pop_roots',
+ track_args[:],
+ varoftype(lltype.Void)))
+ else:
+ new_operations.append(op)
+ block.operations = new_operations
+ if not stop:
+ for link in block.exits:
+ track_next = []
+ for v in track_args:
+ i = link.args.index(v) # should really be here
+ w = link.target.inputargs[i]
+ track_next.append(w)
+ pending.append((link.target, 0, track_next))
diff --git a/rpython/memory/gctransform/shadowstack.py b/rpython/memory/gctransform/shadowstack.py
--- a/rpython/memory/gctransform/shadowstack.py
+++ b/rpython/memory/gctransform/shadowstack.py
@@ -224,8 +224,10 @@
from rpython.rlib import _stacklet_shadowstack
_stacklet_shadowstack.complete_destrptr(gctransformer)
- def postprocess_graph(self, gct, graph):
+ def postprocess_graph(self, gct, graph, any_inlining):
from rpython.memory.gctransform import shadowcolor
+ if any_inlining:
+ shadowcolor.postprocess_inlining(graph)
use_push_pop = shadowcolor.postprocess_graph(graph, gct.c_const_gcdata)
if use_push_pop and graph in gct.graphs_to_inline:
log.WARNING("%r is marked for later inlining, "
diff --git a/rpython/memory/gctransform/transform.py b/rpython/memory/gctransform/transform.py
--- a/rpython/memory/gctransform/transform.py
+++ b/rpython/memory/gctransform/transform.py
@@ -97,6 +97,7 @@
self.inline = inline
if translator and inline:
self.lltype_to_classdef =
translator.rtyper.lltype_to_classdef_mapping()
+ self.raise_analyzer = RaiseAnalyzer(translator)
self.graphs_to_inline = {}
self.graph_dependencies = {}
self.ll_finalizers_ptrs = []
@@ -113,28 +114,33 @@
self.seen_graphs.add(graph)
self.minimal_transform.add(graph)
+ def inline_helpers_into(self, graph):
+ from rpython.translator.backendopt.inline import iter_callsites
+ to_enum = []
+ for called, block, i in iter_callsites(graph, None):
+ if called in self.graphs_to_inline:
+ to_enum.append(called)
+ any_inlining = False
+ for inline_graph in to_enum:
+ try:
+ inline.inline_function(self.translator, inline_graph, graph,
+ self.lltype_to_classdef,
+ self.raise_analyzer,
+ cleanup=False)
+ any_inlining = True
+ except inline.CannotInline as e:
+ print 'CANNOT INLINE:', e
+ print '\t%s into %s' % (inline_graph, graph)
+ raise # for now, make it a fatal error
+ cleanup_graph(graph)
+ if any_inlining:
+ constant_fold_graph(graph)
+ return any_inlining
+
def inline_helpers(self, graphs):
- from rpython.translator.backendopt.inline import iter_callsites
- raise_analyzer = RaiseAnalyzer(self.translator)
for graph in graphs:
- to_enum = []
- for called, block, i in iter_callsites(graph, None):
- if called in self.graphs_to_inline:
- to_enum.append(called)
- must_constfold = False
- for inline_graph in to_enum:
- try:
- inline.inline_function(self.translator, inline_graph,
graph,
- self.lltype_to_classdef,
- raise_analyzer,
- cleanup=False)
- must_constfold = True
- except inline.CannotInline as e:
- print 'CANNOT INLINE:', e
- print '\t%s into %s' % (inline_graph, graph)
- cleanup_graph(graph)
- if must_constfold:
- constant_fold_graph(graph)
+ any_inlining = self.inline and self.inline_helpers_into(graph)
+ self.postprocess_graph(graph, any_inlining)
def compute_borrowed_vars(self, graph):
# the input args are borrowed, and stay borrowed for as long as they
@@ -236,8 +242,6 @@
else:
insert_empty_block(link, llops)
- self.postprocess_graph(graph)
-
# remove the empty block at the start of the graph, which should
# still be empty (but let's check)
if starts_with_empty_block(graph) and inserted_empty_startblock:
@@ -254,9 +258,6 @@
graph.exc_cleanup = (v, list(llops))
return is_borrowed # xxx for tests only
- def postprocess_graph(self, graph):
- pass
-
def annotate_helper(self, ll_helper, ll_args, ll_result, inline=False):
assert not self.finished_helpers
args_s = map(lltype_to_annotation, ll_args)
diff --git a/rpython/translator/c/database.py b/rpython/translator/c/database.py
--- a/rpython/translator/c/database.py
+++ b/rpython/translator/c/database.py
@@ -348,6 +348,7 @@
assert not self.delayedfunctionptrs
self.completed = True
if self.gctransformer is not None and self.gctransformer.inline:
+ log.database("Inlining GC helpers and postprocessing")
self.gctransformer.inline_helpers(self.all_graphs())
if show_progress:
dump()
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit