commit bb46630513ddfebd6e6dc79491912412bb8985ff Author: Nick Mathewson <ni...@torproject.org> Date: Tue Aug 11 09:07:54 2015 -0400
Hack up the scripts/maint/*callgraph* scripts to do more, better These scripts are now a little more bulletproof, cache data a little better, and generate more information. Notably, they search for the vectors or edges to cut that would lower the size of the largest SCC. --- scripts/maint/analyze_callgraph.py | 120 +++++++++++++++++++++++++++++++++--- scripts/maint/display_callgraph.py | 25 ++++++-- 2 files changed, 132 insertions(+), 13 deletions(-) diff --git a/scripts/maint/analyze_callgraph.py b/scripts/maint/analyze_callgraph.py index 69ae128..b284604 100755 --- a/scripts/maint/analyze_callgraph.py +++ b/scripts/maint/analyze_callgraph.py @@ -13,7 +13,7 @@ class Parser: def enter_func(self, name): if self.infunc and not self.extern: self.calls.setdefault(self.infunc, set()).update( self.calledfns ) - + self.calledfns = set() self.infunc = name self.extern = False @@ -23,7 +23,7 @@ class Parser: self.extern = False self.calledfns = set() for line in inp: - m = re.match(r"Call graph node for function: '([^']+)'", line) + m = re.match(r"Call graph node for function: '([^']+)'", line) if m: self.enter_func(m.group(1)) continue @@ -42,18 +42,28 @@ class Parser: def transitive_closure(g): + passno = 0 changed = True g = copy.deepcopy(g) + import random while changed: + passno += 1 changed = False - print "X" - for k in g.keys(): + keys = g.keys() + idx = 0 + for k in keys: + idx += 1 + print "Pass %d/?: %d/%d\r" %(passno, idx, len(keys)), + sys.stdout.flush() newset = g[k].copy() for fn in g[k]: newset.update(g.get(fn, set())) if len(newset) != len(g[k]): g[k].update( newset ) changed = True + + print + return g def strongly_connected_components(g): @@ -96,22 +106,114 @@ def strongly_connected_components(g): return all_sccs +def biggest_component(sccs): + return max(len(c) for c in sccs) + +def connection_bottlenecks(callgraph): + + callers = {} + for fn in callgraph: + for fn2 in callgraph[fn]: + callers.setdefault(fn2, set()).add(fn) + + components = strongly_connected_components(callgraph) + components.sort(key=len) + big_component_fns = components[-1] + size = len(big_component_fns) + + function_bottlenecks = fn_results = [] + + total = len(big_component_fns) + idx = 0 + for fn in big_component_fns: + idx += 1 + print "Pass 1/3: %d/%d\r"%(idx, total), + sys.stdout.flush() + cg2 = copy.deepcopy(callgraph) + del cg2[fn] + + fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn) ) + + print + bcf_set = set(big_component_fns) + + call_bottlenecks = fn_results = [] + result_set = set() + total = len(big_component_fns) + idx = 0 + for fn in big_component_fns: + fn_callers = callers[fn].intersection(bcf_set) + idx += 1 + if len(fn_callers) != 1: + continue + + print "Pass 2/3: %d/%d\r"%(idx, total), + sys.stdout.flush() + + caller = fn_callers.pop() + assert len(fn_callers) == 0 + cg2 = copy.deepcopy(callgraph) + cg2[caller].remove(fn) + + fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn, "called by", caller) ) + result_set.add( (caller, fn) ) + + print + + total = len(big_component_fns) + idx = 0 + for fn in big_component_fns: + fn_calls = callgraph[fn].intersection(bcf_set) + idx += 1 + if len(fn_calls) != 1: + continue + + print "Pass 3/3: %d/%d\r"%(idx, total), + sys.stdout.flush() + + callee = fn_calls.pop() + if (fn, callee) in result_set: + continue + + assert len(fn_calls) == 0 + cg2 = copy.deepcopy(callgraph) + cg2[fn].remove(callee) + + fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), callee, "called by", fn) ) + + print + + + return (function_bottlenecks, call_bottlenecks) + if __name__ == '__main__': p = Parser() for fname in sys.argv[1:]: with open(fname, 'r') as f: p.parse_callgraph_file(f) + + sys.stdout.flush + + print "Building callgraph" callgraph = p.extract_callgraph() + print "Finding strongly connected components" sccs = strongly_connected_components(callgraph) + print "Finding the transitive closure of the callgraph.." closure = transitive_closure(callgraph) + print "Finding bottlenecks..." + bottlenecks = connection_bottlenecks(callgraph) + + data = { + 'callgraph' : callgraph, + 'sccs' : sccs, + 'closure' : closure, + 'bottlenecks' : bottlenecks } + with open('callgraph.pkl', 'w') as f: - cPickle.dump(callgraph, f) + cPickle.dump(data, f) + - with open('callgraph_closure.pkl', 'w') as f: - cPickle.dump(closure, f) - with open('callgraph_sccs.pkl', 'w') as f: - cPickle.dump(sccs, f) diff --git a/scripts/maint/display_callgraph.py b/scripts/maint/display_callgraph.py index 9ead56c..211bfda 100755 --- a/scripts/maint/display_callgraph.py +++ b/scripts/maint/display_callgraph.py @@ -2,9 +2,12 @@ import cPickle -callgraph = cPickle.load(open("callgraph.pkl")) -closure = cPickle.load(open("callgraph_closure.pkl")) -sccs = cPickle.load(open("callgraph_sccs.pkl")) +data = cPickle.load(open("callgraph.pkl")) + +callgraph = data['callgraph'] +closure = data['closure'] +sccs = data['sccs'] +fn_bottle, call_bottle = data['bottlenecks'] for n_reachable, fn in sorted(list((len(r), fn) for fn, r in closure.iteritems())): print "%s can reach %s other functions." %(fn, n_reachable) @@ -13,10 +16,24 @@ for n_reachable, fn in sorted(list((len(r), fn) for fn, r in closure.iteritems() c = [ (len(component), component) for component in sccs ] c.sort() +print "\n================================" + for n, component in c: if n < 2: continue - print n, component + print "Strongly connected component of size %d:"%n + print component + + +print "\n================================" +print "====== Number of functions pulled into blob, by function in blob." +fn_bottle.sort() +for n, fn in fn_bottle[-30:]: + print "%3d: %s"%(n, fn) +print "====== Number of functions pulled into blob, by call in blob." +call_bottle.sort() +for n, fn1, _, fn2 in call_bottle[-30:]: + print "%3d: %s -> %s "%(n, fn2, fn1) _______________________________________________ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits