Author: Anton Gulenko <[email protected]>
Branch: storage
Changeset: r856:f4ae062456ae
Date: 2014-07-01 12:12 +0200
http://bitbucket.org/pypy/lang-smalltalk/changeset/f4ae062456ae/
Log: Added script to parse storage-log and convert to summary or dot-
string/graph.
diff --git a/spyvm/tool/storagelog_parser.py b/spyvm/tool/storagelog_parser.py
new file mode 100644
--- /dev/null
+++ b/spyvm/tool/storagelog_parser.py
@@ -0,0 +1,529 @@
+
+import re, sys, operator
+
+OPERATIONS = ["Filledin", "Initialized", "Switched"]
+
+# ====================================================================
+# ======== Basic functions
+# ====================================================================
+
+def parse(filename, flags):
+ entries = []
+ with open(filename, 'r', 1) as file:
+ while True:
+ line = file.readline()
+ if len(line) == 0:
+ break
+ entry = parse_line(line, flags)
+ if entry:
+ entries.append(entry)
+ return entries
+
+line_pattern = re.compile("^(?P<operation>\w+) \(((?P<old>\w+) ->
)?(?P<new>\w+)\)( of (?P<classname>.+))? size (?P<size>[0-9]+)$")
+
+def parse_line(line, flags):
+ result = line_pattern.match(line)
+ if result is None:
+ if flags.verbose:
+ print "Could not parse line: %s" % line[:-1]
+ return None
+ operation = result.group('operation')
+ old_storage = result.group('old')
+ new_storage = result.group('new')
+ classname = result.group('classname')
+ size = result.group('size')
+ if old_storage is None:
+ if operation == "Filledin":
+ old_storage = " Image Loading Storage" # Space to be sorted to the
beginning
+ elif operation == "Initialized":
+ old_storage = " Object Creation Storage"
+ else:
+ assert False, "old_storage has to be available in a Switched
operation"
+ entry = LogEntry(operation, old_storage, new_storage, classname, size)
+ #entry.is_special =
+ return entry
+
+class LogEntry(object):
+
+ def __init__(self, operation, old_storage, new_storage, classname, size):
+ self.operation = str(operation)
+ self.old_storage = str(old_storage)
+ self.new_storage = str(new_storage)
+ self.classname = str(classname)
+ self.size = float(size)
+
+ def full_key(self):
+ return (self.operation, self.old_storage, self.new_storage)
+
+ def __str__(self):
+ old_storage_string = "%s -> " % self.old_storage if self.old_storage
else ""
+ classname_string = " of %s" % self.classname if self.classname else ""
+ return "%s (%s%s)%s size %d" % (self.operation, old_storage_string,
self.new_storage, classname_string, self.size)
+
+# ====================================================================
+# ======== Graph parsing
+# ====================================================================
+
+class Operations(object):
+
+ def __init__(self, objects=0, slots=0):
+ self.objects = objects
+ self.slots = slots
+
+ def __str__(self, total=None):
+ if self.objects == 0:
+ avg_slots = 0
+ else:
+ avg_slots = float(self.slots) / self.objects
+ if total is not None and total.slots != 0:
+ percent_slots = " (%.1f%%)" % (float(self.slots)*100 / total.slots)
+ else:
+ percent_slots = ""
+ if total is not None and total.objects != 0:
+ percent_objects = " (%.1f%%)" % (float(self.objects)*100 /
total.objects)
+ else:
+ percent_objects = ""
+ return "%d%s slots in %d%s objects (avg size: %.1f)" % (self.slots,
percent_slots, self.objects, percent_objects, avg_slots)
+
+ def __repr__(self):
+ return "%s(%s)" % (self.__str__(), object.__repr__(self))
+
+ def add_log_entry(self, entry):
+ self.slots = self.slots + entry.size
+ self.objects = self.objects + 1
+
+ def __sub__(self, other):
+ return Operations(self.objects - other.objects, self.slots -
other.slots)
+
+ def __add__(self, other):
+ return Operations(self.objects + other.objects, self.slots +
other.slots)
+
+ def __lt__(self, other):
+ return self.slots < other.slots
+
+ def empty(self):
+ return self.objects == 0 and self.slots == 0
+
+ def prefixprint(self, key="", total=None):
+ if not self.empty():
+ print "%s%s" % (key, self.__str__(total))
+
+class ClassOperations(object):
+
+ def __init__(self):
+ self.classes = {}
+
+ def cls(self, name):
+ if name not in self.classes:
+ self.classes[name] = Operations()
+ return self.classes[name]
+
+ def total(self):
+ return reduce(operator.add, self.classes.values(), Operations())
+
+ def __str__(self):
+ return "ClassOperations(%s)" % self.classes
+
+ def __repr__(self):
+ return "%s(%s)" % (self.__str__(), object.__repr__(self))
+
+ def __add__(self, other):
+ result = ClassOperations()
+ result.classes = dict(self.classes)
+ for classname, other_class in other.classes.items():
+ result.cls(classname) # Make sure exists.
+ result.classes[classname] += other_class
+ return result
+
+ def __sub__(self, other):
+ result = ClassOperations()
+ result.classes = dict(self.classes)
+ for classname, other_class in other.classes.items():
+ result.cls(classname) # Make sure exists.
+ result.classes[classname] -= other_class
+ return result
+
+class StorageEdge(object):
+
+ def __init__(self, operation="None", origin=None, target=None):
+ assert operation == "None" or operation in OPERATIONS, "Unknown
operation %s" % operation
+ self.operation = operation
+ self.classes = ClassOperations()
+ self.origin = origin
+ self.target = target
+
+ def full_key(self):
+ return (self.operation, self.origin.name, self.target.name)
+
+ def cls(self, classname):
+ return self.classes.cls(classname)
+
+ def total(self):
+ return self.classes.total()
+
+ def notify_nodes(self):
+ self.origin.note_outgoing(self)
+ self.target.note_incoming(self)
+
+ def add_log_entry(self, entry):
+ self.cls(entry.classname).add_log_entry(entry)
+
+ def __str__(self):
+ return "[%s %s -> %s]" % (self.operation, self.origin, self.target)
+
+ def __repr__(self):
+ return "%s(%s)" % (self.__str__(), object.__repr__(self))
+
+ def __add__(self, other):
+ origin = self.origin if self.origin is not None else other.origin
+ target = self.target if self.target is not None else other.target
+ result = StorageEdge(self.operation, origin, target)
+ result.classes += self.classes + other.classes
+ return result
+
+ def __sub__(self, other):
+ origin = self.origin if self.origin is not None else other.origin
+ target = self.target if self.target is not None else other.target
+ result = StorageEdge(self.operation, origin, target)
+ result.classes += self.classes - other.classes
+ return result
+
+class StorageNode(object):
+
+ def __init__(self, name):
+ self.name = name
+ self.incoming = set()
+ self.outgoing = set()
+
+ def note_incoming(self, edge):
+ assert edge.target is self
+ if edge not in self.incoming:
+ self.incoming.add(edge)
+
+ def note_outgoing(self, edge):
+ assert edge.origin is self
+ if edge not in self.outgoing:
+ self.outgoing.add(edge)
+
+ def incoming_edges(self, operation):
+ return filter(lambda x: x.operation == operation, self.incoming)
+
+ def outgoing_edges(self, operation):
+ return filter(lambda x: x.operation == operation, self.outgoing)
+
+ def sum_incoming(self, operation):
+ return reduce(operator.add, self.incoming_edges(operation),
StorageEdge(operation))
+
+ def sum_outgoing(self, operation):
+ return reduce(operator.add, self.outgoing_edges(operation),
StorageEdge(operation))
+
+ def sum_all_incoming(self):
+ return reduce(operator.add, self.incoming, StorageEdge())
+
+ def sum_all_outgoing(self):
+ return reduce(operator.add, self.outgoing, StorageEdge())
+
+ def __str__(self):
+ return self.name
+
+ def __repr__(self):
+ return "%s(%s)" % (self.__str__(), object.__repr__(self))
+
+ def merge_edge_sets(self, set1, set2, key_slot):
+ getter = lambda edge: edge.__dict__[key_slot]
+ set_dict = dict([(getter(edge), edge) for edge in set1])
+ for edge in set2:
+ key = getter(edge)
+ if key not in set_dict:
+ set_dict[key] = edge
+ else:
+ set_dict[key] += edge
+ return set(set_dict.values())
+
+ def __add__(self, other):
+ result = StorageNode("%s_%s" % (self.name, other.name))
+ result.incoming = self.merge_edge_sets(self.incoming, other.incoming,
"origin")
+ # TODO bullshit code
+ for edge in result.incoming:
+ edge.target = result
+ result.outgoing = self.merge_edge_sets(self.outgoing, other.outgoing,
"target")
+ for edge in result.outgoing:
+ edge.origin = result
+ return result
+
+ def __lt__(self, other):
+ return self.name < other.name
+
+class StorageGraph(object):
+
+ def __init__(self):
+ self.nodes = {}
+ self.edges = {}
+
+ def node(self, name):
+ if name not in self.nodes:
+ self.nodes[name] = StorageNode(name)
+ return self.nodes[name]
+
+ def assert_sanity(self):
+ visited_edges = set()
+ for node in self.nodes.values():
+ for edge in node.incoming:
+ assert edge in self.edges.values(), "Edge not in graph's
edges: %s" % edge
+ visited_edges.add(edge)
+ if not edge.target is node:
+ print "Wrong edge target: %s\nIncoming edge: %s\nIn node:
%s" % (edge.target, edge, node)
+ assert False
+ if not edge in edge.origin.outgoing:
+ print "Edge not in origin's outgoing: %s\nIncoming edge:
%s\nIn node: %s" % (edge.origin.outgoing, edge, node)
+ assert False
+ for edge in node.outgoing:
+ assert edge in self.edges.values(), "Edge not in graph's
edges: %s" % edge
+ visited_edges.add(edge)
+ if not edge.origin is node:
+ print "Wrong edge origin: %s\nOutgoing edge: %s\nIn node:
%s" % (edge.origin, edge, node)
+ assert False
+ if not edge in edge.target.incoming:
+ print "Edge not in origin's incoming: %s\nOutgoing edge:
%s\nIn node: %s" % (edge.target.incoming, edge, node)
+ assert False
+ assert len(visited_edges) == len(self.edges.values()), "Not all of
graph's edges visited."
+
+ def add_log_entry(self, log_entry):
+ key = log_entry.full_key()
+ if key not in self.edges:
+ edge = StorageEdge(log_entry.operation,
self.node(log_entry.old_storage), self.node(log_entry.new_storage))
+ self.edges[key] = edge
+ edge.notify_nodes()
+ self.edges[key].add_log_entry(log_entry)
+
+ def collapse_nodes(self, collapsed_nodes, new_name=None):
+ for node in collapsed_nodes:
+ del self.nodes[node.name]
+ for edge in node.incoming:
+ del self.edges[edge.full_key()]
+ for edge in node.outgoing:
+ del self.edges[edge.full_key()]
+ new_node = reduce(operator.add, collapsed_nodes)
+ if new_name is not None:
+ new_node.name = new_name
+ self.nodes[new_node.name] = new_node
+ # TODO bullshit code
+ for node in collapsed_nodes:
+ for edge in node.incoming:
+ edge.origin.outgoing.remove(edge)
+ new_edges = filter(lambda filtered: filtered.origin ==
edge.origin, new_node.incoming)
+ assert len(new_edges) == 1
+ edge.origin.outgoing.add(new_edges[0])
+ for edge in node.outgoing:
+ edge.target.incoming.remove(edge)
+ new_edges = filter(lambda filtered: filtered.target ==
edge.target, new_node.outgoing)
+ assert len(new_edges) == 1
+ edge.target.incoming.add(new_edges[0])
+ for edge in new_node.incoming:
+ self.edges[edge.full_key()] = edge
+ for edge in new_node.outgoing:
+ self.edges[edge.full_key()] = edge
+ self.assert_sanity()
+
+ def split_nodes(self, new_name=None):
+ nodes = filter(lambda node: "Storage" not in node.name,
self.nodes.values())
+ self.collapse_nodes(nodes, new_name)
+
+ def sorted_nodes(self):
+ nodes = self.nodes.values()
+ nodes.sort()
+ return nodes
+
+def make_graph(entries):
+ graph = StorageGraph()
+ for e in entries:
+ graph.add_log_entry(e)
+ graph.assert_sanity()
+ return graph
+
+# ====================================================================
+# ======== Command - Summarize log content
+# ====================================================================
+
+def command_summarize(entries, flags):
+ print_summary(entries, flags)
+
+def print_summary(entries, flags):
+ graph = make_graph(entries)
+ if not flags.allstorage:
+ graph.split_nodes()
+ for node in graph.sorted_nodes():
+ node.print_summary(flags)
+
+def StorageNode_print_summary(self, flags):
+ print "\n%s:" % self.name
+ sum = StorageEdge()
+ total_incoming = self.sum_all_incoming().total() if flags.percent else None
+
+ print "\tIncoming:"
+ for operation in OPERATIONS:
+ if flags.detailed:
+ edges = [ (edge.origin.name, edge) for edge in
self.incoming_edges(operation) ]
+ else:
+ edges = [ (operation, self.sum_incoming(operation)) ]
+ for edgename, edge in edges:
+ edge.print_with_name("\t\t\t", edgename, total_incoming, flags)
+ sum += edge
+
+ print "\tOutgoing:"
+ for operation in OPERATIONS:
+ if flags.detailed:
+ edges = [ (edge.target.name, edge) for edge in
self.outgoing_edges(operation) ]
+ else:
+ edges = [ (operation, self.sum_outgoing(operation)) ]
+ for edgename, edge in edges:
+ edge.print_with_name("\t\t\t", edgename, total_incoming, flags)
+ sum -= edge
+
+ sum.print_with_name("\t", "Remaining", total_incoming, flags)
+
+StorageNode.print_summary = StorageNode_print_summary
+
+def StorageEdge_print_with_name(self, prefix, edgename, total_reference,
flags):
+ if flags.classes:
+ print "%s%s:" % (prefix, edgename)
+ prefix += "\t\t"
+ operations = self.classes.classes.items()
+ operations.sort(reverse=True, key=operator.itemgetter(1))
+ else:
+ operations = [ (edgename, self.total()) ]
+ for classname, classops in operations:
+ classops.prefixprint("%s%s: " % (prefix, classname), total_reference)
+
+StorageEdge.print_with_name = StorageEdge_print_with_name
+
+# ====================================================================
+# ======== Command - DOT output
+# ====================================================================
+
+# Output is valid dot code and can be parsed by the graphviz dot utility.
+def command_print_dot(entries, flags):
+ graph = make_graph(entries)
+ print "/*"
+ print "Storage Statistics (dot format):"
+ print "================================"
+ print "*/"
+ print dot_string(graph, flags)
+
+def command_dot(entries, flags):
+ import subprocess
+ dot = dot_string(make_graph(entries), flags)
+ command = ["dot", "-Tjpg", "-o%s.jpg" % flags.logfile]
+ print "Running:\n%s" % " ".join(command)
+ p = subprocess.Popen(command, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ output = p.communicate(input=dot)[0]
+ print output
+
+def dot_string(graph, flags):
+ result = "digraph G {"
+ incoming_cache = {}
+ if not flags.allstorage:
+ graph.split_nodes("Other")
+
+ for node in graph.nodes.values():
+ incoming = node.sum_all_incoming().total()
+ outgoing = node.sum_all_outgoing().total()
+ remaining = incoming - outgoing
+ if remaining.objects < 0:
+ # TODO This is a special node. Hacky way to find out.
+ incoming_cache[node.name] = outgoing
+ shape = ",shape=box"
+ label = "\nObjects: %d" % outgoing.objects
+ label += "\nSlots: %d" % outgoing.slots
+ else:
+ incoming_cache[node.name] = incoming
+ shape = ""
+ label = "\nIncoming objects: %d" % incoming.objects
+ label += "\nIncoming elements: %d" % incoming.slots
+ if flags.percent and incoming.objects != 0:
+ percent_remaining_objects = " (%.1f%%)" % (remaining.objects *
100 / incoming.objects)
+ percent_remaining_slots = " (%.1f%%)" % (remaining.slots * 100
/ incoming.slots)
+ else:
+ percent_remaining_objects = percent_remaining_slots = ""
+ label += "\nRemaining objects: %d%s" % (remaining.objects,
percent_remaining_objects)
+ label += "\nRemaining elements: %d%s" % (remaining.slots,
percent_remaining_slots)
+ result += "%s [label=\"%s%s\"%s];" % (node.name.replace(" ", "_"),
node.name, label, shape)
+
+ for edge in graph.edges.values():
+ total = edge.total()
+ str_objects = "%d objects" % total.objects
+ str_slots = "%d slots" % total.slots
+ incoming = incoming_cache[edge.origin.name]
+ if flags.percent and incoming.objects != 0:
+ str_objects += " (%.1f%%)" % (float(total.objects) * 100 /
incoming.objects)
+ str_slots += " (%.1f%%)" % (float(total.slots) * 100 /
incoming.slots)
+
+ target_node = edge.target.name.replace(" ", "_")
+ source_node = edge.origin.name.replace(" ", "_")
+ result += "%s -> %s [label=\"%s\n%s\n%d slots per object\"];" %
(source_node, target_node, str_objects, str_slots, total.slots / total.objects)
+
+ result += "}"
+ return result
+
+# ====================================================================
+# ======== Main
+# ====================================================================
+
+def command_print_entries(entries):
+ for e in entries:
+ print e
+
+class Flags(object):
+
+ def __init__(self, flags):
+ self.flags = {}
+ for name, short in flags:
+ self.__dict__[name] = False
+ self.flags[short] = name
+
+ def handle(self, arg):
+ if arg in self.flags:
+ self.__dict__[self.flags[arg]] = True
+ return True
+ else:
+ return False
+
+ def __str__(self):
+ descriptions = [ ("%s (%s)" % description) for description in
self.flags.items() ]
+ return "[%s]" % " | ".join(descriptions)
+
+def usage(flags, commands):
+ print "Arguments: logfile command %s" % flags
+ print "Available commands: %s" % commands
+ exit(1)
+
+def main(argv):
+ flags = Flags([
+ ('verbose', '-v'),
+ ('percent', '-p'),
+ ('allstorage', '-a'),
+ ('detailed', '-d'),
+ ('classes', '-c'),
+ ])
+
+ command_prefix = "command_"
+ module = sys.modules[__name__].__dict__
+ commands = [ a[len(command_prefix):] for a in module.keys() if
a.startswith(command_prefix) ]
+
+ if len(argv) < 2:
+ usage(flags, commands)
+ logfile = argv[0]
+ flags.logfile = logfile
+ command = argv[1]
+ for flag in argv[2:]:
+ if not flags.handle(flag):
+ usage(flags, commands)
+ if command not in commands:
+ usage(flags, commands)
+
+ func = module[command_prefix + command]
+ entries = parse(logfile, flags)
+ func(entries, flags)
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit