Author: Armin Rigo <>
Changeset: r369:9b140bbc577d
Date: 2017-03-08 09:16 +0100

Log:    Add a reasonable way (IMO) to run benchmarks locally and display the results

diff --git a/ b/
new file mode 100755
--- /dev/null
+++ b/
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+"""Usage:  <this-script>  first-filename  [second-filename]
+
+Displays the content of a JSON result file written by the companion
+local-run wrapper script.  With two arguments, computes statistics and
+displays the differences.
+
+(Details: each file must result from a '--full-store' execution of the
+benchmark runner.  The "changed_times" keys are used and the "base_times"
+keys are discarded.  The option '--base1' and/or '--base2' can be used
+to pick the "base_times" instead in the first/second file.  These
+options are not useful if the files are produced by the wrapper script
+because it uses nullpython as the base.)
+"""
+import sys
+import json
+from unladen_swallow import perf
+def load_times(filename, base_times=False):
+    with open(filename) as f:
+        d = json.load(f)
+    if base_times:
+        key_times = "base_times"
+    else:
+        key_times = "changed_times"
+    result = {}
+    for lst2 in d['results']:
+        name = str(lst2[0])
+        if lst2[1] != 'RawResult':
+            print ("ERROR: entry for %r is not a RawResult "
+                   "(missing --full-store?)" % (name,))
+            continue
+        result[name] = lst2[2][key_times]
+    if not result:
+        print "No valid result in %r." % (filename,)
+        sys.exit(1)
+    return result
+def _report(row, raw1):
+    if raw1 is None:
+        row.append('')
+        row.append('')
+        row.append('')
+        row.append('')
+        row.append('')
+    elif len(raw1) == 1:
+        # A single result.  Report it.
+        row.append('')
+        row.append('')
+        row.append(str(round(raw1[0], 3)))
+        row.append('')
+        row.append('')
+    elif len(raw1) == 0:
+        # Should not occur
+        row.append('???')
+        row.append('')
+        row.append('empty')
+        row.append('')
+        row.append('???')
+    else:
+        # Multiple results.
+        t_min = min(raw1)
+        t_avg = perf.avg(raw1)
+        t_std = perf.SampleStdDev(raw1)
+        row.append(str(round(t_min, 3)))
+        row.append('')
+        row.append(str(round(t_avg, 3)))
+        row.append('')
+        row.append(str(round(t_std, 5)))
+    row.append('')
+    return raw1
+def display(times1, times2=None):
+    if times2 is None:
+        times2 = {}
+    all_names = sorted(set(times1) | set(times2))
+    table = [['BENCHMARK', '   ', 'min', ' ', 'avg', ' ', 'stddev', '  ',
+              'min', ' ', 'avg', ' ', 'stddev', '  ',
+              'diff']]
+    RIGHT_ALIGN = '\x00'
+    for name in all_names:
+        row = [name, '']
+        table.append(row)
+        raw1 = _report(row, times1.get(name))
+        raw2 = _report(row, times2.get(name))
+        if raw1 and raw2:
+            row.append(perf.TimeDelta(perf.avg(raw1), perf.avg(raw2)))
+    lengths = []
+    for row in table:
+        while len(lengths) < len(row):
+            lengths.append(0)
+        for i, cell in enumerate(row):
+            if len(cell) > lengths[i]:
+                lengths[i] = len(cell)
+    for row in table:
+        s = ''
+        for cell, l1 in zip(row, lengths):
+            if cell.startswith(RIGHT_ALIGN):
+                cell = ' '*(l1 - len(cell) - 1) + cell[1:]
+            s += cell + ' '*(l1 - len(cell))
+        print s
+def main(argv):
+    import optparse
+    parser = optparse.OptionParser(
+        usage="%prog first-filename [second-filename]",
+        description=__doc__)
+    parser.add_option("--base1", default=False, action="store_true",
+        help='Pick the "base_times" keys instead of the "changed_times"'
+             ' ones in the first file')
+    parser.add_option("--base2", default=False, action="store_true",
+        help='Pick the "base_times" keys instead of the "changed_times"'
+             ' ones in the second file')
+    options, args = parser.parse_args(argv)
+    if len(args) == 0:
+        parser.error("no filenames given; try --help")
+    elif len(args) > 2:
+        parser.error("too many filenames")
+    times1 = load_times(args[0], base_times=options.base1)
+    if len(args) > 1:
+        times2 = load_times(args[1], base_times=options.base2)
+    else:
+        times2 = None
+    display(times1, times2)
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/ b/
new file mode 100755
--- /dev/null
+++ b/
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+"""Usage:  <this-script>  path/to/pypy-c -o output-filename <more options for the runner>
+
+This is a wrapper script around the benchmark runner that makes it easier
+to run locally all benchmarks on a single given pypy-c.  It stores the result
+in a JSON file given as 'output-filename'.  You can then run the companion
+display script to display the output or the differences between two
+such output files.
+
+More options can be given on the command line and are passed to the runner.
+Common ones are:
+
+    --fast
+    --args=ARGS         arguments to give to pypy-c, must not contain a comma!
+"""
+import sys, os
+import subprocess
+if len(sys.argv) < 2 or sys.argv[1].startswith('-'):
+    print __doc__
+    sys.exit(2)
+pypy_c = sys.argv[1]
+localdir = os.path.dirname(sys.argv[0]) or '.'
+cmdline = [sys.executable, os.path.join(localdir, ''),
+           '--changed', pypy_c,
+           '--baseline', os.path.join(localdir, ''),
+           '--full-store',
+           ] + sys.argv[1:]
+print 'Executing', cmdline
+r =
+if r:
+    print >> sys.stderr, '*** exit code %r ***' % (r,)
+    sys.exit(r)
