Author: Maciej Fijalkowski <[email protected]>
Branch: single-run
Changeset: r215:9b79fbc02de1
Date: 2013-08-01 19:49 +0200
http://bitbucket.org/pypy/benchmarks/changeset/9b79fbc02de1/
Log: Kill a whole lot of stupid mess - now we can have one interpreter
benchmarked
diff --git a/benchmarks.py b/benchmarks.py
--- a/benchmarks.py
+++ b/benchmarks.py
@@ -1,7 +1,7 @@
import os
import logging
from unladen_swallow.perf import SimpleBenchmark, MeasureGeneric
-from unladen_swallow.perf import RawResult, SimpleComparisonResult, avg, ResultError
+from unladen_swallow.perf import RawResult, SimpleResult, avg, ResultError
import subprocess
def relative(*args):
@@ -50,7 +50,7 @@
*args, **kwargs)
except subprocess.CalledProcessError, e:
return ResultError(e)
- return SimpleComparisonResult(avg(base_data[0]), -1, -1)
+ return SimpleResult(avg(base_data[0]))
BM.func_name = 'BM_' + bm_name
d[BM.func_name] = BM
diff --git a/runner.py b/runner.py
--- a/runner.py
+++ b/runner.py
@@ -15,17 +15,14 @@
'rietveld', 'html5lib', 'ai']
BENCHMARK_SET += perf._FindAllBenchmarks(benchmarks.__dict__).keys()
-CHANGED = 'changed'
-BASELINE = 'baseline'
-
class WrongBenchmark(Exception):
pass
-def run_and_store(benchmark_set, result_filename, changed_path, revision=0,
+def run_and_store(benchmark_set, result_filename, path, revision=0,
options='', branch='default', args='', upload=False,
- fast=False, baseline_path=sys.executable, full_store=False):
+ fast=False, full_store=False):
funcs = perf.BENCH_FUNCS.copy()
funcs.update(perf._FindAllBenchmarks(benchmarks.__dict__))
opts = ['-b', ','.join(benchmark_set),
@@ -37,7 +34,7 @@
opts += ['--args', args]
if full_store:
opts += ['--no_statistics']
- opts += [baseline_path, changed_path]
+ opts += [path]
results = perf.main(opts, funcs)
f = open(str(result_filename), "w")
results = [(name, result.__class__.__name__, result.__dict__)
@@ -51,73 +48,6 @@
f.close()
return results
-
-def get_upload_options(options):
- """
- returns a dict with 2 keys: CHANGED, BASELINE. The values are
- dicts with the keys
- * 'upload' (boolean)
- * 'project' (string)
- * 'executable' (string)
- * 'urls (list of strings).
- * 'branch' (string)
- * 'revision' (string)
-
- This correspondents to the the --upload* and --upload-baseline*
- options.
-
- raises: AssertionError if upload is specified, but not the
- corresponding executable or revision.
- """
-
- if options.upload_baseline_revision is None:
- options.upload_baseline_revision = options.upload_revision
-
- upload_options = {}
-
- for run in [CHANGED, BASELINE]:
-
- def get_upload_option(name):
- attr_name = 'upload'
- if run == BASELINE:
- attr_name = '%s_baseline' % attr_name
- if name:
- attr_name = '%s_%s' % (attr_name, name)
- return getattr(options, attr_name)
-
- urls = get_upload_option('urls')
- urls = [url.strip() for url in urls.split(',') if url.strip()]
- upload = get_upload_option(None)
- project = get_upload_option('project')
- executable = get_upload_option('executable')
- branch = get_upload_option('branch')
- revision = get_upload_option('revision')
- if upload:
- if executable is None:
- raise AssertionError('If you want to --upload[-baseline] you '
- 'have to specify the corresponding '
- '--upload[-baseline]-executable')
- if revision is None:
- raise AssertionError('If you want to upload the result you '
- 'have to specify a --revision (or '
- '--upload-baseline-revision if you '
- 'want to upload the baseline result')
- if ((run == BASELINE and 'nullpython.py' in options.baseline) or
- (run == CHANGED and 'nullpython.py' in options.changed)):
- raise AssertionError("Don't upload data from the nullpython "
- "dummy interpreter. It won't run any "
- "real benchmarks.")
-
- upload_options[run] = {
- 'upload': upload,
- 'project': project,
- 'executable': executable,
- 'urls': urls,
- 'branch': branch,
- 'revision': revision}
- return upload_options
-
-
def main(argv):
import optparse
parser = optparse.OptionParser(
@@ -137,13 +67,8 @@
". (default: Run all listed benchmarks)"
) % ", ".join(sorted(BENCHMARK_SET)))
benchmark_group.add_option(
- '-c', '--changed', default=sys.executable,
- help=('pypy-c or another modified python interpreter to run against. '
- 'Also named the "changed" interpreter. (default: the python '
- 'used to run this script)'))
- benchmark_group.add_option(
- '--baseline', default=sys.executable, action='store',
- help=('Baseline interpreter. (default: the python used to '
+ '-p', '--python', default=sys.executable, action='store',
+ help=('Interpreter. (default: the python used to '
'run this script)'))
benchmark_group.add_option(
'-o', '--output-filename', default="result.json",
@@ -182,89 +107,28 @@
help="Run the benchmarks with the --no-statistics flag.")
parser.add_option_group(benchmark_group)
- # upload changed options
- upload_group = optparse.OptionGroup(
- parser, 'Upload Options',
- ('Options for uploading the result of the "changed" python to '
- 'codespeed. The information about revision and branch will '
- 'be taken from the options --revision and --branch.'))
- upload_group.add_option(
- "--upload", default=None, action="store_true",
- help=("Upload results to speed.pypy.org (unless "
- "--upload-url is given)."))
- upload_group.add_option(
- "--upload-urls", default="http://speed.pypy.org/",
- help=("Comma seperated urls of the codespeed instances "
- "to upload to. (default: http://speed.pypy.org/)"))
- upload_group.add_option(
- "--upload-project", default="PyPy",
- help="The project name in codespeed. (default: PyPy)")
- upload_group.add_option(
- "--upload-executable", default=None,
- help=("The executable name in codespeed. (required if --upload "
- "is given)"))
- parser.add_option_group(upload_group)
+ parser.add_option("--upload-url", action="store", default=None,
+ help="Upload to url or None")
+ parser.add_option("--upload-revision", action="store", default=None,
+ help="Upload revision")
+ parser.add_option("--upload-branch", action="store", default=None,
+ help="Upload branch")
+ parser.add_option("--upload-project", action="store", default="PyPy")
+ parser.add_option("--upload-executable", action="store", default="pypy-c")
parser.add_option(
"--force-host", default=None, action="store",
- help=("Force the hostname. This option will also be used when "
- "uploading the baseline result."))
+ help=("Force the hostname."))
parser.add_option("--niceness", default=None, type="int",
help="Set absolute niceness for process")
- # upload baseline group
- upload_baseline_group = optparse.OptionGroup(
- parser, 'Upload Baseline Options',
- ('Options for uploading the result of the "baseline" python to '
- 'codespeed. The hostname of the --force-host option will be used '
- 'in the baseline upload too.'))
- upload_baseline_group.add_option(
- "--upload-baseline", default=None, action="store_true",
- help=("Also upload results or the baseline benchmark "
- "to speed.pypy.org (unless "
- "--upload-baseline-url is given)."))
- upload_baseline_group.add_option(
- "--upload-baseline-urls",
- default="http://speed.pypy.org/",
- help=("Comma seperated urls of the codespeed instances "
- "to upload to. (default: http://speed.pypy.org/)"))
- upload_baseline_group.add_option(
- "--upload-baseline-project", default="PyPy",
- help="The project name in codespeed (default: PyPy).")
- upload_baseline_group.add_option(
- "--upload-baseline-executable", default=None,
- help=("The executable name in codespeed. (required if "
- "--upload-baseline is given)"))
- upload_baseline_group.add_option(
- '--upload-baseline-branch', default='default',
- action='store',
- help=("The name of the branch used for the baseline "
- "run. (default: 'default'"))
- upload_baseline_group.add_option(
- '--upload-baseline-revision', action='store',
- default=None,
- help=("The revision of the baseline. (required if --upload-baseline "
- "is given)"))
- parser.add_option_group(upload_baseline_group)
-
- # Backward compoatibility options
- deprecated_group = optparse.OptionGroup(
- parser, 'Deprecated Options',
- 'Still here for backward compatibility.')
- deprecated_group.add_option(
- '-p', '--pypy-c', default=sys.executable,
- dest='changed', help='Deprecated alias for -c/--changed')
- parser.add_option_group(deprecated_group)
-
options, args = parser.parse_args(argv)
- upload_options = get_upload_options(options)
benchmarks = options.benchmarks.split(',')
for benchmark in benchmarks:
if benchmark not in BENCHMARK_SET:
raise WrongBenchmark(benchmark)
- changed_path = options.changed
- baseline_path = options.baseline
+ path = options.python
fast = options.fast
args = options.args
full_store = options.full_store
@@ -277,25 +141,20 @@
if options.niceness is not None:
os.nice(options.niceness - os.nice(0))
- results = run_and_store(benchmarks, output_filename, changed_path,
+ results = run_and_store(benchmarks, output_filename, path,
revision, args=args, fast=fast,
- baseline_path=baseline_path,
full_store=full_store, branch=branch)
- for run in [CHANGED, BASELINE]:
- upload = upload_options[run]['upload']
- urls = upload_options[run]['urls']
- project = upload_options[run]['project']
- executable = upload_options[run]['executable']
- branch = upload_options[run]['branch'] or 'default'
- revision = upload_options[run]['revision']
+ if options.upload_url:
+ branch = options.upload_branch or 'default'
+ revision = options.upload_revision
- if upload:
- # prevent to upload results from the nullpython dummy
- host = force_host if force_host else socket.gethostname()
- for url in urls:
- print save(project, revision, results, executable, host, url,
- changed=(run == CHANGED), branch=branch)
+ # prevent to upload results from the nullpython dummy
+ host = force_host if force_host else socket.gethostname()
+ print save(options.upload_project,
+ revision, results, options.upload_executable, host,
+ options.upload_url,
+ branch=branch)
if __name__ == '__main__':
diff --git a/saveresults.py b/saveresults.py
--- a/saveresults.py
+++ b/saveresults.py
@@ -30,7 +30,7 @@
def save(project, revision, results, executeable, host, url, testing=False,
- changed=True, branch='default'):
+ branch='default'):
testparams = []
#Parse data
data = {}
@@ -41,21 +41,12 @@
res_type = b[1]
results = b[2]
value = 0
- if res_type == "SimpleComparisonResult":
- if changed:
- value = results['changed_time']
- else:
- value = results['base_time']
- elif res_type == "ComparisonResult":
- if changed:
- value = results['avg_changed']
- else:
- value = results['avg_base']
+ if res_type == "SimpleResult":
+ value = results['time']
+ elif res_type == "Result":
+ value = results['avg_time']
elif res_type == "RawResult":
- if changed:
- value = results["changed_times"]
- else:
- value = results["base_times"]
+ value = results["times"]
if value:
assert len(value) == 1
value = value[0]
@@ -74,11 +65,6 @@
if value is None:
print "Ignoring skipped result", data
continue
- if res_type == "ComparisonResult":
- if changed:
- data['std_dev'] = results['std_changed']
- else:
- data['std_dev'] = results['std_base']
if testing:
testparams.append(data)
else:
diff --git a/unladen_swallow/perf.py b/unladen_swallow/perf.py
--- a/unladen_swallow/perf.py
+++ b/unladen_swallow/perf.py
@@ -340,38 +340,18 @@
self._done.wait()
return self._usage
-class ComparisonResult(object):
+class Result(object):
""" An object representing a result of run. Can be converted to
a string by calling string_representation
"""
- def __init__(self, min_base, min_changed, delta_min, avg_base,
- avg_changed, delta_avg, t_msg, std_base, std_changed,
- delta_std, timeline_link):
- self.min_base = min_base
- self.min_changed = min_changed
- self.delta_min = delta_min
- self.avg_base = avg_base
- self.avg_changed = avg_changed
- self.delta_avg = delta_avg
- self.t_msg = t_msg
- self.std_base = std_base
- self.std_changed = std_changed
- self.delta_std = delta_std
- self.timeline_link = timeline_link
-
- def get_timeline(self):
- if self.timeline_link is None:
- return ""
- return "Timeline: %(timeline_link)s"
+ def __init__(self, times, min_time, avg_time, std_time):
+ self.times = times
+ self.min_time = min_time
+ self.avg_time = avg_time
+ self.std_time = std_time
def string_representation(self):
- return (("Min: %(min_base)f -> %(min_changed)f:" +
- " %(delta_min)s\n" +
- "Avg: %(avg_base)f -> %(avg_changed)f:" +
- " %(delta_avg)s\n" + self.t_msg +
- "Stddev: %(std_base).5f -> %(std_changed).5f:" +
- " %(delta_std)s\n" + self.get_timeline())
- % self.__dict__)
+ return "Time: %(min_time)f +- %(std_time)f" % self.__dict__
class ResultError(object):
def __init__(self, e):
@@ -397,14 +377,12 @@
" %(delta_max)s\n" + self.get_usage_over_time())
% self.__dict__)
-class SimpleComparisonResult(object):
- def __init__(self, base_time, changed_time, time_delta):
- self.base_time = base_time
- self.changed_time = changed_time
- self.time_delta = time_delta
+class SimpleResult(object):
+ def __init__(self, time):
+ self.time = time
def string_representation(self):
- return ("%(base_time)f -> %(changed_time)f: %(time_delta)s"
+ return ("%(time)f"
% self.__dict__)
class RawResult(object):
@@ -420,15 +398,11 @@
max_base, max_changed = max(base_usage), max(changed_usage)
delta_max = QuantityDelta(max_base, max_changed)
- chart_link = GetChart(SummarizeData(base_usage),
- SummarizeData(changed_usage),
- options)
-
- return MemoryUsageResult(max_base, max_changed, delta_max, chart_link)
+ return MemoryUsageResult(max_base, max_changed, delta_max, "")
### Utility functions
-def SimpleBenchmark(benchmark_function, base_python, changed_python, options,
+def SimpleBenchmark(benchmark_function, python, options,
*args, **kwargs):
"""Abstract out the body for most simple benchmarks.
@@ -442,8 +416,6 @@
Args:
benchmark_function: callback that takes (python_path, options) and
returns a (times, memory_usage) 2-tuple.
- base_python: path to the reference Python binary.
- changed_python: path to the experimental Python binary.
options: optparse.Values instance.
*args, **kwargs: will be passed through to benchmark_function.
@@ -452,65 +424,12 @@
Comes with string_representation method.
"""
try:
- changed_data = benchmark_function(changed_python, options,
- *args, **kwargs)
- base_data = benchmark_function(base_python, options,
- *args, **kwargs)
+ data = benchmark_function(python, options,
+ *args, **kwargs)
except subprocess.CalledProcessError, e:
return ResultError(e)
- return CompareBenchmarkData(base_data, changed_data, options)
-
-
-def GetChart(base_data, changed_data, options, chart_margin=100):
- """Build a Google Chart API URL for the given data.
-
- Args:
- base_data: data points for the base binary.
- changed_data: data points for the changed binary.
- options: optparse.Values instance.
- chart_margin: optional integer margin to add/sub from the max/min.
-
- Returns:
- Google Chart API URL as a string.
- """
- if options.no_charts:
- return None
- # We use these to scale the graph.
- min_data = min(min(base_data), min(changed_data)) - chart_margin
- max_data = max(max(base_data), max(changed_data)) + chart_margin
- # Google-bound data, formatted as desired by the Chart API.
- data_for_google = (",".join(map(str, base_data)) + "|" +
- ",".join(map(str, changed_data)))
-
- # Come up with labels for the X axis; not too many, though, or they'll be
- # unreadable.
- max_len = max(len(base_data), len(changed_data))
- points = SummarizeData(range(1, max_len + 1), points=5)
- if points[0] != 1:
- points.insert(0, 1)
- x_axis_labels = "".join("|%d" % i for i in points)
-
- # Parameters for the Google Chart API. See
- # http://code.google.com/apis/chart/ for more details.
- # cht=lc: line graph with visible axes.
- # chs: dimensions of the graph, in pixels.
- # chdl: labels for the graph lines.
- # chco: colors for the graph lines.
- # chds: minimum and maximum values for the vertical axis.
- # chxr: minimum and maximum values for the vertical axis labels.
- # chd=t: the data sets, |-separated.
- # chxt: which axes to draw.
- # chxl: labels for the axes.
- base_binary = options.base_binary
- changed_binary = options.changed_binary
- raw_url = ("http://chart.apis.google.com/chart?cht=lc&chs=700x400&chxt=x,y&"
- "chxr=1,%(min_data)s,%(max_data)s&chco=FF0000,0000FF&"
- "chdl=%(base_binary)s|%(changed_binary)s&"
- "chds=%(min_data)s,%(max_data)s&chd=t:%(data_for_google)s&"
- "chxl=0:%(x_axis_labels)s"
- % locals())
- return ShortenUrl(raw_url)
+ return CompareBenchmarkData(data, options)
def ShortenUrl(url):
@@ -656,7 +575,7 @@
return fixed_env
-def CompareMultipleRuns(base_times, changed_times, options):
+def CompareMultipleRuns(times, options):
"""Compare multiple control vs experiment runs of the same benchmark.
Args:
@@ -668,54 +587,26 @@
A string summarizing the difference between the runs, suitable for
human consumption.
"""
- if len(base_times) != len(changed_times):
- print "Base:"
- print base_times
- print "Changed:"
- print changed_times
- raise Exception("length did not match")
if options.no_statistics:
- return RawResult(base_times, changed_times)
- if len(base_times) == 1:
+ return RawResult(times)
+ if len(times) == 1:
# With only one data point, we can't do any of the interesting stats
# below.
- base_time, changed_time = base_times[0], changed_times[0]
- time_delta = TimeDelta(base_time, changed_time)
- return SimpleComparisonResult(base_time, changed_time, time_delta)
+ return SimpleResult(times[0])
- # Create a chart showing iteration times over time. We round the times so
- # as not to exceed the GET limit for Google's chart server.
- timeline_link = GetChart([round(t, 2) for t in base_times],
- [round(t, 2) for t in changed_times],
- options, chart_margin=1)
+ times = sorted(times)
- base_times = sorted(base_times)
- changed_times = sorted(changed_times)
+ min_time = times[0]
+ avg_time = avg(times)
+ std_time = SampleStdDev(times)
- min_base, min_changed = base_times[0], changed_times[0]
- avg_base, avg_changed = avg(base_times), avg(changed_times)
- std_base = SampleStdDev(base_times)
- std_changed = SampleStdDev(changed_times)
- delta_min = TimeDelta(min_base, min_changed)
- delta_avg = TimeDelta(avg_base, avg_changed)
- delta_std = QuantityDelta(std_base, std_changed)
+ return Result(times, min_time, avg_time, std_time)
- t_msg = "Not significant\n"
- significant, t_score = IsSignificant(base_times, changed_times)
- if significant:
- t_msg = "Significant (t=%f, a=0.95)\n" % t_score
-
- return ComparisonResult(min_base, min_changed, delta_min, avg_base,
- avg_changed, delta_avg, t_msg, std_base,
- std_changed, delta_std, timeline_link)
-
-def CompareBenchmarkData(base_data, changed_data, options):
+def CompareBenchmarkData(data, options):
"""Compare performance and memory usage.
Args:
- base_data: 2-tuple of (times, mem_usage) where times is an iterable
- of floats; mem_usage is a list of memory usage samples.
- changed_data: 2-tuple of (times, mem_usage) where times is an iterable
+ data: 2-tuple of (times, mem_usage) where times is an iterable
of floats; mem_usage is a list of memory usage samples.
options: optparse.Values instance.
@@ -723,17 +614,16 @@
Human-readable summary of the difference between the base and changed
binaries.
"""
- base_times, base_mem = base_data
- changed_times, changed_mem = changed_data
+ times, mem = data
# We suppress performance data when running with --track_memory.
if options.track_memory:
- if base_mem is not None:
- assert changed_mem is not None
+ if mem is not None:
+ XXX # we don't track memory
return CompareMemoryUsage(base_mem, changed_mem, options)
return "Benchmark does not report memory usage yet"
- return CompareMultipleRuns(base_times, changed_times, options)
+ return CompareMultipleRuns(times, options)
def CallAndCaptureOutput(command, env=None, track_memory=False,
inherit_env=[]):
@@ -1516,25 +1406,6 @@
should_run.remove(bm)
return should_run
-def ParsePythonArgsOption(python_args_opt):
- """Parses the --args option.
-
- Args:
- python_args_opt: the string passed to the -a option on the command line.
-
- Returns:
- A pair of lists: (base_python_args, changed_python_args).
- """
- args_pair = python_args_opt.split(",")
- base_args = args_pair[0].split() # On whitespace.
- changed_args = base_args
- if len(args_pair) == 2:
- changed_args = args_pair[1].split()
- elif len(args_pair) > 2:
- logging.warning("Didn't expect two or more commas in --args flag: %s",
- python_args_opt)
- return base_args, changed_args
-
def ParseEnvVars(option, opt_str, value, parser):
"""Parser callback to --inherit_env var names"""
parser.values.inherit_env = [v for v in value.split(",") if v]
@@ -1586,15 +1457,16 @@
help=("Don't perform statistics - return raw data"))
options, args = parser.parse_args(argv)
- if len(args) != 2:
+ if len(args) != 1:
parser.error("incorrect number of arguments")
- base, changed = args
+ base, = args
options.base_binary = base
- options.changed_binary = changed
- base_args, changed_args = ParsePythonArgsOption(options.args)
- base_cmd_prefix = [base] + base_args
- changed_cmd_prefix = [changed] + changed_args
+ base_args = options.args
+ if base_args:
+ base_cmd_prefix = [base] + base_args.split(" ")
+ else:
+ base_cmd_prefix = [base]
logging.basicConfig(level=logging.INFO)
@@ -1614,7 +1486,7 @@
print "Running %s..." % name
# PyPy specific modification: let the func to return a list of results
# for sub-benchmarks
- bench_result = func(base_cmd_prefix, changed_cmd_prefix, options)
+ bench_result = func(base_cmd_prefix, options)
name = getattr(func, 'benchmark_name', name)
if isinstance(bench_result, list):
for subname, subresult in bench_result:
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit