Hi mclow.lists, chandlerc, danalbert,
Hi All,
This is the initial commit for the benchmark testing framework I plan to use in
libc++. It functions similarly to the existing LIT setup.
The benchmarks use the Google Benchmark library found here:
http://github.com/google/benchmark.
To enable building the benchmark library, use the CMake option
`-DLIBCXX_ENABLE_BENCHMARKS=ON`. This option checks out the library from
GitHub and builds it in the build/external directory.
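For example, a typical out-of-tree configuration and build might look like the
following (the paths and generator are illustrative, not prescribed by this
patch):
```
mkdir -p /path/to/libcxx-build && cd /path/to/libcxx-build
cmake -DLIBCXX_ENABLE_BENCHMARKS=ON /path/to/libcxx-source
make    # also builds the Benchmark external project into <build>/external
```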
Once the library is built, the benchmarks can be run in one of two ways.
1. Standalone (without baseline comparison): This simply runs the benchmarks
and generates the output. Use `-o /path/to/OUTPUT` to save the results of the
benchmarks.
Example usage:
```
lit -v -o /path/to/baseline.txt /path/to/benchmarks
```
2. Comparison against a baseline: This runs the benchmarks and compares the
results to a specified baseline file. If the current results are slower by more
than the "allowed difference", the test fails and the results are reported.
Example usage:
```
lit -sv -o /path/to/current_results.txt --param=baseline=/path/to/baseline.txt \
    --param=allowed_difference=2.5 /path/to/benchmarks
```
The `allowed_difference` parameter specifies the percentage by which the
results are allowed to differ from the baseline. The default is 5%.
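For reference, the check in test/libcxx/test/format.py compares the ratio of
the current cpu_time to the baseline cpu_time against this threshold. A minimal
sketch of that comparison (the function name and numbers are illustrative, not
part of the patch):
```
# Sketch of the per-benchmark comparison (illustrative names and values).
def within_allowed_difference(current_cpu_time, baseline_cpu_time,
                              allowed_difference=5.0):
    diff = current_cpu_time / float(baseline_cpu_time)  # 1.04 means 4% slower
    return diff * 100 - 100 <= allowed_difference

# A benchmark that regressed from 100ns to 104ns passes at the default 5%
# threshold but fails with --param=allowed_difference=2.5.
assert within_allowed_difference(104, 100)
assert not within_allowed_difference(104, 100, allowed_difference=2.5)
```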
The benchmark tests are not run as part of the regular test suite. They are too
time-consuming and provide little value unless compared against a baseline, so
they form an entirely separate test suite.
http://reviews.llvm.org/D8107
Files:
CMakeLists.txt
external/CMakeLists.txt
external/Toolchain.cmake.in
test/benchmark/lit.cfg
test/benchmark/test.bench.cpp
test/libcxx/test/benchmark.py
test/libcxx/test/config.py
test/libcxx/test/format.py
Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -68,6 +68,7 @@
set(LLVM_USE_SANITIZER "" CACHE STRING
"Define the sanitizer used to build the library and tests")
endif()
+option(LIBCXX_ENABLE_BENCHMARKS "Enable the benchmark tests." ON)
if (LIBCXX_ENABLE_STATIC_ABI_LIBRARY)
if (APPLE)
@@ -296,6 +297,7 @@
# Add source code. This also contains all of the logic for deciding linker flags
# soname, etc...
add_subdirectory(lib)
+add_subdirectory(external)
#===============================================================================
# Setup Tests
Index: external/CMakeLists.txt
===================================================================
--- /dev/null
+++ external/CMakeLists.txt
@@ -0,0 +1,19 @@
+
+if (LIBCXX_ENABLE_BENCHMARKS)
+ configure_file(
+ ${CMAKE_CURRENT_SOURCE_DIR}/Toolchain.cmake.in
+ ${CMAKE_CURRENT_BINARY_DIR}/Toolchain.cmake
+ @ONLY)
+
+ include(ExternalProject)
+
+ ExternalProject_Add(
+ Benchmark
+ SVN_REPOSITORY https://github.com/google/benchmark/branches/api-merge
+ CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_CURRENT_BINARY_DIR}
+ -DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_CURRENT_BINARY_DIR}/Toolchain.cmake
+ -DCMAKE_BUILD_TYPE=RELEASE
+ -DBENCHMARK_ENABLE_TESTING:BOOL=OFF
+ -DBENCHMARK_ENABLE_SHARED:BOOL=ON
+ )
+endif()
Index: external/Toolchain.cmake.in
===================================================================
--- /dev/null
+++ external/Toolchain.cmake.in
@@ -0,0 +1,10 @@
+
+set(CMAKE_CXX_COMPILER @CMAKE_CXX_COMPILER@)
+set(CMAKE_C_COMPILER @CMAKE_C_COMPILER@)
+
+# Try to statically link the C++ standard library so that we don't have libstdc++
+# and libc++ dynamically linked into our tests.
+if (NOT APPLE AND NOT "@CMAKE_SYSTEM_NAME@" STREQUAL "FreeBSD")
+ set(CMAKE_SHARED_LINKER_FLAGS "-static-libgcc -static-libstdc++" CACHE STRING "")
+ set(CMAKE_MODULE_LINKER_FLAGS "-static-libgcc -static-libstdc++" CACHE STRING "")
+endif()
Index: test/benchmark/lit.cfg
===================================================================
--- /dev/null
+++ test/benchmark/lit.cfg
@@ -0,0 +1,42 @@
+# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79:
+# Configuration file for the 'lit' test runner.
+import os
+import site
+import sys
+
+site.addsitedir(os.path.join(os.path.dirname(__file__), '..'))
+import libcxx.test.config
+
+# Tell pylint that we know config and lit_config exist somewhere.
+if 'PYLINT_IMPORT' in os.environ:
+ config = object()
+ lit_config = object()
+
+# name: The name of this test suite.
+config.name = 'libc++-benchmark'
+
+config.suffixes = ['.bench.cpp']
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+
+# Infer the test_exec_root from the libcxx_object root.
+obj_root = getattr(config, 'libcxx_obj_root', None)
+
+# Check that the test exec root is known.
+if obj_root is None:
+ import libcxx.test.config
+ libcxx.test.config.loadSiteConfig(
+ lit_config, config, 'libcxx_site_config', 'LIBCXX_SITE_CONFIG')
+ obj_root = getattr(config, 'libcxx_obj_root', None)
+ if obj_root is None:
+ import tempfile
+ obj_root = tempfile.mkdtemp(prefix='libcxx-benchmark-')
+ lit_config.warning('Creating temporary directory for object root: %s' %
+ obj_root)
+
+config.test_exec_root = os.path.join(obj_root, 'test')
+
+configuration = libcxx.test.config.BenchmarkConfiguration(lit_config, config)
+configuration.configure()
+config.test_format = configuration.get_test_format()
Index: test/benchmark/test.bench.cpp
===================================================================
--- /dev/null
+++ test/benchmark/test.bench.cpp
@@ -0,0 +1,8 @@
+#include "benchmark/minimal_benchmark.h"
+
+static void BM_test_empty(benchmark::State& state) {
+ while (state.KeepRunning()) {}
+}
+BENCHMARK(BM_test_empty);
+
+BENCHMARK_MAIN()
Index: test/libcxx/test/benchmark.py
===================================================================
--- /dev/null
+++ test/libcxx/test/benchmark.py
@@ -0,0 +1,205 @@
+import json
+import re
+
+import lit
+import lit.Test
+
+
+def stringToCode(str_code):
+ if str_code == 'PASS':
+ return lit.Test.PASS
+ elif str_code == 'XFAIL':
+ return lit.Test.XFAIL
+ elif str_code == 'FAIL':
+ return lit.Test.FAIL
+ elif str_code == 'XPASS':
+ return lit.Test.XPASS
+ elif str_code == 'UNRESOLVED':
+ return lit.Test.UNRESOLVED
+ elif str_code == 'UNSUPPORTED':
+ return lit.Test.UNSUPPORTED
+ else:
+ assert False
+
+
+def loadTestResults(from_file):
+ """
+ Read in the output of a benchmark test run.
+ """
+ with open(from_file, 'r') as output_file:
+ output = json.load(output_file)
+ raw_tests = output['tests']
+ tests = {}
+ for rt in raw_tests:
+ test = {
+ 'name': rt['name'],
+ 'code': stringToCode(rt['code']),
+ 'output': rt['output'],
+ 'benchmarks': rt['metrics']['benchmarks']
+ }
+ tests[rt['name']] = test
+ return tests
+
+
+# Regex to parse a single line of a benchmarks output. The basic format is as
+# follows: <name> <time> <cpu_time> <iterations> (<extra fields>...)\n
+kbench_line_re = re.compile(
+    r'^\s*([^\s]+)\s+([-0-9]+)\s+([-0-9]+)\s+([0-9]+)([^\n]*)')
+
+
+def parseBenchmarkLine(line):
+ """
+ Parse the output of a single benchmark
+ """
+ assert line # Assert non-empty and non-null
+ if line.startswith('DEBUG: '):
+ line = line[len('DEBUG: '):]
+ # TODO(ericwf): This is a hack because the benchmark name can contain
+ # spaces if it names a template: ex BM_Foo<int, long>. Remove this.
+ new_line = line.replace(', ', ',$')
+ match = kbench_line_re.match(new_line)
+ assert match is not None
+ parsed_bench = {
+ 'name': match.group(1).replace(',$', ', '),
+ 'time': max(int(match.group(2)), 1), # Ensure non-zero
+ 'cpu_time': max(int(match.group(3)), 1), # Ensure non-zero
+ 'iterations': int(match.group(4)),
+ }
+ parsed_bench['total_cpu_time'] = (parsed_bench['cpu_time'] *
+ parsed_bench['iterations'])
+ parsed_bench['total_time'] = (parsed_bench['time'] *
+ parsed_bench['iterations'])
+ return parsed_bench
+
+
+def removeRepeatedBenchmarks(benchmark_list):
+ """
+ Some benchmarks are run multiple times and report
+    a mean and stddev at the end. This function removes all of the repeated
+    runs and combines the mean and stddev into one benchmark result.
+ Example Output:
+ Name Time(ns) Iterations
+ BM_my_test 11 95
+ BM_my_test 10 100
+ BM_my_test 9 105
+ BM_my_test_mean 10 100
+ BM_my_test_stddev 1 5
+ BM_different_test (...)
+ """
+ has_repeats = (len(benchmark_list) >= 4 and
+ benchmark_list[0]['name'] == benchmark_list[1]['name'])
+ if not has_repeats:
+ return benchmark_list
+ new_benchmark_list = []
+ for i in range(len(benchmark_list)):
+ possible_mean = benchmark_list[i]
+ name = possible_mean['name']
+ is_mean = name.endswith('_mean')
+ if not is_mean:
+ continue
+ real_name = name[:-len('_mean')]
+ new_bench = dict(possible_mean)
+ new_bench['name'] = real_name
+ assert len(benchmark_list) > i+1
+ stddev_bench = benchmark_list[i+1]
+ new_bench['time_stddev'] = stddev_bench['time']
+ new_bench['cpu_time_stddev'] = stddev_bench['cpu_time']
+ new_bench['iterations_stddev'] = stddev_bench['iterations']
+ new_benchmark_list += [new_bench]
+ return new_benchmark_list
+
+
+# Regex to split benchmark output header and results.
+# The header and results are split by a line containing only "-" characters.
+ksplit_line_re = re.compile('\n[-]+\n')
+
+
+def parseBenchmarkOutput(output):
+ """
+ Parse the output of the entire benchmark
+ """
+ # Split the benchmark output header and results based on a line containing
+ # only '-' characters.
+ parts = ksplit_line_re.split(output, maxsplit=1)
+ assert len(parts) == 2
+ benchmark_list = [parseBenchmarkLine(l.strip())
+ for l in parts[1].split('\n') if l.strip()]
+ benchmark_list = removeRepeatedBenchmarks(benchmark_list)
+ benchmark_dict = {}
+ benchmark_index = 0
+ for b in benchmark_list:
+ benchmark_index += 1
+ b['index'] = benchmark_index
+ benchmark_dict[b['name']] = b
+ return benchmark_dict
+
+
+def createBenchmarkDiff(first, second):
+ """
+ diff two benchmarks and return the difference.
+ """
+ def diff_fn(first, second):
+ return second / float(first)
+ return {
+ 'name': first['name'],
+ 'iterations': diff_fn(
+ first['iterations'], second['iterations']),
+ 'cpu_time': diff_fn(
+ second['cpu_time'], first['cpu_time']),
+ 'time': diff_fn(
+ second['time'], first['time'])
+ }
+
+
+def DiffBenchmarkResults(baseline, current):
+ """
+ Diff every benchmark in current against baseline and return
+    the results. If there is no matching benchmark in baseline, that benchmark
+ is skipped.
+ """
+ diff_map = {}
+ for curr_k, curr_v in current.iteritems():
+ matching_baseline = baseline.get(curr_k)
+ if not matching_baseline:
+ continue
+ diff = createBenchmarkDiff(curr_v, matching_baseline)
+ diff_map[curr_k] = diff
+ return diff_map
+
+
+def formatDiffString(key, baseline, curr, diff):
+ """
+    Format a user-readable string that reports the difference for one
+    value of a benchmark's output.
+ """
+ cmp_str = 'FASTER' if diff[key] < 1.0 else 'SLOWER'
+ fmt_str = '{0:11} {1:8} {2} (current={3}, baseline={4}, diff={5})'
+ label = '%s:' % key
+ diff_v = abs(diff[key])
+ # Print the change as a multiplier if it is >= 2. Otherwise print it as
+ # a percentage.
+ if diff_v >= 2:
+ change = '%.3fx' % diff_v
+ else:
+ change = '%.3f%%' % abs((diff_v * 100) - 100)
+ return fmt_str.format(label, change, cmp_str, curr[key], baseline[key],
+ abs(curr[key]-baseline[key]))
+
+
+def formatFailDiff(baseline, curr, diff):
+ """
+    Format a user-readable string that reports the differences for all
+    values of a benchmark's output.
+ """
+ return ('%s failed:\n %s\n %s\n %s\n' %
+ (curr['name'],
+ formatDiffString('cpu_time', baseline, curr, diff),
+ formatDiffString('iterations', baseline, curr, diff),
+ formatDiffString('time', baseline, curr, diff)))
+
+def formatPassDiff(baseline, curr, diff):
+ return ('%s passed:\n %s\n %s\n %s\n' %
+ (curr['name'],
+ formatDiffString('cpu_time', baseline, curr, diff),
+ formatDiffString('iterations', baseline, curr, diff),
+ formatDiffString('time', baseline, curr, diff)))
Index: test/libcxx/test/config.py
===================================================================
--- test/libcxx/test/config.py
+++ test/libcxx/test/config.py
@@ -10,11 +10,12 @@
import lit.Test # pylint: disable=import-error,no-name-in-module
import lit.util # pylint: disable=import-error,no-name-in-module
-from libcxx.test.format import LibcxxTestFormat
+from libcxx.test.format import LibcxxTestFormat, LibcxxBenchmarkFormat
from libcxx.compiler import CXXCompiler
from libcxx.test.executor import *
from libcxx.test.tracing import *
+
def loadSiteConfig(lit_config, config, param_name, env_name):
# We haven't loaded the site specific configuration (the user is
# probably trying to run on a test file directly, and either the site
@@ -639,3 +640,59 @@
cxx_library_root = self.cxx_library_root
if cxx_library_root:
self.env['DYLD_LIBRARY_PATH'] = cxx_library_root
+
+
+class BenchmarkConfiguration(Configuration):
+ def __init__(self, lit_config, config):
+ super(BenchmarkConfiguration, self).__init__(lit_config, config)
+ self.baseline = None
+ self.allowed_difference = None
+
+ def get_test_format(self):
+ return LibcxxBenchmarkFormat(
+ self.baseline,
+ self.allowed_difference,
+ self.cxx,
+ self.use_clang_verify,
+ self.execute_external,
+ self.executor,
+ exec_env=self.env)
+
+ def configure(self):
+ super(BenchmarkConfiguration, self).configure()
+ self.configure_benchmark_flags()
+ self.configure_baseline()
+ self.configure_allowed_difference()
+ self.print_config_info()
+
+ def configure_baseline(self):
+ res = self.get_lit_conf('baseline')
+ if not res:
+ return
+ if not os.path.isfile(res):
+            self.lit_config.fatal('Invalid baseline file: %s' % res)
+ self.lit_config.note('Comparing to results file: %s' % res)
+ import libcxx.test.benchmark as benchcxx
+ self.baseline = benchcxx.loadTestResults(res)
+
+ def configure_allowed_difference(self):
+ allowed_diff = self.get_lit_conf('allowed_difference', '5.0')
+ self.allowed_difference = float(allowed_diff)
+
+ def configure_benchmark_flags(self):
+ external_dir = os.path.join(self.libcxx_obj_root, 'external')
+ self.cxx.compile_flags += [
+ '-I' + external_dir + '/include',
+ '-I' + self.libcxx_src_root + '/test/benchmark/support'
+ ]
+ lib_path = external_dir + '/lib'
+ self.cxx.link_flags = ['-L' + lib_path,
+ '-Wl,-rpath,' + lib_path] + self.cxx.link_flags
+ self.cxx.link_flags += ['-lbenchmark']
+ if sys.platform == 'darwin':
+ dyn_path = self.env.get('DYLD_LIBRARY_PATH')
+ if dyn_path is None:
+ dyn_path = lib_path
+ else:
+ dyn_path = dyn_path + ':' + lib_path
+ self.env['DYLD_LIBRARY_PATH'] = dyn_path
Index: test/libcxx/test/format.py
===================================================================
--- test/libcxx/test/format.py
+++ test/libcxx/test/format.py
@@ -1,13 +1,15 @@
import errno
import os
+import re
+import tempfile
import time
import lit.Test # pylint: disable=import-error
import lit.TestRunner # pylint: disable=import-error
import lit.util # pylint: disable=import-error
+import libcxx.test.benchmark as benchcxx
from libcxx.test.executor import LocalExecutor as LocalExecutor
-import libcxx.test.executor
import libcxx.util
@@ -41,8 +43,7 @@
filepath = os.path.join(source_path, filename)
if not os.path.isdir(filepath):
- if any([filename.endswith(ext)
- for ext in localConfig.suffixes]):
+ if any([filepath.endswith(s) for s in localConfig.suffixes]):
yield lit.Test.Test(testSuite, path_in_suite + (filename,),
localConfig)
@@ -148,3 +149,124 @@
report = libcxx.util.makeReport(cmd, out, err, rc)
return (lit.Test.FAIL,
report + 'Expected compilation to fail!\n')
+
+
+class LibcxxBenchmarkFormat(LibcxxTestFormat):
+ def __init__(self, baseline, allowed_difference, *args, **kwargs):
+ super(LibcxxBenchmarkFormat, self).__init__(*args, **kwargs)
+ self.baseline = baseline
+ self.allowed_difference = allowed_difference
+
+ def _execute(self, test, lit_config):
+ res = lit.TestRunner.parseIntegratedTestScript(
+ test, require_script=False)
+ # Check if a result for the test was returned. If so return that
+ # result.
+ if isinstance(res, lit.Test.Result):
+ return res
+ if lit_config.noExecute:
+ return lit.Test.Result(lit.Test.PASS)
+ # res is not an instance of lit.test.Result. Expand res into its parts.
+ script, tmpBase, execDir = res
+        # Check that we don't have RUN lines on tests that don't support them.
+        if len(script) != 0:
+            lit_config.fatal('Unsupported RUN line found in test %s'
+                             % test.getFullName())
+        # Run the benchmark and normalize a (code, output) tuple into a
+        # lit.Test.Result before returning it.
+        res = self._benchmark_test(test, tmpBase, execDir, lit_config)
+        if not isinstance(res, lit.Test.Result):
+            code, output = res
+            res = lit.Test.Result(code, output)
+        return res
+
+ def _benchmark_test(self, test, tmpBase, execDir, lit_config):
+ source_path = test.getSourcePath()
+ exec_path = tmpBase + '.exe'
+ object_path = tmpBase + '.o'
+ # Create the output directory if it does not already exist.
+ lit.util.mkdir_p(os.path.dirname(tmpBase))
+ try:
+ # Compile the test
+ cmd, out, err, rc = self.cxx.compileLinkTwoSteps(
+ source_path, out=exec_path, object_file=object_path,
+ cwd=execDir)
+ compile_cmd = cmd
+ if rc != 0:
+ report = libcxx.util.makeReport(cmd, out, err, rc)
+ report += "Compilation failed unexpectedly!"
+ return lit.Test.FAIL, report
+ # Run the test
+ cmd = [exec_path, '--benchmark_repetitions=3']
+ out, err, rc = self.executor.run(
+ None, cmd=cmd, work_dir=os.path.dirname(source_path),
+ env=self.exec_env)
+ if rc != 0:
+ report = libcxx.util.makeReport(cmd, out, err, rc)
+ report = "Compiled With: %s\n%s" % (compile_cmd, report)
+ report += "Compiled test failed unexpectedly!"
+ return lit.Test.FAIL, report
+ scale_warning = ('CPU scaling is enabled: ' +
+ 'Benchmark timings may be noisy.')
+ if scale_warning in out:
+ lit_config.warning(scale_warning)
+ result = lit.Test.Result(lit.Test.PASS, '')
+ benchmark_data = benchcxx.parseBenchmarkOutput(out)
+ result.addMetric('benchmarks',
+ lit.Test.toMetricValue(benchmark_data))
+ # Check for a benchmark that looks like it does nothing.
+ # This is likely a problem.
+ bad_results_str = self._detect_bad_results(benchmark_data)
+ if bad_results_str:
+ result.code = lit.Test.FAIL
+ result.output = bad_results_str
+ return result
+ # Compare the results to the baseline if the baseline is present.
+ if self.baseline:
+ failing_bench_str = self._compare_results(
+ test.getFullName(), result)
+ if failing_bench_str:
+ result.code = lit.Test.FAIL
+ result.output = failing_bench_str
+ result.metrics = {}
+ return result
+ finally:
+            # Note that cleanup of exec_path happens in `_clean()`. If you
+            # override this, cleanup is your responsibility.
+ self._clean(exec_path)
+
+ def _detect_bad_results(self, benches):
+ bad_results_str = ''
+ for k, v in benches.iteritems():
+ if v['cpu_time'] < 10 and k != 'BM_test_empty':
+ bad_results_str += ('Test %s runs too quickly! cpu_time=%s\n'
+ % (k, v['cpu_time']))
+ return bad_results_str
+
+ def _compare_results(self, test_name, result):
+ baseline_results = self.baseline.get(test_name)
+ if baseline_results is None:
+ return None
+ this_bench = result.metrics['benchmarks'].value
+ baseline_bench = baseline_results['benchmarks']
+ # Calculate the timing and iteration differences.
+ diff_metrics = benchcxx.DiffBenchmarkResults(
+ baseline_bench, this_bench)
+ result.addMetric(
+ 'benchmark_diff', lit.Test.toMetricValue(diff_metrics))
+        # Collect all of the failing test result strings. Map by index
+        # so that they are printed in the order they were run.
+ failing_bench_map = {}
+ passing_bench_map = {}
+ for diff_name, diff in diff_metrics.items():
+ curr_b = this_bench[diff_name]
+ baseline_b = baseline_bench[diff_name]
+ if diff['cpu_time'] * 100 - 100 <= self.allowed_difference:
+ passing_bench_map[curr_b['index']] = benchcxx.formatPassDiff(
+ baseline_b, curr_b, diff)
+ else:
+ failing_bench_map[curr_b['index']] = benchcxx.formatFailDiff(
+ baseline_b, curr_b, diff)
+ if failing_bench_map:
+ for k, v in passing_bench_map.iteritems():
+ failing_bench_map[k] = v
+        return '\n'.join(failing_bench_map[k]
+                         for k in sorted(failing_bench_map))