Modified: trunk/Tools/Scripts/webkitpy/performance_tests/perftest.py (138954 => 138955)
--- trunk/Tools/Scripts/webkitpy/performance_tests/perftest.py 2013-01-07 18:27:05 UTC (rev 138954)
+++ trunk/Tools/Scripts/webkitpy/performance_tests/perftest.py 2013-01-07 18:28:05 UTC (rev 138955)
@@ -52,6 +52,60 @@
_log = logging.getLogger(__name__)
+class PerfTestMetric(object):
+ def __init__(self, metric, unit=None, iterations=None):
+ self._metric = metric
+ self._iterations = iterations or []
+ self._unit = unit or self.metric_to_unit(metric)
+
+ def metric(self):
+ return self._metric
+
+ def has_values(self):
+ return bool(self._iterations)
+
+ # FIXME: We don't need to support this anymore. Make outputs more human friendly.
+ def legacy_chromium_bot_compatible_test_name(self, test_name_with_extension):
+ test_name = re.sub(r'\.\w+$', '', test_name_with_extension)
+ return test_name if self._metric == 'Time' else test_name + ':' + self._metric
+
+ def append(self, value):
+ self._iterations.append(value)
+
+ def to_dict(self):
+ assert self.has_values()
+ statistics = self.compute_statistics(self._iterations)
+ statistics['unit'] = self._unit
+ statistics['values'] = self._iterations
+ return statistics
+
+ @classmethod
+ def metric_to_unit(cls, metric):
+ assert metric in ('Time', 'Malloc', 'JSHeap')
+ return 'ms' if metric == 'Time' else 'bytes'
+
+ @staticmethod
+ def compute_statistics(values):
+ sorted_values = sorted(values)
+
+ # Compute the mean and variance using Knuth's online algorithm (has good numerical stability).
+ squareSum = 0
+ mean = 0
+ for i, time in enumerate(sorted_values):
+ delta = time - mean
+ sweep = i + 1.0
+ mean += delta / sweep
+ squareSum += delta * (time - mean)
+
+ middle = int(len(sorted_values) / 2)
+ result = {'avg': sum(sorted_values) / len(values),
+ 'min': sorted_values[0],
+ 'max': sorted_values[-1],
+ 'median': sorted_values[middle] if len(sorted_values) % 2 else (sorted_values[middle - 1] + sorted_values[middle]) / 2,
+ 'stdev': math.sqrt(squareSum / (len(sorted_values) - 1)) if len(sorted_values) > 1 else 0}
+ return result
+
+
class PerfTest(object):
def __init__(self, port, test_name, test_path):
self._port = port
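(A minimal usage sketch of the PerfTestMetric class added above, assuming Python 2 as used by webkitpy at this revision; the test name and values are made up.)

    from webkitpy.performance_tests.perftest import PerfTestMetric

    malloc = PerfTestMetric('Malloc')   # unit defaults to 'bytes' via metric_to_unit()
    malloc.append(1024.0)
    malloc.append(2048.0)

    # 'Time' keeps the bare test name; every other metric gets a ':<metric>' suffix,
    # and the file extension is stripped first.
    print malloc.legacy_chromium_bot_compatible_test_name('some-dir/some-test.html')
    # -> 'some-dir/some-test:Malloc'

    # to_dict() recomputes the statistics from the raw iterations and tags the unit.
    print sorted(malloc.to_dict().keys())
    # -> ['avg', 'max', 'median', 'min', 'stdev', 'unit', 'values']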
@@ -71,22 +125,23 @@
def prepare(self, time_out_ms):
return True
+ def _create_driver(self):
+ return self._port.create_driver(worker_number=0, no_timeout=True)
+
def run(self, time_out_ms):
- driver = self._port.create_driver(worker_number=0, no_timeout=True)
+ driver = self._create_driver()
try:
- return self._run_with_driver(driver, time_out_ms)
+ metrics = self._run_with_driver(driver, time_out_ms)
finally:
driver.stop()
- def _run_with_driver(self, driver, time_out_ms):
- output = self.run_single(driver, self.test_path(), time_out_ms)
- self._filter_output(output)
- if self.run_failed(output):
- return None
+ if not metrics:
+ return metrics
- results = self.parse_output(output)
- if not results:
- return None
+ results = {}
+ for metric in metrics:
+ legacy_test_name = metric.legacy_chromium_bot_compatible_test_name(self.test_name())
+ results[legacy_test_name] = metric.to_dict()
if not self._port.get_option('profile'):
if self._description:
@@ -96,6 +151,14 @@
return results
+ def _run_with_driver(self, driver, time_out_ms):
+ output = self.run_single(driver, self.test_path(), time_out_ms)
+ self._filter_output(output)
+ if self.run_failed(output):
+ return None
+
+ return self.parse_output(output)
+
def run_single(self, driver, test_path, time_out_ms, should_run_pixel_test=False):
return driver.run_test(DriverInput(test_path, time_out_ms, image_hash=None, should_run_pixel_test=should_run_pixel_test), stop_when_done=False)
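(For readability, here is the refactored control flow of PerfTest.run() collected from the hunks above into one condensed sketch; the logging and --profile handling are omitted, and this is not the verbatim source.)

    def run(self, time_out_ms):
        driver = self._create_driver()
        try:
            # _run_with_driver() now returns a list of PerfTestMetric objects,
            # or None if the test failed.
            metrics = self._run_with_driver(driver, time_out_ms)
        finally:
            driver.stop()

        if not metrics:
            return metrics

        # Convert the metric objects back into the dictionary format that callers
        # of run() still expect, keyed by the legacy chromium-bot-compatible name.
        results = {}
        for metric in metrics:
            results[metric.legacy_chromium_bot_compatible_test_name(self.test_name())] = metric.to_dict()
        return results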
@@ -159,79 +222,36 @@
output.text = '\n'.join([line for line in re.split('\n', output.text) if not self._should_ignore_line_in_parser_test_result(line)])
_description_regex = re.compile(r'^Description: (?P<description>.*)$', re.IGNORECASE)
- _result_classes = ['Time', 'JS Heap', 'Malloc']
- _result_class_regex = re.compile(r'^(?P<resultclass>' + r'|'.join(_result_classes) + '):')
+ _metrics_regex = re.compile(r'^(?P<metric>Time|Malloc|JS Heap):')
_statistics_keys = ['avg', 'median', 'stdev', 'min', 'max', 'unit', 'values']
_score_regex = re.compile(r'^(?P<key>' + r'|'.join(_statistics_keys) + r')\s+(?P<value>([0-9\.]+(,\s+)?)+)\s*(?P<unit>.*)')
def parse_output(self, output):
- test_failed = False
- results = {}
- test_name = re.sub(r'\.\w+$', '', self._test_name)
- result_class = ""
+ current_metric = None
+ results = []
for line in re.split('\n', output.text):
if not line:
continue
description_match = self._description_regex.match(line)
+ metric_match = self._metrics_regex.match(line)
+ score = self._score_regex.match(line)
+
if description_match:
self._description = description_match.group('description')
- continue
-
- result_class_match = self._result_class_regex.match(line)
- if result_class_match:
- result_class = result_class_match.group('resultclass')
- continue
-
- score = self._score_regex.match(line)
- if score:
+ elif metric_match:
+ current_metric = metric_match.group('metric').replace(' ', '')
+ elif score:
key = score.group('key')
- if key == 'values':
- value = [float(number) for number in score.group('value').split(', ')]
- else:
- value = float(score.group('value'))
- unit = score.group('unit')
- name = test_name
- if result_class != 'Time':
- name += ':' + result_class.replace(' ', '')
- results.setdefault(name, {})
- results[name]['unit'] = unit
- results[name][key] = value
- continue
+ if key == 'values' and results != None:
+ values = [float(number) for number in score.group('value').split(', ')]
+ results.append(PerfTestMetric(current_metric, score.group('unit'), values))
+ else:
+ results = None
+ _log.error('ERROR: ' + line)
- test_failed = True
- _log.error('ERROR: ' + line)
-
- if test_failed:
- return None
-
- if set(self._statistics_keys) != set(results[test_name].keys()):
- _log.error("The test didn't report all statistics.")
- return None
-
return results
- @staticmethod
- def compute_statistics(values):
- sorted_values = sorted(values)
-
- # Compute the mean and variance using Knuth's online algorithm (has good numerical stability).
- squareSum = 0
- mean = 0
- for i, time in enumerate(sorted_values):
- delta = time - mean
- sweep = i + 1.0
- mean += delta / sweep
- squareSum += delta * (time - mean)
-
- middle = int(len(sorted_values) / 2)
- result = {'avg': sum(sorted_values) / len(values),
- 'min': sorted_values[0],
- 'max': sorted_values[-1],
- 'median': sorted_values[middle] if len(sorted_values) % 2 else (sorted_values[middle - 1] + sorted_values[middle]) / 2,
- 'stdev': math.sqrt(squareSum / (len(sorted_values) - 1))}
- return result
-
def output_statistics(self, test_name, results):
unit = results['unit']
_log.info('RESULT %s= %s %s' % (test_name.replace(':', ': ').replace('/', ': '), results['avg'], unit))
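(A hedged illustration of the test output that parse_output() consumes; the shape follows the regexes above and the fixture values in the unit tests, but the exact text a real test emits may differ.)

    Description: Measures the time to run some workload.
    Time:
    values 1080, 1120, 1095, 1101, 1104 ms
    avg 1100 ms
    median 1101 ms
    stdev 14.51 ms
    min 1080 ms
    max 1120 ms

The 'Description:' line is stored for logging, a metric header ('Time:', 'Malloc:' or 'JS Heap:') selects current_metric, and only the 'values' line actually creates a PerfTestMetric; the pre-computed avg/median/stdev/min/max lines match _score_regex but are ignored, since the statistics are recomputed from the raw values. Any line that matches none of the patterns is logged as an error and makes parse_output() return None.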
@@ -244,8 +264,13 @@
def __init__(self, port, test_name, test_path):
super(ChromiumStylePerfTest, self).__init__(port, test_name, test_path)
- def _run_with_driver(self, driver, time_out_ms):
- output = self.run_single(driver, self.test_path(), time_out_ms)
+ def run(self, time_out_ms):
+ driver = self._create_driver()
+ try:
+ output = self.run_single(driver, self.test_path(), time_out_ms)
+ finally:
+ driver.stop()
+
self._filter_output(output)
if self.run_failed(output):
return None
@@ -280,8 +305,9 @@
return super(PageLoadingPerfTest, self).run_single(driver, test_path, time_out_ms, should_run_pixel_test)
def _run_with_driver(self, driver, time_out_ms):
- results = {}
- results.setdefault(self.test_name(), {'unit': 'ms', 'values': []})
+ times = PerfTestMetric('Time')
+ malloc = PerfTestMetric('Malloc')
+ js_heap = PerfTestMetric('JSHeap')
for i in range(0, 20):
output = self.run_single(driver, self.test_path(), time_out_ms)
@@ -290,26 +316,20 @@
if i == 0:
continue
- results[self.test_name()]['values'].append(output.test_time * 1000)
-
+ times.append(output.test_time * 1000)
if not output.measurements:
continue
- for result_class, result in output.measurements.items():
- name = self.test_name() + ':' + result_class
- if not name in results:
- results.setdefault(name, {'values': []})
- results[name]['values'].append(result)
- if result_class == 'Malloc' or result_class == 'JSHeap':
- results[name]['unit'] = 'bytes'
+ for metric, result in output.measurements.items():
+ assert metric == 'Malloc' or metric == 'JSHeap'
+ if metric == 'Malloc':
+ malloc.append(result)
+ else:
+ js_heap.append(result)
- for result_class in results.keys():
- results[result_class].update(self.compute_statistics(results[result_class]['values']))
- self.output_statistics(result_class, results[result_class])
+ return filter(lambda metric: metric.has_values(), [times, malloc, js_heap])
- return results
-
class ReplayServer(object):
def __init__(self, archive, record):
self._process = None
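(The compute_statistics() helper that moved from PerfTest onto PerfTestMetric in this change uses the Knuth/Welford online recurrence for the mean and the sum of squared deviations; a self-contained sketch of the same idea, with illustrative names:)

    def online_mean_and_sample_variance(values):
        # Invariant after k values: 'mean' is their average and 'square_sum' is
        # sum((x - mean)**2), both updated incrementally for numerical stability.
        mean = 0.0
        square_sum = 0.0
        for k, x in enumerate(values, 1):
            delta = x - mean
            mean += delta / k
            square_sum += delta * (x - mean)
        variance = square_sum / (len(values) - 1) if len(values) > 1 else 0.0
        return mean, variance

    # online_mean_and_sample_variance([4.0, 2.0, 5.0, 8.0, 6.0]) == (5.0, 5.0),
    # which agrees with the sqrt(5) stdev assertion in the unit test below.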
Modified: trunk/Tools/Scripts/webkitpy/performance_tests/perftest_unittest.py (138954 => 138955)
--- trunk/Tools/Scripts/webkitpy/performance_tests/perftest_unittest.py 2013-01-07 18:27:05 UTC (rev 138954)
+++ trunk/Tools/Scripts/webkitpy/performance_tests/perftest_unittest.py 2013-01-07 18:28:05 UTC (rev 138955)
@@ -39,6 +39,7 @@
from webkitpy.performance_tests.perftest import ChromiumStylePerfTest
from webkitpy.performance_tests.perftest import PageLoadingPerfTest
from webkitpy.performance_tests.perftest import PerfTest
+from webkitpy.performance_tests.perftest import PerfTestMetric
from webkitpy.performance_tests.perftest import PerfTestFactory
from webkitpy.performance_tests.perftest import ReplayPerfTest
@@ -47,10 +48,44 @@
def __init__(self, custom_run_test=None):
super(MockPort, self).__init__(host=MockHost(), custom_run_test=custom_run_test)
-class MainTest(unittest.TestCase):
+
+class TestPerfTestMetric(unittest.TestCase):
+ def test_init_set_missing_unit(self):
+ self.assertEqual(PerfTestMetric('Time', iterations=[1, 2, 3, 4, 5]).to_dict()['unit'], 'ms')
+ self.assertEqual(PerfTestMetric('Malloc', iterations=[1, 2, 3, 4, 5]).to_dict()['unit'], 'bytes')
+ self.assertEqual(PerfTestMetric('JSHeap', iterations=[1, 2, 3, 4, 5]).to_dict()['unit'], 'bytes')
+
+ def test_legacy_chromium_bot_compatible_test_name(self):
+ self.assertEqual(PerfTestMetric('Time').legacy_chromium_bot_compatible_test_name('test'), 'test')
+ self.assertEqual(PerfTestMetric('Malloc').legacy_chromium_bot_compatible_test_name('test'), 'test:Malloc')
+ self.assertEqual(PerfTestMetric('JSHeap').legacy_chromium_bot_compatible_test_name('test'), 'test:JSHeap')
+ self.assertEqual(PerfTestMetric('FontSize', unit='em').legacy_chromium_bot_compatible_test_name('test'), 'test:FontSize')
+
+ def test_has_values(self):
+ self.assertFalse(PerfTestMetric('Time').has_values())
+ self.assertTrue(PerfTestMetric('Time', iterations=[1]).has_values())
+
+ def test_append(self):
+ metric = PerfTestMetric('Time')
+ metric2 = PerfTestMetric('Time')
+ self.assertFalse(metric.has_values())
+ self.assertFalse(metric2.has_values())
+
+ metric.append(1)
+ self.assertTrue(metric.has_values())
+ self.assertFalse(metric2.has_values())
+ self.assertEqual(metric.to_dict()['values'], [1])
+ metric.append(2)
+ self.assertEqual(metric.to_dict()['values'], [1, 2])
+
+ metric2.append(3)
+ self.assertTrue(metric2.has_values())
+ self.assertEqual(metric.to_dict()['values'], [1, 2])
+ self.assertEqual(metric2.to_dict()['values'], [3])
+
def test_compute_statistics(self):
def compute_statistics(values):
- statistics = PerfTest.compute_statistics(map(lambda x: float(x), values))
+ statistics = PerfTestMetric.compute_statistics(map(lambda x: float(x), values))
return json.loads(json.dumps(statistics))
statistics = compute_statistics([10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11])
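(A quick arithmetic check of the expectations for this data set, which is just 1 through 20 shuffled; this check is editorial, not part of the patch:)

    values = [float(i) for i in range(1, 21)]
    mean = sum(values) / len(values)                   # 10.5
    square_sum = sum((x - mean) ** 2 for x in values)  # 665.0
    print square_sum / (len(values) - 1)               # 35.0, so stdev == math.sqrt(35)
    # For an even-length list the median is the mean of the two middle values: (10 + 11) / 2 = 10.5.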
@@ -62,17 +97,20 @@
self.assertEqual(compute_statistics([8, 9, 10, 11, 12])['avg'], 10)
self.assertEqual(compute_statistics([8, 9, 10, 11, 12] * 4)['avg'], 10)
self.assertEqual(compute_statistics([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])['avg'], 10)
- self.assertEqual(PerfTest.compute_statistics([1, 5, 2, 8, 7])['median'], 5)
- self.assertEqual(PerfTest.compute_statistics([1, 6, 2, 8, 7, 2])['median'], 4)
+ self.assertEqual(compute_statistics([1, 5, 2, 8, 7])['median'], 5)
+ self.assertEqual(compute_statistics([1, 6, 2, 8, 7, 2])['median'], 4)
self.assertAlmostEqual(statistics['stdev'], math.sqrt(35))
+ self.assertAlmostEqual(compute_statistics([1])['stdev'], 0)
self.assertAlmostEqual(compute_statistics([1, 2, 3, 4, 5, 6])['stdev'], math.sqrt(3.5))
self.assertAlmostEqual(compute_statistics([4, 2, 5, 8, 6])['stdev'], math.sqrt(5))
+
+class TestPerfTest(unittest.TestCase):
def _assert_results_are_correct(self, test, output):
test._filter_output(output)
parsed_results = test.parse_output(output)
- self.assertEqual(parsed_results.keys(), ['some-test'])
- some_test_results = parsed_results['some-test']
+ self.assertEqual(len(parsed_results), 1)
+ some_test_results = parsed_results[0].to_dict()
self.assertEqual(sorted(some_test_results.keys()), ['avg', 'max', 'median', 'min', 'stdev', 'unit', 'values'])
self.assertEqual(some_test_results['values'], [1080, 1120, 1095, 1101, 1104])
self.assertEqual(some_test_results['min'], 1080)
@@ -221,15 +259,19 @@
output_capture = OutputCapture()
output_capture.capture_output()
try:
- self.assertEqual(test._run_with_driver(driver, None),
- {'some-test': {'max': 20000, 'avg': 11000.0, 'median': 11000, 'stdev': 5627.314338711378, 'min': 2000, 'unit': 'ms',
- 'values': [float(i * 1000) for i in range(2, 21)]}})
+ metrics = test._run_with_driver(driver, None)
finally:
actual_stdout, actual_stderr, actual_logs = output_capture.restore_output()
+
self.assertEqual(actual_stdout, '')
self.assertEqual(actual_stderr, '')
- self.assertEqual(actual_logs, 'RESULT some-test= 11000 ms\nmedian= 11000 ms, stdev= 5627.31433871 ms, min= 2000 ms, max= 20000 ms\n')
+ self.assertEqual(actual_logs, '')
+ self.assertEqual(len(metrics), 1)
+ self.assertEqual(metrics[0].metric(), 'Time')
+ self.assertEqual(metrics[0].to_dict(), {'max': 20000, 'avg': 11000.0, 'median': 11000, 'stdev': 5627.314338711378, 'min': 2000, 'unit': 'ms',
+ 'values': [float(i * 1000) for i in range(2, 21)]})
+
def test_run_with_memory_output(self):
port = MockPort()
test = PageLoadingPerfTest(port, 'some-test', '/path/some-dir/some-test')
@@ -239,21 +281,25 @@
output_capture = OutputCapture()
output_capture.capture_output()
try:
- self.assertEqual(test._run_with_driver(driver, None),
- {'some-test': {'max': 20000, 'avg': 11000.0, 'median': 11000, 'stdev': 5627.314338711378, 'min': 2000, 'unit': 'ms',
- 'values': [float(i * 1000) for i in range(2, 21)]},
- 'some-test:Malloc': {'max': 10, 'avg': 10.0, 'median': 10, 'min': 10, 'stdev': 0.0, 'unit': 'bytes',
- 'values': [float(10)] * 19},
- 'some-test:JSHeap': {'max': 5, 'avg': 5.0, 'median': 5, 'min': 5, 'stdev': 0.0, 'unit': 'bytes',
- 'values': [float(5)] * 19}})
+ metrics = test._run_with_driver(driver, None)
finally:
actual_stdout, actual_stderr, actual_logs = output_capture.restore_output()
+
self.assertEqual(actual_stdout, '')
self.assertEqual(actual_stderr, '')
- self.assertEqual(actual_logs, 'RESULT some-test= 11000 ms\nmedian= 11000 ms, stdev= 5627.31433871 ms, min= 2000 ms, max= 20000 ms\n'
- + 'RESULT some-test: Malloc= 10 bytes\nmedian= 10 bytes, stdev= 0.0 bytes, min= 10 bytes, max= 10 bytes\n'
- + 'RESULT some-test: JSHeap= 5 bytes\nmedian= 5 bytes, stdev= 0.0 bytes, min= 5 bytes, max= 5 bytes\n')
+ self.assertEqual(actual_logs, '')
+ self.assertEqual(len(metrics), 3)
+ self.assertEqual(metrics[0].metric(), 'Time')
+ self.assertEqual(metrics[0].to_dict(), {'max': 20000, 'avg': 11000.0, 'median': 11000, 'stdev': 5627.314338711378, 'min': 2000, 'unit': 'ms',
+ 'values': [float(i * 1000) for i in range(2, 21)]})
+ self.assertEqual(metrics[1].metric(), 'Malloc')
+ self.assertEqual(metrics[1].to_dict(), {'max': 10, 'avg': 10.0, 'median': 10, 'min': 10, 'stdev': 0.0, 'unit': 'bytes',
+ 'values': [float(10)] * 19})
+ self.assertEqual(metrics[2].metric(), 'JSHeap')
+ self.assertEqual(metrics[2].to_dict(), {'max': 5, 'avg': 5.0, 'median': 5, 'min': 5, 'stdev': 0.0, 'unit': 'bytes',
+ 'values': [float(5)] * 19})
+
def test_run_with_bad_output(self):
output_capture = OutputCapture()
output_capture.capture_output()