Diff
Modified: trunk/Tools/ChangeLog (92152 => 92153)
--- trunk/Tools/ChangeLog 2011-08-01 23:49:54 UTC (rev 92152)
+++ trunk/Tools/ChangeLog 2011-08-02 00:07:14 UTC (rev 92153)
@@ -1,3 +1,26 @@
+2011-08-01 Adam Barth <aba...@webkit.org>
+
+ webkit-patch needs to be able to "optimize" the storage of baselines on disk
+ https://bugs.webkit.org/show_bug.cgi?id=65418
+
+ Reviewed by Dimitri Glazkov.
+
+ If we're not careful when rebaselining tests, we can end up with lots
+ of duplicate expected results files in the tree. This patch adds a
+ webkit-patch command that optimizes the storage of expected results on
+ disk.
+
+ This command is similar to deduplicate-tests, except that it can move
+ test results around rather than just remove duplicate results.
+
+ Unfortunately, this problem is very tricky because the baseline search
+ structure is a hypergraph. This patch include a huerstic optimizer
+ that appears to work on a bunch of examples I've tried. We'll likely
+ need to refine it as gain experience.
+
+ * Scripts/webkitpy/common/system/filesystem.py:
+ * Scripts/webkitpy/tool/commands/rebaseline.py:
+
2011-08-01 Dimitri Glazkov <dglaz...@chromium.org>
Teach TestExpectationSerializer about parsed expectations.
Added: trunk/Tools/Scripts/webkitpy/common/checkout/baselineoptimizer.py (0 => 92153)
--- trunk/Tools/Scripts/webkitpy/common/checkout/baselineoptimizer.py (rev 0)
+++ trunk/Tools/Scripts/webkitpy/common/checkout/baselineoptimizer.py 2011-08-02 00:07:14 UTC (rev 92153)
@@ -0,0 +1,154 @@
+# Copyright (C) 2011, Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from webkitpy.layout_tests.port import factory as port_factory
+
+
+# Yes, it's a hypergraph.
+# FIXME: Should this function live with the ports somewhere?
+def _baseline_search_hypergraph(fs):
+ hypergraph = {}
+
+ # These edges in the hypergraph aren't visible on build.webkit.org,
+ # but they impose constraints on how we optimize baselines.
+ hypergraph['mac-future'] = ['LayoutTests/platform/mac-future', 'LayoutTests/platform/mac']
+ hypergraph['qt-unknown'] = ['LayoutTests/platform/qt-unknown', 'LayoutTests/platform/qt']
+
+ for port_name in port_factory.all_port_names():
+ port = port_factory.get(port_name)
+ webkit_base = port.webkit_base()
+ search_path = port.baseline_search_path()
+ if search_path:
+ hypergraph[port_name] = [fs.relpath(path, webkit_base) for path in search_path]
+ return hypergraph
+
+
+# FIXME: Should this function be somewhere more general?
+def _invert_dictionary(dictionary):
+ inverted_dictionary = {}
+ for key, value in dictionary.items():
+ if inverted_dictionary.get(value):
+ inverted_dictionary[value].append(key)
+ else:
+ inverted_dictionary[value] = [key]
+ return inverted_dictionary
+
+
+class BaselineOptimizer(object):
+ def __init__(self, scm, filesystem):
+ self._scm = scm
+ self._filesystem = filesystem
+ self._hypergraph = _baseline_search_hypergraph(self._filesystem)
+ self._directories = reduce(set.union, map(set, self._hypergraph.values()))
+
+ def _read_results_by_directory(self, baseline_name):
+ results_by_directory = {}
+ for directory in self._directories:
+ path = self._filesystem.join(self._scm.checkout_root, directory, baseline_name)
+ if self._filesystem.exists(path):
+ results_by_directory[directory] = self._filesystem.sha1(path)
+ return results_by_directory
+
+ def _results_by_port_name(self, results_by_directory):
+ results_by_port_name = {}
+ for port_name, search_path in self._hypergraph.items():
+ for directory in search_path:
+ if directory in results_by_directory:
+ results_by_port_name[port_name] = results_by_directory[directory]
+ break
+ return results_by_port_name
+
+ def _most_specific_common_directory(self, port_names):
+ paths = [self._hypergraph[port_name] for port_name in port_names]
+ common_directories = reduce(set.intersection, map(set, paths))
+
+ def score(directory):
+ return sum([path.index(directory) for path in paths])
+
+ _, directory = sorted([(score(directory), directory) for directory in common_directories])[0]
+ return directory
+
+ def _filter_port_names_by_result(self, predicate, port_names_by_result):
+ filtered_port_names_by_result = {}
+ for result, port_names in port_names_by_result.items():
+ filtered_port_names = filter(predicate, port_names)
+ if filtered_port_names:
+ filtered_port_names_by_result[result] = filtered_port_names
+ return filtered_port_names_by_result
+
+ def _place_results_in_most_specific_common_directory(self, port_names_by_result, results_by_directory):
+ for result, port_names in port_names_by_result.items():
+ directory = self._most_specific_common_directory(port_names)
+ results_by_directory[directory] = result
+
+ def _find_optimal_result_placement(self, baseline_name):
+ results_by_directory = self._read_results_by_directory(baseline_name)
+ results_by_port_name = self._results_by_port_name(results_by_directory)
+ port_names_by_result = _invert_dictionary(results_by_port_name)
+
+ new_results_by_directory = {}
+ unsatisfied_port_names_by_result = port_names_by_result
+ while unsatisfied_port_names_by_result:
+ self._place_results_in_most_specific_common_directory(unsatisfied_port_names_by_result, new_results_by_directory)
+ new_results_by_port_name = self._results_by_port_name(new_results_by_directory)
+
+ def is_unsatisfied(port_name):
+ return results_by_port_name[port_name] != new_results_by_port_name[port_name]
+
+ new_unsatisfied_port_names_by_result = self._filter_port_names_by_result(is_unsatisfied, port_names_by_result)
+
+ if len(new_unsatisfied_port_names_by_result.values()) >= len(unsatisfied_port_names_by_result.values()):
+ break # Frowns. We do not appear to be converging.
+ unsatisfied_port_names_by_result = new_unsatisfied_port_names_by_result
+
+ return results_by_directory, new_results_by_directory
+
+ def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
+ source_directory_for_result = {}
+ for directory, result in results_by_directory.items():
+ source_directory_for_result[result] = directory
+
+ for directory, result in new_results_by_directory.items():
+ if results_by_directory.get(directory) != result:
+ source = self._filesystem.join(self._scm.checkout_root, source_directory_for_result[result], baseline_name)
+ destination = self._filesystem.join(self._scm.checkout_root, directory, baseline_name)
+ self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0])
+ self._filesystem.copyfile(source, destination)
+ self._scm.add(destination)
+
+ for directory, result in results_by_directory.items():
+ if new_results_by_directory.get(directory) != result:
+ file_name = self._filesystem.join(self._scm.checkout_root, directory, baseline_name)
+ self._scm.delete(file_name)
+
+ def optimize(self, baseline_name):
+ results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name)
+ if self._results_by_port_name(results_by_directory) != self._results_by_port_name(new_results_by_directory):
+ return False
+ self._move_baselines(baseline_name, results_by_directory, new_results_by_directory)
+ return True
Added: trunk/Tools/Scripts/webkitpy/common/checkout/baselineoptimizer_unittest.py (0 => 92153)
--- trunk/Tools/Scripts/webkitpy/common/checkout/baselineoptimizer_unittest.py (rev 0)
+++ trunk/Tools/Scripts/webkitpy/common/checkout/baselineoptimizer_unittest.py 2011-08-02 00:07:14 UTC (rev 92153)
@@ -0,0 +1,111 @@
+# Copyright (C) 2011 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+
+from webkitpy.common.checkout.baselineoptimizer import BaselineOptimizer
+from webkitpy.common.system.filesystem_mock import MockFileSystem
+from webkitpy.tool.mocktool import MockSCM
+
+
+class TestBaselineOptimizer(BaselineOptimizer):
+ def __init__(self, mock_results_by_directory):
+ BaselineOptimizer.__init__(self, MockSCM(), MockFileSystem())
+ self._mock_results_by_directory = mock_results_by_directory
+
+ # We override this method for testing so we don't have to construct an
+ # elaborate mock file system.
+ def _read_results_by_directory(self, baseline_name):
+ return self._mock_results_by_directory
+
+
+class BaselineOptimizerTest(unittest.TestCase):
+ def _assertOptimization(self, results_by_directory, expected_new_results_by_directory):
+ baseline_optimizer = TestBaselineOptimizer(results_by_directory)
+ _, new_results_by_directory = baseline_optimizer._find_optimal_result_placement('mock-baseline.png')
+ self.assertEqual(new_results_by_directory, expected_new_results_by_directory)
+
+ def test_chromium_linux_redundant_with_win(self):
+ self._assertOptimization({
+ 'LayoutTests/platform/chromium-win': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ 'LayoutTests/platform/chromium-linux': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ }, {
+ 'LayoutTests/platform/chromium-win': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ })
+
+ def test_chromium_covers_mac_win_linux(self):
+ self._assertOptimization({
+ 'LayoutTests/platform/chromium-mac': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ 'LayoutTests/platform/chromium-win': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ 'LayoutTests/platform/chromium-linux': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ }, {
+ 'LayoutTests/platform/chromium': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ })
+
+ def test_chromium_mac_redundant_with_apple_mac(self):
+ self._assertOptimization({
+ 'LayoutTests/platform/chromium-mac-snowleopard': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ 'LayoutTests/platform/mac-snowleopard': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ }, {
+ 'LayoutTests/platform/mac-snowleopard': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ })
+
+ def test_mac_future(self):
+ self._assertOptimization({
+ 'LayoutTests/platform/mac-snowleopard': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ }, {
+ 'LayoutTests/platform/mac-snowleopard': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ })
+
+ def test_qt_unknown(self):
+ self._assertOptimization({
+ 'LayoutTests/platform/qt': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ }, {
+ 'LayoutTests/platform/qt': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ })
+
+ def test_complex_shadowing(self):
+ self._assertOptimization({
+ 'LayoutTests/platform/chromium-win': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ 'LayoutTests/platform/mac': '5daa78e55f05d9f0d1bb1f32b0cd1bc3a01e9364',
+ 'LayoutTests/platform/chromium-win-xp': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ 'LayoutTests/platform/chromium-mac-leopard': '65e7d42f8b4882b29d46dc77bb879dd41bc074dc',
+ 'LayoutTests/platform/mac-leopard': '7ad045ece7c030e2283c5d21d9587be22bcba56e',
+ 'LayoutTests/platform/chromium-win-vista': 'f83af9732ce74f702b8c9c4a3d9a4c6636b8d3bd',
+ 'LayoutTests/platform/win': '5b1253ef4d5094530d5f1bc6cdb95c90b446bec7',
+ 'LayoutTests/platform/chromium-linux': 'f52fcdde9e4be8bd5142171cd859230bd4471036',
+ }, {
+ 'LayoutTests/platform/chromium-win': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ 'LayoutTests/platform/mac': '5daa78e55f05d9f0d1bb1f32b0cd1bc3a01e9364',
+ 'LayoutTests/platform/chromium-win-xp': '462d03b9c025db1b0392d7453310dbee5f9a9e74',
+ 'LayoutTests/platform/chromium-mac-leopard': '65e7d42f8b4882b29d46dc77bb879dd41bc074dc',
+ 'LayoutTests/platform/mac-leopard': '7ad045ece7c030e2283c5d21d9587be22bcba56e',
+ 'LayoutTests/platform/chromium-win-vista': 'f83af9732ce74f702b8c9c4a3d9a4c6636b8d3bd',
+ 'LayoutTests/platform/win': '5b1253ef4d5094530d5f1bc6cdb95c90b446bec7',
+ 'LayoutTests/platform/chromium-linux': 'f52fcdde9e4be8bd5142171cd859230bd4471036'
+ })
Modified: trunk/Tools/Scripts/webkitpy/common/system/filesystem.py (92152 => 92153)
--- trunk/Tools/Scripts/webkitpy/common/system/filesystem.py 2011-08-01 23:49:54 UTC (rev 92152)
+++ trunk/Tools/Scripts/webkitpy/common/system/filesystem.py 2011-08-02 00:07:14 UTC (rev 92153)
@@ -34,6 +34,7 @@
import errno
import exceptions
import glob
+import hashlib
import os
import shutil
import sys
@@ -223,6 +224,10 @@
with codecs.open(path, 'w', 'utf8') as f:
f.write(contents)
+ def sha1(self, path):
+ contents = self.read_binary_file(path)
+ return hashlib.sha1(contents).hexdigest()
+
def relpath(self, path, start='.'):
return ospath.relpath(path, start)
Modified: trunk/Tools/Scripts/webkitpy/common/system/filesystem_mock.py (92152 => 92153)
--- trunk/Tools/Scripts/webkitpy/common/system/filesystem_mock.py 2011-08-01 23:49:54 UTC (rev 92152)
+++ trunk/Tools/Scripts/webkitpy/common/system/filesystem_mock.py 2011-08-02 00:07:14 UTC (rev 92153)
@@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import errno
+import hashlib
import os
import re
@@ -284,6 +285,10 @@
def write_text_file(self, path, contents):
return self.write_binary_file(path, contents.encode('utf-8'))
+ def sha1(self, path):
+ contents = self.read_binary_file(path)
+ return hashlib.sha1(contents).hexdigest()
+
def relpath(self, path, start='.'):
return ospath.relpath(path, start, self.abspath, self.sep)
Modified: trunk/Tools/Scripts/webkitpy/layout_tests/port/base.py (92152 => 92153)
--- trunk/Tools/Scripts/webkitpy/layout_tests/port/base.py 2011-08-01 23:49:54 UTC (rev 92152)
+++ trunk/Tools/Scripts/webkitpy/layout_tests/port/base.py 2011-08-02 00:07:14 UTC (rev 92153)
@@ -541,6 +541,9 @@
"""Return the absolute path to the top of the LayoutTests directory."""
return self.path_from_webkit_base('LayoutTests')
+ def webkit_base(self):
+ return self._filesystem.abspath(self.path_from_webkit_base('.'))
+
def skipped_layout_tests(self):
return []
Modified: trunk/Tools/Scripts/webkitpy/tool/commands/rebaseline.py (92152 => 92153)
--- trunk/Tools/Scripts/webkitpy/tool/commands/rebaseline.py 2011-08-01 23:49:54 UTC (rev 92152)
+++ trunk/Tools/Scripts/webkitpy/tool/commands/rebaseline.py 2011-08-02 00:07:14 UTC (rev 92153)
@@ -32,9 +32,11 @@
import urllib
import webkitpy.common.config.urls as config_urls
+from webkitpy.common.checkout.baselineoptimizer import BaselineOptimizer
from webkitpy.common.net.buildbot import BuildBot
from webkitpy.common.net.layouttestresults import LayoutTestResults
from webkitpy.common.system.user import User
+from webkitpy.layout_tests.layout_package.test_result_writer import TestResultWriter
from webkitpy.layout_tests.models import test_failures
from webkitpy.layout_tests.port import factory
from webkitpy.tool.grammar import pluralize
@@ -128,6 +130,25 @@
self._rebaseline_test(args[0], args[1], args[2])
+class OptimizeBaselines(AbstractDeclarativeCommand):
+ name = "optimize-baselines"
+ help_text = "Reshuffles the baselines for a the given test to use as litte space on disk as possible."
+ argument_names = "TEST_NAME"
+
+ # FIXME: Should TestResultWriter know how to compute this string?
+ def _baseline_name(self, test_name, suffix):
+ return self._tool.filesystem.splitext(test_name)[0] + TestResultWriter.FILENAME_SUFFIX_EXPECTED + suffix
+
+ def execute(self, options, args, tool):
+ baseline_optimizer = BaselineOptimizer(tool.scm(), tool.filesystem)
+
+ test_name = args[0]
+ for suffix in ['.png', '.txt']:
+ baseline_name = self._baseline_name(test_name, suffix)
+ if not baseline_optimizer.optimize(baseline_name):
+ print "Hueristics failed to optimize %s" % baseline_name
+
+
class Rebaseline(AbstractDeclarativeCommand):
name = "rebaseline"
help_text = "Replaces local expected.txt files with new results from build bots"