Modified: trunk/Tools/BuildSlaveSupport/build.webkit.org-config/public_html/TestFailures/FlakyLayoutTestDetector.js (93154 => 93155)
--- trunk/Tools/BuildSlaveSupport/build.webkit.org-config/public_html/TestFailures/FlakyLayoutTestDetector.js 2011-08-16 21:12:10 UTC (rev 93154)
+++ trunk/Tools/BuildSlaveSupport/build.webkit.org-config/public_html/TestFailures/FlakyLayoutTestDetector.js 2011-08-16 21:12:44 UTC (rev 93155)
@@ -25,6 +25,7 @@
function FlakyLayoutTestDetector() {
this._tests = {};
+ this._buildCount = 0;
}
FlakyLayoutTestDetector.prototype = {
@@ -37,11 +38,20 @@
return newFlakyTests;
}
+ ++this._buildCount;
+
// Record failing tests.
for (var testName in failingTests) {
if (!(testName in this._tests)) {
+ if (this._buildCount > this._maximumFailOrPassCount) {
+ // This test hasn't failed in the _maximumFailOrPassCount most recent builds, so
+ // don't consider it to be flaky. In fact, we don't have to track it at all!
+ continue;
+ }
+
this._tests[testName] = {
state: this._states.LastSeenFailing,
+ count: 0,
history: [],
};
}
@@ -49,7 +59,11 @@
var testData = this._tests[testName];
testData.history.push({ build: buildName, result: failingTests[testName] });
- if (testData.state === this._states.LastSeenPassing) {
+ if (testData.state === this._states.LastSeenFailing) {
+ ++testData.count;
+ if (testData.count > this._maximumFailOrPassCount)
+ testData.state = this._states.NotFlaky;
+ } else if (testData.state === this._states.LastSeenPassing) {
testData.state = this._states.PossiblyFlaky;
newFlakyTests.push(testName);
}
@@ -63,8 +77,14 @@
var testData = this._tests[testName];
testData.history.push({ build: buildName, result: { failureType: 'pass' } });
- if (testData.state === this._states.LastSeenFailing)
+ if (testData.state === this._states.LastSeenPassing) {
+ ++testData.count;
+ if (testData.count > this._maximumFailOrPassCount)
+ testData.state = this._states.NotFlaky;
+ } else if (testData.state === this._states.LastSeenFailing) {
testData.state = this._states.LastSeenPassing;
+ testData.count = 1;
+ }
}
return newFlakyTests;
@@ -82,9 +102,14 @@
return Object.keys(self._tests).filter(function(testName) { return self._tests[testName].state === self._states.PossiblyFlaky });
},
+ // If a test has recently failed or passed more than this number of times in a row we don't
+ // consider it to be flaky.
+ _maximumFailOrPassCount: 9,
+
_states: {
LastSeenFailing: 0,
LastSeenPassing: 1,
PossiblyFlaky: 2,
+ NotFlaky: 3,
},
};
Modified: trunk/Tools/BuildSlaveSupport/build.webkit.org-config/public_html/TestFailures/FlakyLayoutTestDetector_unittests.js (93154 => 93155)
--- trunk/Tools/BuildSlaveSupport/build.webkit.org-config/public_html/TestFailures/FlakyLayoutTestDetector_unittests.js 2011-08-16 21:12:10 UTC (rev 93154)
+++ trunk/Tools/BuildSlaveSupport/build.webkit.org-config/public_html/TestFailures/FlakyLayoutTestDetector_unittests.js 2011-08-16 21:12:44 UTC (rev 93155)
@@ -99,4 +99,70 @@
equal(detector.allFailures('c'), null);
});
+test('failing many times in a row should not count as flaky', 3, function() {
+ var detector = new FlakyLayoutTestDetector();
+ for (var i = 0; i < 10; ++i)
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+
+ deepEqual(detector.possiblyFlakyTests, []);
+
+ for (var i = 0; i < 3; ++i)
+ detector.incorporateTestResults('build', {}, false);
+
+ deepEqual(detector.possiblyFlakyTests, []);
+
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+
+ deepEqual(detector.possiblyFlakyTests, []);
+});
+
+test('failing after passing many times in a row should not count as flaky', 3, function() {
+ var detector = new FlakyLayoutTestDetector();
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+
+ deepEqual(detector.possiblyFlakyTests, []);
+
+ for (var i = 0; i < 10; ++i)
+ detector.incorporateTestResults('build', {}, false);
+
+ deepEqual(detector.possiblyFlakyTests, []);
+
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+
+ deepEqual(detector.possiblyFlakyTests, []);
+});
+
+test('flaking now should override many past failures', 1, function() {
+ var detector = new FlakyLayoutTestDetector();
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+ detector.incorporateTestResults('build', {}, false);
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+ detector.incorporateTestResults('build', {}, false);
+ for (var i = 0; i < 10; ++i)
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+ deepEqual(detector.possiblyFlakyTests, ['a']);
+});
+
+test('passing now should override past flakiness', 1, function() {
+ var detector = new FlakyLayoutTestDetector();
+ for (var i = 0; i < 10; ++i)
+ detector.incorporateTestResults('build', {}, false);
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+ detector.incorporateTestResults('build', {}, false);
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+ detector.incorporateTestResults('build', {}, false);
+ deepEqual(detector.possiblyFlakyTests, []);
+});
+
+test('too many failures now should not override past flakiness', 1, function() {
+ var detector = new FlakyLayoutTestDetector();
+ for (var i = 0; i < 10; ++i)
+ detector.incorporateTestResults('build', {}, true);
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+ detector.incorporateTestResults('build', {}, false);
+ detector.incorporateTestResults('build', { a: 'fail' }, false);
+ detector.incorporateTestResults('build', {}, false);
+ deepEqual(detector.possiblyFlakyTests, ['a']);
+});
+
})();
Modified: trunk/Tools/ChangeLog (93154 => 93155)
--- trunk/Tools/ChangeLog 2011-08-16 21:12:10 UTC (rev 93154)
+++ trunk/Tools/ChangeLog 2011-08-16 21:12:44 UTC (rev 93155)
@@ -1,3 +1,28 @@
+2011-08-16 Adam Roben <aro...@apple.com>
+
+ Teach TestFailures that tests that have failed or passed many times in a row are not flaky
+
+ The basic strategy here is that once a test has failed or passed many times in a row, we
+ never again consider it to be a possibly flaky test. It's a simple strategy, but it seems to
+ result in far fewer false positives than our current behavior.
+
+ Fixes <http://webkit.org/b/66327> TestFailures page considers far too many tests to be
+ flaky, including tests that failed for a while but then were fixed
+
+ Reviewed by Dan Bates.
+
+ * BuildSlaveSupport/build.webkit.org-config/public_html/TestFailures/FlakyLayoutTestDetector.js:
+ (FlakyLayoutTestDetector): Initialize new _buildCount property, which is used to track the
+ number of builds we've seen that did not have too many failures.
+ (FlakyLayoutTestDetector.prototype.incorporateTestResults): Don't track tests which haven't
+ failed in the _maximumFailOrPassCount most recent builds. For other not-yet-considered-flaky
+ tests, keep track of how many times they have passed or failed in a row. If they pass or
+ fail more than _maximumFailOrPassCount times, consider them to be non-flaky. (Once a test is
+ considered flaky it doesn't matter how many times it passes or fails.)
+
+ * BuildSlaveSupport/build.webkit.org-config/public_html/TestFailures/FlakyLayoutTestDetector_unittests.js:
+ Added tests for the above.
+
2011-08-16 Dimitri Glazkov <dglaz...@chromium.org>
garden-o-matic needs a summary view with actions for each problem.