jenkins-bot has submitted this change and it was merged. Change subject: Restore speedy non-selser serialization ......................................................................
Restore speedy non-selser serialization * Fixes the large perf regression Flow was/is seeing. * In I104982e061abf20c63a6342caa5600a090c9ced7, we made sure that non-selser didn't need the original source. * Now, if an oldid isn't supplied, we don't fetch the source and fall back to non-selser. This used to be case before the refactor in I27176bd4b62562a7aead3d40679d31ad0b8f3ab8 (see the review there where this was all discussed). Bug: T98408 Change-Id: I44834aed117de810eaa8dba596cf8337ee423f9d --- M api/routes.js M lib/mediawiki.DOMUtils.js M lib/mediawiki.SelectiveSerializer.js M tests/mocha/api.js M tests/roundtrip-test.js M tests/rttest.localsettings.js 6 files changed, 23 insertions(+), 30 deletions(-) Approvals: Subramanya Sastry: Looks good to me, approved Matthias Mullie: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/api/routes.js b/api/routes.js index 9e07508..8d921c9 100644 --- a/api/routes.js +++ b/api/routes.js @@ -267,14 +267,7 @@ } } - // This isn't part of the public API. Just a convenience to enable - // selser for roundtrip testing. - var useSelser = parsoidConfig.useSelser; - if (parsoidConfig.rtTestMode && req.body.hasOwnProperty('_rtSelser')) { - useSelser = !(!req.body._rtSelser || req.body._rtSelser === "false"); - } - - var p = DU.serializeDOM(env, doc.body, useSelser) + var p = DU.serializeDOM(env, doc.body, parsoidConfig.useSelser) .timeout(REQ_TIMEOUT) .then(function(output) { var contentType = 'text/plain;profile=mediawiki.org/specs/wikitext/1.0.0;charset=utf-8'; diff --git a/lib/mediawiki.DOMUtils.js b/lib/mediawiki.DOMUtils.js index ce0ab1f..e81c8fc 100644 --- a/lib/mediawiki.DOMUtils.js +++ b/lib/mediawiki.DOMUtils.js @@ -2550,23 +2550,19 @@ console.assert(DU.isBody(body), 'Expected a body node.'); + var hasOldId = (env.page.id && env.page.id !== '0'); var needsWt = (env.page.src === null); - var needsDOM = useSelser && !(env.page.dom || env.page.domdiff); - var useCache = env.conf.parsoid.parsoidCacheURI && - (env.page.id && env.page.id !== '0'); + var needsDOM = !(env.page.dom || env.page.domdiff); + var useCache = env.conf.parsoid.parsoidCacheURI && hasOldId; var steps = []; - if (needsWt) { + if (needsWt && hasOldId) { steps.push(function() { var target = env.resolveTitle(env.normalizeTitle(env.page.name), ''); return TemplateRequest.setPageSrcInfo( env, target, env.page.id ).catch(function(err) { env.log('error', 'Error while fetching page source.'); - // Set the env.page.src to the empty string. - // An alternative would be to audit all the uses to handle - // it as uninitialized (ie. null). - env.setPageSrcInfo(null); }); }); } @@ -2583,6 +2579,11 @@ }); } else { steps.push(function() { + if (env.page.src === null) { + // The src fetch failed or we never had an oldid. + // We'll just fallback to non-selser. + return; + } return env.pipelineFactory.parse( env, env.page.src ).then(function(doc) { @@ -2596,7 +2597,9 @@ // If we can, perform these steps in parallel (w/ map). var p; - if (useCache) { + if (!useSelser) { + p = Promise.resolve(); + } else if (useCache) { p = Promise.map(steps, function(func) { return func(); }); } else { p = Promise.reduce(steps, function(prev, func) { @@ -2606,7 +2609,7 @@ return p.then(function() { var Serializer = useSelser ? SelectiveSerializer : WikitextSerializer; - var serializer = new Serializer({ env: env, oldid: env.page.id }); + var serializer = new Serializer({ env: env }); return serializer.serializeDOMSync(body); }).nodify(cb); }; diff --git a/lib/mediawiki.SelectiveSerializer.js b/lib/mediawiki.SelectiveSerializer.js index de3e142..5bb0fc4 100644 --- a/lib/mediawiki.SelectiveSerializer.js +++ b/lib/mediawiki.SelectiveSerializer.js @@ -22,7 +22,6 @@ * * @param options {Object} Options for the serializer. * @param options.env {MWParserEnvironment} - * @param options.oldid {string} The revision ID you want to compare to (defaults to latest revision) */ var SelectiveSerializer = function(options) { // Set edit mode @@ -53,7 +52,7 @@ var out; var startTimers = new Map(); - if ((!this.env.page.dom && !this.env.page.domdiff) || !this.env.page.src) { + if ((!this.env.page.dom && !this.env.page.domdiff) || this.env.page.src === null) { if (this.timer) { startTimers.set('html2wt.full.serialize', Date.now()); } diff --git a/tests/mocha/api.js b/tests/mocha/api.js index e8de620..60de2eb 100644 --- a/tests/mocha/api.js +++ b/tests/mocha/api.js @@ -480,6 +480,7 @@ .send({ html: "<html><body id=\"mwAA\"><div id=\"mwBB\">Selser test</div></body></html>", original: { + revid: 2, title: "Junk Page", html: { body: "<html><body id=\"mwAA\"><div id=\"mwBB\">Selser test</div></body></html>", @@ -504,14 +505,14 @@ it('should fallback to non-selective serialization', function(done) { // Without the original wikitext and an unavailable - // TemplateFetch for the source (Doestnotexist will 404), + // TemplateFetch for the source (no revision id provided), // it should fallback to non-selective serialization. request(api) .post('v2/' + mockHost + '/wt/') .send({ html: "<html><body id=\"mwAA\"><div id=\"mwBB\">Selser test</div></body></html>", original: { - title: "Doesnotexist", + title: "Junk Page", html: { body: "<html><body id=\"mwAA\"><div id=\"mwBB\">Selser test</div></body></html>", }, diff --git a/tests/roundtrip-test.js b/tests/roundtrip-test.js index ba99e12..e748b48 100755 --- a/tests/roundtrip-test.js +++ b/tests/roundtrip-test.js @@ -516,16 +516,15 @@ } uri += 'v2/' + options.domain + '/'; if (options.html2wt) { - uri += 'wt/' + title + '/' + options.oldid; + uri += 'wt/' + title; + if (options.oldid) { + uri += '/' + options.oldid; + } httpOptions.body.scrubWikitext = true; } else { // wt2html uri += 'pagebundle/' + title; } httpOptions.uri = uri; - - if (options.useSelser) { - httpOptions.body._rtSelser = true; - } return new Promise(function(resolve, reject) { // TODO: convert Util.retryingHTTPRequest to a promise returning func @@ -646,12 +645,10 @@ var options = Object.assign({ html2wt: true, recordSizes: true, - oldid: env.page.meta.revision.revid, data: { html: data.oldHTML, original: { 'data-parsoid': data.oldDp, - wikitext: { body: data.oldWt }, }, }, }, parsoidOptions); diff --git a/tests/rttest.localsettings.js b/tests/rttest.localsettings.js index 2d6373c..f94b517 100644 --- a/tests/rttest.localsettings.js +++ b/tests/rttest.localsettings.js @@ -32,7 +32,7 @@ // parsoidConfig.usePHPPreProcessor = false; // Use selective serialization (default false) - // parsoidConfig.useSelser = true; + parsoidConfig.useSelser = true; // Allow cross-domain requests to the API (default '*') // Sets Access-Control-Allow-Origin header -- To view, visit https://gerrit.wikimedia.org/r/210843 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I44834aed117de810eaa8dba596cf8337ee423f9d Gerrit-PatchSet: 2 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Cscott <canan...@wikimedia.org> Gerrit-Reviewer: Mattflaschen <mflasc...@wikimedia.org> Gerrit-Reviewer: Matthias Mullie <mmul...@wikimedia.org> Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits