Mholloway has uploaded a new change for review. https://gerrit.wikimedia.org/r/299296
Change subject: Perform Wiktionary-specific transform queries only for Wiktionary requests ...................................................................... Perform Wiktionary-specific transform queries only for Wiktionary requests Profiling reveals that stripping various content from the DOM is a long-running and expensive set of operations in every mobile-sections* request, and we should look for ways to streamline where possible. This patch removes a set of selectors specific to Wiktionary from the list of selectors used for all mobile-sections* requests and moves them to a function called only from the Wiktionary endpoint code. Since the Wiktionary payload is much smaller than the typical mobile-sections* payload (as most of the HTML content ends up discarded after definition parsing), I've also removed some per-tag attributes from the list of those selected for removal in all Parsoid HTML requests and not replaced them in a Wiktionary-specific function. I looked here to determine which transforms were added specifically for Wiktionary: https://gerrit.wikimedia.org/r/#/c/255263/23/lib/transforms.js Change-Id: I0f383d60fbaf12edd9a2aad85200d358ac4e8732 --- M lib/parsoid-access.js M lib/transforms.js M test/features/definition/definition.js 3 files changed, 21 insertions(+), 5 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/96/299296/1 diff --git a/lib/parsoid-access.js b/lib/parsoid-access.js index 26fc4b8..2df9572 100644 --- a/lib/parsoid-access.js +++ b/lib/parsoid-access.js @@ -190,6 +190,7 @@ .then(function (response) { var doc = domino.createDocument(response.body); transforms.stripUnneededMarkup(doc); + transforms.stripWiktionarySpecificMarkup(doc); transforms.rmElementsWithSelector(doc, 'sup'); transforms.inlineSpanText(doc); addSectionDivs(doc); diff --git a/lib/transforms.js b/lib/transforms.js index 5aa235f..e7e848d 100644 --- a/lib/transforms.js +++ b/lib/transforms.js @@ -7,6 +7,7 @@ var domino 
= require('domino'); var util = require('util'); +var underscore = require('underscore'); var anchorPopUpMediaTransforms = require('./transformations/anchorPopUpMediaTransforms'); var hideRedLinks = require('./transformations/hideRedLinks'); var hideIPA = require('./transformations/hideIPA'); @@ -140,10 +141,10 @@ _rmAttributes(doc, 'figure', ['id', 'typeof']); _rmAttributes(doc, 'b,q,td,figcaption', ['id']); _rmAttributes(doc, 'figcaption a[class~=image]', ['class']); // T123527 - _rmAttributes(doc, 'i', ['about', 'data-mw', 'id', 'typeof']); + _rmAttributes(doc, 'i', ['about', 'id']); _rmAttributes(doc, 'li', ['about']); _rmAttributes(doc, 'img', ['alt', 'data-file-height', 'data-file-type', 'data-file-width', 'id', 'resource']); - _rmAttributes(doc, 'span', ['about', 'data-file-type', 'data-mw', 'id', 'itemscope', 'itemtype', 'lang', 'rel', + _rmAttributes(doc, 'span', ['about', 'data-file-type', 'data-mw', 'id', 'itemscope', 'itemtype', 'rel', 'title', 'typeof']); } @@ -153,9 +154,6 @@ function _runAllSectionsTransforms(doc) { var rmSelectors = [ 'span.Z3988', // Remove <span class=\"Z3988\"></span> - 'span.ib-brac', // Remove <span class=\"ib-brac\"></span> - 'span.ib-content', // Remove <span class=\"ib-content\"></span> - 'span.defdate', 'span:empty', // Remove empty <span></span> 'link', 'sup.noprint', @@ -199,6 +197,19 @@ //} /** + * Remove Wiktionary-specific unwanted content from the DOM + */ +transforms.stripWiktionarySpecificMarkup = function(doc) { + var unwantedClasses = [ 'ib-brac', 'ib-content', 'defdate' ]; + underscore.each(unwantedClasses, function(unwantedClass) { + var elements = doc.getElementsByClassName(unwantedClass); + underscore.each(elements, function(element) { + element.parentNode.removeChild(element); + }); + }); +}; + +/** * Nukes stuff from the DOM we don't want for pages from Parsoid. 
*/ transforms.stripUnneededMarkup = function(doc) { diff --git a/test/features/definition/definition.js b/test/features/definition/definition.js index 2f3aaa7..40c583b 100644 --- a/test/features/definition/definition.js +++ b/test/features/definition/definition.js @@ -18,6 +18,10 @@ return preq.get({ uri: server.config.uri + 'en.wiktionary.org/v1/page/definition/cat' }) .then(function(res) { var en = res.body.en; + var bodytext = JSON.stringify(res.body); + assert.ok(bodytext.indexOf('ib-brac') === -1); + assert.ok(bodytext.indexOf('ib-content') === -1); + assert.ok(bodytext.indexOf('defdate') === -1); assert.deepEqual(res.status, 200); assert.notDeepEqual(en, undefined); assert.ok(en.length == 8) -- To view, visit https://gerrit.wikimedia.org/r/299296 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I0f383d60fbaf12edd9a2aad85200d358ac4e8732 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: Mholloway <mhollo...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits