Mholloway has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/299296

Change subject: Perform Wiktionary-specific transform queries only for 
Wiktionary requests
......................................................................

Perform Wiktionary-specific transform queries only for Wiktionary requests

Profiling reveals that stripping various content from the DOM is a long-
running and expensive set of operations in every mobile-sections* request,
and we should look for ways to streamline it where possible.

This patch removes a set of selectors specific to Wiktionary from the list
of selectors used for all mobile-sections* requests and moves them to a
function called only from the Wiktionary endpoint code.

Since the Wiktionary payload is much smaller than the typical mobile-
sections* payload (as most of the HTML content ends up discarded after
definition parsing), I've also removed some per-tag attributes ('data-mw'
and 'typeof' on <i>, 'lang' on <span>) from the list of those stripped in
all Parsoid HTML requests, and have not re-added their removal in a
Wiktionary-specific function.

I looked here to determine which transforms were added specifically for
Wiktionary:

https://gerrit.wikimedia.org/r/#/c/255263/23/lib/transforms.js

Change-Id: I0f383d60fbaf12edd9a2aad85200d358ac4e8732
---
M lib/parsoid-access.js
M lib/transforms.js
M test/features/definition/definition.js
3 files changed, 21 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps 
refs/changes/96/299296/1

diff --git a/lib/parsoid-access.js b/lib/parsoid-access.js
index 26fc4b8..2df9572 100644
--- a/lib/parsoid-access.js
+++ b/lib/parsoid-access.js
@@ -190,6 +190,7 @@
         .then(function (response) {
             var doc = domino.createDocument(response.body);
             transforms.stripUnneededMarkup(doc);
+            transforms.stripWiktionarySpecificMarkup(doc);
             transforms.rmElementsWithSelector(doc, 'sup');
             transforms.inlineSpanText(doc);
             addSectionDivs(doc);
diff --git a/lib/transforms.js b/lib/transforms.js
index 5aa235f..e7e848d 100644
--- a/lib/transforms.js
+++ b/lib/transforms.js
@@ -7,6 +7,7 @@
 
 var domino = require('domino');
 var util = require('util');
+var underscore = require('underscore');
 var anchorPopUpMediaTransforms = 
require('./transformations/anchorPopUpMediaTransforms');
 var hideRedLinks = require('./transformations/hideRedLinks');
 var hideIPA = require('./transformations/hideIPA');
@@ -140,10 +141,10 @@
     _rmAttributes(doc, 'figure', ['id', 'typeof']);
     _rmAttributes(doc, 'b,q,td,figcaption', ['id']);
     _rmAttributes(doc, 'figcaption a[class~=image]', ['class']); // T123527
-    _rmAttributes(doc, 'i', ['about', 'data-mw', 'id', 'typeof']);
+    _rmAttributes(doc, 'i', ['about', 'id']);
     _rmAttributes(doc, 'li', ['about']);
     _rmAttributes(doc, 'img', ['alt', 'data-file-height', 'data-file-type', 
'data-file-width', 'id', 'resource']);
-    _rmAttributes(doc, 'span', ['about', 'data-file-type', 'data-mw', 'id', 
'itemscope', 'itemtype', 'lang', 'rel',
+    _rmAttributes(doc, 'span', ['about', 'data-file-type', 'data-mw', 'id', 
'itemscope', 'itemtype', 'rel',
         'title', 'typeof']);
 }
 
@@ -153,9 +154,6 @@
 function _runAllSectionsTransforms(doc) {
     var rmSelectors = [
         'span.Z3988',                               // Remove <span 
class=\"Z3988\"></span>
-        'span.ib-brac',                             // Remove <span 
class=\"ib-brac\"></span>
-        'span.ib-content',                          // Remove <span 
class=\"ib-content\"></span>
-        'span.defdate',
         'span:empty',                               // Remove empty 
<span></span>
         'link',
         'sup.noprint',
@@ -199,6 +197,19 @@
 //}
 
 /**
+ * Remove Wiktionary-specific unwanted content from the DOM
+ */
+transforms.stripWiktionarySpecificMarkup = function(doc) {
+    var unwantedClasses = [ 'ib-brac', 'ib-content', 'defdate' ];
+    underscore.each(unwantedClasses, function(unwantedClass) {
+        var elements = doc.getElementsByClassName(unwantedClass);
+        underscore.each(elements, function(element) {
+            element.parentNode.removeChild(element);
+        });
+    });
+};
+
+/**
  * Nukes stuff from the DOM we don't want for pages from Parsoid.
  */
 transforms.stripUnneededMarkup = function(doc) {
diff --git a/test/features/definition/definition.js 
b/test/features/definition/definition.js
index 2f3aaa7..40c583b 100644
--- a/test/features/definition/definition.js
+++ b/test/features/definition/definition.js
@@ -18,6 +18,10 @@
         return preq.get({ uri: server.config.uri + 
'en.wiktionary.org/v1/page/definition/cat' })
             .then(function(res) {
                 var en = res.body.en;
+                var bodytext = JSON.stringify(res.body);
+                assert.ok(bodytext.indexOf('ib-brac') === -1);
+                assert.ok(bodytext.indexOf('ib-content') === -1);
+                assert.ok(bodytext.indexOf('defdate') === -1);
                 assert.deepEqual(res.status, 200);
                 assert.notDeepEqual(en, undefined);
                 assert.ok(en.length == 8)

-- 
To view, visit https://gerrit.wikimedia.org/r/299296
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0f383d60fbaf12edd9a2aad85200d358ac4e8732
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Mholloway <mhollo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to