jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/379902 )

Change subject: Remove data-mw attributes before parsing summary
......................................................................


Remove data-mw attributes before parsing summary

Bug: T176521
Change-Id: Ifd0c47114973fd7c1cc1c2231a52947b5dfcbe34
---
M lib/transformations/summarize.js
M test/lib/transformations/summarize.js
2 files changed, 7 insertions(+), 0 deletions(-)

Approvals:
  BearND: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/transformations/summarize.js b/lib/transformations/summarize.js
index 342371c..bf9e205 100644
--- a/lib/transformations/summarize.js
+++ b/lib/transformations/summarize.js
@@ -3,6 +3,7 @@
 const domino = require('domino');
 const flattenElements = require('./flattenElements');
 const rmElementsWithSelector = require('./rmElementsWithSelector');
+const removeAttributes = require('./removeAttributes');
 
 /**
  * Recursively discard any parentheticals that themselves are inside parentheticals
@@ -29,6 +30,7 @@
 module.exports = function(html) {
     const doc = domino.createDocument(html);
     flattenElements(doc, 'a');
+    removeAttributes(doc, '*', ['data-mw']);
     rmElementsWithSelector(doc, '.mw-ref, .reference');
     rmElementsWithSelector(doc, '.noexcerpts');
     rmElementsWithSelector(doc, 'math');
diff --git a/test/lib/transformations/summarize.js b/test/lib/transformations/summarize.js
index 75d3532..b58e3a3 100644
--- a/test/lib/transformations/summarize.js
+++ b/test/lib/transformations/summarize.js
@@ -43,6 +43,11 @@
                 '<p>The Planck–Einstein relation connects the particulate 
photon energy <span class=\"texhtml \"><i>E</i></span> with its associated wave 
frequency <span class=\"texhtml \"><i>f</i></span>:</p>\n\n<dl id=\"mwmQ\"><dd 
id=\"mwmg\"><span class=\"mwe-math-element\"><span 
class=\"mwe-math-mathml-inline mwe-math-mathml-a11y\" style=\"display: 
none;\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\">\n  <semantics>\n    
<mrow class=\"MJX-TeXAtom-ORD\">\n      <mstyle displaystyle=\"true\" 
scriptlevel=\"0\">\n        <mi>E</mi>\n        <mo>=</mo>\n        
<mi>h</mi>\n        <mi>f</mi>\n      </mstyle>\n    </mrow>\n    <annotation 
encoding=\"application/x-tex\">{\\displaystyle E=hf}</annotation>\n  
</semantics>\n</math></span><img 
src=\"https://wikimedia.org/api/rest_v1/media/math/render/svg/f39fac3593bb1e2dec0282c112c4dff7a99007f6\"
 class=\"mwe-math-fallback-image-inline\" aria-hidden=\"true\" 
style=\"vertical-align: -0.671ex; width:7.533ex; 
height:2.509ex;\"></span></dd></dl>',
                 '<p>The Planck–Einstein relation connects the particulate 
photon energy <span class=\"texhtml \"><i>E</i></span> with its associated wave 
frequency <span class=\"texhtml \"><i>f</i></span>:</p>\n\n<dl id=\"mwmQ\"><dd 
id=\"mwmg\"><span class=\"mwe-math-element\"><img 
src=\"https://wikimedia.org/api/rest_v1/media/math/render/svg/f39fac3593bb1e2dec0282c112c4dff7a99007f6\"
 class=\"mwe-math-fallback-image-inline\" aria-hidden=\"true\" 
style=\"vertical-align: -0.671ex; width:7.533ex; 
height:2.509ex;\"></span></dd></dl>'
             ],
+            // Any parentheticals inside a data-mw attribute are ignored.
+            [
+                '<p><b>Shakira Isabel Mebarak Ripoll</b> (<small data-mw="(t) 
<!--Arabic (Spanish pronunciation)">pronounced<span>&nbsp;</span></small><span 
class="IPA"><span>[(t)ʃaˈkiɾa isaˈβel meβaˈɾak riˈpol]</span></span>; 
<span><span class="ipa_button"></span><span class="nowrap 
mcs-ipa"><small>English: </small><span class="IPA nopopups 
noexcerpt"><span>/<span style="border-bottom:1px 
dotted"><span>ʃ</span><span>ə</span><span>ˈ</span><span>k</span><span>iː</span><span>r</span><span>ə</span></span>/</span></span></span></span>;
 born 2 February 1977) is a Colombian singer, songwriter, dancer.</p>',
+                '<p><b>Shakira Isabel Mebarak Ripoll</b> is a Colombian 
singer, songwriter, dancer.</p>'
+            ],
             // Any content in parentheticals is stripped and no double spaces are left in the output
             [
                 '<p><b>Epistemology</b> (<span class="nowrap"><span class="IPA 
nopopups noexcerpt"><span>/<span style="border-bottom:1px dotted"><span 
title="/ɪ/ or /ə/ \'e\' in \'roses\'">ᵻ</span><span title="/ˌ/ secondary stress 
follows">ˌ</span><span title="\'p\' in \'pie\'">p</span><span title="/ɪ/ short 
\'i\' in \'bid\'">ɪ</span><span title="\'s\' in \'sigh\'">s</span><span 
title="\'t\' in \'tie\'">t</span><span title="/ɪ/ or /ə/ \'e\' in 
\'roses\'">ᵻ</span><span title="/ˈ/ primary stress follows">ˈ</span><span 
title="\'m\' in \'my\'">m</span><span title="/ɒ/ short \'o\' in 
\'body\'">ɒ</span><span title="\'l\' in \'lie\'">l</span><span title="/ə/ \'a\' 
in \'about\'">ə</span><span title="/dʒ/ \'j\' in \'jam\'">dʒ</span><span 
title="/i/ \'y\' in \'happy\'">i</span></span>/</span></span><small 
class="nowrap metadata">&nbsp;(<span class="unicode haudio"><span 
class="fn"><span style="white-space:nowrap"><span><img alt="About this sound" 
src="//upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/11px-Loudspeaker.svg.png"
 width="11" height="11" 
srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/17px-Loudspeaker.svg.png
 1.5x, 
//upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/22px-Loudspeaker.svg.png
 2x" data-file-width="20" 
data-file-height="20"></span>&nbsp;</span><span>listen</span></span></span>)</small></span>;
 from <span>Greek</span> <span lang="gre" xml:lang="gre"><span 
>ἐπιστήμη</span><i>, epistēmē</i></span>, meaning \'knowledge\', and <span 
lang="" xml:lang=""><span>λόγος</span><i>, <span>logos</span></i></span>, 
meaning \'logical discourse\') is the <span>branch</span> of 
<span>philosophy</span> concerned with the theory of 
<span>knowledge</span>.</p>',

-- 
To view, visit https://gerrit.wikimedia.org/r/379902
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ifd0c47114973fd7c1cc1c2231a52947b5dfcbe34
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Jdlrobson <jrob...@wikimedia.org>
Gerrit-Reviewer: BearND <bsitzm...@wikimedia.org>
Gerrit-Reviewer: Dbrant <dbr...@wikimedia.org>
Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org>
Gerrit-Reviewer: GWicke <gwi...@wikimedia.org>
Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org>
Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org>
Gerrit-Reviewer: Mhurd <mh...@wikimedia.org>
Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org>
Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to