jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/379902 )
Change subject: Remove data-mw attributes before parsing summary ...................................................................... Remove data-mw attributes before parsing summary Bug: T176521 Change-Id: Ifd0c47114973fd7c1cc1c2231a52947b5dfcbe34 --- M lib/transformations/summarize.js M test/lib/transformations/summarize.js 2 files changed, 7 insertions(+), 0 deletions(-) Approvals: BearND: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/transformations/summarize.js b/lib/transformations/summarize.js index 342371c..bf9e205 100644 --- a/lib/transformations/summarize.js +++ b/lib/transformations/summarize.js @@ -3,6 +3,7 @@ const domino = require('domino'); const flattenElements = require('./flattenElements'); const rmElementsWithSelector = require('./rmElementsWithSelector'); +const removeAttributes = require('./removeAttributes'); /** * Recursively discard any parentheticals that themselves are inside parentheticals @@ -29,6 +30,7 @@ module.exports = function(html) { const doc = domino.createDocument(html); flattenElements(doc, 'a'); + removeAttributes(doc, '*', ['data-mw']); rmElementsWithSelector(doc, '.mw-ref, .reference'); rmElementsWithSelector(doc, '.noexcerpts'); rmElementsWithSelector(doc, 'math'); diff --git a/test/lib/transformations/summarize.js b/test/lib/transformations/summarize.js index 75d3532..b58e3a3 100644 --- a/test/lib/transformations/summarize.js +++ b/test/lib/transformations/summarize.js @@ -43,6 +43,11 @@ '<p>The Planck–Einstein relation connects the particulate photon energy <span class=\"texhtml \"><i>E</i></span> with its associated wave frequency <span class=\"texhtml \"><i>f</i></span>:</p>\n\n<dl id=\"mwmQ\"><dd id=\"mwmg\"><span class=\"mwe-math-element\"><span class=\"mwe-math-mathml-inline mwe-math-mathml-a11y\" style=\"display: none;\"><math xmlns=\"http://www.w3.org/1998/Math/MathML\">\n <semantics>\n <mrow class=\"MJX-TeXAtom-ORD\">\n <mstyle displaystyle=\"true\" scriptlevel=\"0\">\n <mi>E</mi>\n <mo>=</mo>\n <mi>h</mi>\n <mi>f</mi>\n </mstyle>\n </mrow>\n <annotation encoding=\"application/x-tex\">{\\displaystyle E=hf}</annotation>\n </semantics>\n</math></span><img src=\"https://wikimedia.org/api/rest_v1/media/math/render/svg/f39fac3593bb1e2dec0282c112c4dff7a99007f6\" class=\"mwe-math-fallback-image-inline\" aria-hidden=\"true\" style=\"vertical-align: -0.671ex; width:7.533ex; height:2.509ex;\"></span></dd></dl>', '<p>The Planck–Einstein relation connects the particulate photon energy <span class=\"texhtml \"><i>E</i></span> with its associated wave frequency <span class=\"texhtml \"><i>f</i></span>:</p>\n\n<dl id=\"mwmQ\"><dd id=\"mwmg\"><span class=\"mwe-math-element\"><img src=\"https://wikimedia.org/api/rest_v1/media/math/render/svg/f39fac3593bb1e2dec0282c112c4dff7a99007f6\" class=\"mwe-math-fallback-image-inline\" aria-hidden=\"true\" style=\"vertical-align: -0.671ex; width:7.533ex; height:2.509ex;\"></span></dd></dl>' ], + // Any parentheticals inside a data-mw attribute are ignored. + [ + '<p><b>Shakira Isabel Mebarak Ripoll</b> (<small data-mw="(t) <!--Arabic (Spanish pronunciation)">pronounced<span> </span></small><span class="IPA"><span>[(t)ʃaˈkiɾa isaˈβel meβaˈɾak riˈpol]</span></span>; <span><span class="ipa_button"></span><span class="nowrap mcs-ipa"><small>English: </small><span class="IPA nopopups noexcerpt"><span>/<span style="border-bottom:1px dotted"><span>ʃ</span><span>ə</span><span>ˈ</span><span>k</span><span>iː</span><span>r</span><span>ə</span></span>/</span></span></span></span>; born 2 February 1977) is a Colombian singer, songwriter, dancer.</p>', + '<p><b>Shakira Isabel Mebarak Ripoll</b> is a Colombian singer, songwriter, dancer.</p>' + ], // Any content in parentheticals is stripped and no double spaces are left in the output [ '<p><b>Epistemology</b> (<span class="nowrap"><span class="IPA nopopups noexcerpt"><span>/<span style="border-bottom:1px dotted"><span title="/ɪ/ or /ə/ \'e\' in \'roses\'">ᵻ</span><span title="/ˌ/ secondary stress follows">ˌ</span><span title="\'p\' in \'pie\'">p</span><span title="/ɪ/ short \'i\' in \'bid\'">ɪ</span><span title="\'s\' in \'sigh\'">s</span><span title="\'t\' in \'tie\'">t</span><span title="/ɪ/ or /ə/ \'e\' in \'roses\'">ᵻ</span><span title="/ˈ/ primary stress follows">ˈ</span><span title="\'m\' in \'my\'">m</span><span title="/ɒ/ short \'o\' in \'body\'">ɒ</span><span title="\'l\' in \'lie\'">l</span><span title="/ə/ \'a\' in \'about\'">ə</span><span title="/dʒ/ \'j\' in \'jam\'">dʒ</span><span title="/i/ \'y\' in \'happy\'">i</span></span>/</span></span><small class="nowrap metadata"> (<span class="unicode haudio"><span class="fn"><span style="white-space:nowrap"><span><img alt="About this sound" src="//upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/11px-Loudspeaker.svg.png" width="11" height="11" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/17px-Loudspeaker.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/22px-Loudspeaker.svg.png 2x" data-file-width="20" data-file-height="20"></span> </span><span>listen</span></span></span>)</small></span>; from <span>Greek</span> <span lang="gre" xml:lang="gre"><span >ἐπιστήμη</span><i>, epistēmē</i></span>, meaning \'knowledge\', and <span lang="" xml:lang=""><span>λόγος</span><i>, <span>logos</span></i></span>, meaning \'logical discourse\') is the <span>branch</span> of <span>philosophy</span> concerned with the theory of <span>knowledge</span>.</p>', -- To view, visit https://gerrit.wikimedia.org/r/379902 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ifd0c47114973fd7c1cc1c2231a52947b5dfcbe34 Gerrit-PatchSet: 4 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: Jdlrobson <jrob...@wikimedia.org> Gerrit-Reviewer: BearND <bsitzm...@wikimedia.org> Gerrit-Reviewer: Dbrant <dbr...@wikimedia.org> Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org> Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org> Gerrit-Reviewer: Mhurd <mh...@wikimedia.org> Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org> Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits