Arlolra has uploaded a new change for review. https://gerrit.wikimedia.org/r/201087
Change subject: Combine the cleanup and template cleanup passes ...................................................................... Combine the cleanup and template cleanup passes * The template cleanup pass wasn't having an effect on v2 output. * The blacklist changes are a little unclear. Not sure if it was a bug to stop processing the entire template when encountering a mw-reference-text class, or whether the intention was just that node, or all its children? Hopefully review can shed some light. Change-Id: I4c6452e49563cf3923a5bb69485f1ea7d0c13e33 --- D lib/dom.cleanUpTemplates.js M lib/dom.cleanup.js M lib/dom.linter.js M lib/mediawiki.DOMPostProcessor.js M lib/mediawiki.DOMUtils.js M tests/parserTests-blacklist.js 6 files changed, 223 insertions(+), 256 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/87/201087/1 diff --git a/lib/dom.cleanUpTemplates.js b/lib/dom.cleanUpTemplates.js deleted file mode 100644 index e6d372d..0000000 --- a/lib/dom.cleanUpTemplates.js +++ /dev/null @@ -1,44 +0,0 @@ -"use strict"; - -var DU = require('./mediawiki.DOMUtils.js').DOMUtils; - -function stripEmptyElements(node, tplInfo, options) { - // Cannot delete if: - // * it is the first node since that carries the transclusion - // information (typeof, data-mw). We could delete and migrate - // the info over, but more pain than worth it. We can reconsider if - // this ever becomes an issue. - // * it has any attributes. - if (!node.firstChild && node !== tplInfo.first && - node.nodeName in {'TR':1, 'LI':1} && node.attributes.length === 0) - { - DU.deleteNode(node); - } -} - -function removeDataParsoid(node, tplInfo, options) { - if (node !== tplInfo.first) { - var dp = DU.getDataParsoid(node); - // We can't remove data-parsoid from inside <references> text, as that's - // the only HTML representation we have left for it. 
- if (node.getAttribute('class') === "mw-reference-text") { - tplInfo.done = true; - return; - } - // TODO: We can't remove dp from nodes with stx information - // right now, as the serializer needs that information to know which - // content model the text came from to emit the right newline separators. - // For example, both "a\n\nb" and "<p>a</p><p>b/p>" both generate - // identical html but serialize to different wikitext. - if (!dp.stx) { - node.removeAttribute('data-parsoid'); - } - } -} - -if (typeof module === "object") { - module.exports.stripEmptyElements = - DU.traverseTplOrExtNodes.bind(DU, stripEmptyElements); - module.exports.removeDataParsoid = - DU.traverseTplOrExtNodes.bind(DU, removeDataParsoid); -} diff --git a/lib/dom.cleanup.js b/lib/dom.cleanup.js index 38f35fe..5d5bc4c 100644 --- a/lib/dom.cleanup.js +++ b/lib/dom.cleanup.js @@ -33,74 +33,115 @@ } } -/** - * Perform some final cleaup and save data-parsoid attributes on each node. - */ -function cleanupAndSaveDataParsoid( env, node, atTopLevel ) { - if ( DU.isElt(node) ) { - var dp = DU.getDataParsoid( node ); - if (dp) { - // Delete empty auto-inserted elements - var next = node.nextSibling; - if (dp.autoInsertedStart && dp.autoInsertedEnd && - !DU.isTplOrExtToplevelNode(node) && - (node.childNodes.length === 0 || - node.childNodes.length === 1 && !DU.isElt(node.firstChild) && /^\s*$/.test(node.textContent))) - { - if (node.firstChild) { - // migrate the ws out - node.parentNode.insertBefore(node.firstChild, node); - } - DU.deleteNode(node); - return next; - } - - dp.tagId = undefined; - - var validDataMW = !!Object.keys(DU.getDataMw(node)).length; - if ( !validDataMW ) { - // strip it - DU.setDataMw(node, undefined); - } - - // Remove dp.src from elements that have valid data-mw and dsr. This - // should reduce data-parsoid bloat. 
- if (validDataMW && Util.isValidDSR(dp.dsr)) { - dp.src = undefined; - } else if (/(?:^|\s)mw:(Transclusion|Extension)(\/[^\s]+)*(?=$|\s)/.test(node.getAttribute("typeof")) && - (!atTopLevel || !dp.tsr)) { - // Transcluded nodes will not have dp.tsr set and dont need dp.src either - dp.src = undefined; - } - - // Remove tsr - if (dp.tsr) { - dp.tsr = undefined; - } - - // Remove temporary information - dp.tmp = undefined; - - // Make dsr zero-range for fostered content - // to prevent selser from duplicating this content - // outside the table from where this came. - // - // But, do not zero it out if the node has template encapsulation - // information. That will be disastrous (see bug 52638, 52488). - if (dp.fostered && dp.dsr && !DU.isFirstEncapsulationWrapperNode(node)) { - dp.dsr[0] = dp.dsr[1]; - } - - if ( atTopLevel && env.storeDataParsoid ) { - DU.stripDataParsoid( env, node, dp ); - } - } - DU.saveDataAttribs( node ); +function stripEmptyElements( node, env, atTopLevel, tplInfo ) { + if ( !atTopLevel || !tplInfo || !DU.isElt( node ) ) { + return; } - return true; + // Cannot delete if: + // * it is the first node since that carries the transclusion + // information (typeof, data-mw). We could delete and migrate + // the info over, but more pain than worth it. We can reconsider if + // this ever becomes an issue. + // * it has any attributes. + if ( !node.firstChild && node !== tplInfo.first && + node.nodeName in {'TR':1, 'LI':1} && node.attributes.length === 0 + ) { + DU.deleteNode(node); + } } -if (typeof module === "object") { - module.exports.cleanupAndSaveDataParsoid = cleanupAndSaveDataParsoid; +/** + * Perform some final cleanup and save data-parsoid attributes on each node. 
+ */ +function cleanupAndSaveDataParsoid( node, env, atTopLevel, tplInfo ) { + if ( !DU.isElt( node ) ) { return; } + + var dp = DU.getDataParsoid( node ); + + // Delete empty auto-inserted elements + if ( dp.autoInsertedStart && dp.autoInsertedEnd && + !DU.isTplOrExtToplevelNode( node ) && ( + node.childNodes.length === 0 || ( + node.childNodes.length === 1 && !DU.isElt( node.firstChild ) && + /^\s*$/.test(node.textContent) + ) + ) ) { + var next = node.nextSibling; + if ( node.firstChild ) { + // migrate the ws out + node.parentNode.insertBefore( node.firstChild, node ); + } + DU.deleteNode( node ); + return next; + } + + var validDataMW = !!Object.keys( DU.getDataMw( node ) ).length; + if ( !validDataMW ) { + // Strip it + DU.setDataMw( node, undefined ); + } + + // Remove dp.src from elements that have valid data-mw and dsr. This + // should reduce data-parsoid bloat. + if ( validDataMW && Util.isValidDSR(dp.dsr) ) { + dp.src = undefined; + } else if ( tplInfo && tplInfo.first === node && (!atTopLevel || !dp.tsr) ) { + // Transcluded nodes will not have dp.tsr set + // and don't need dp.src either. + dp.src = undefined; + } + + // Remove tsr + if ( dp.tsr ) { + dp.tsr = undefined; + } + + // Remove temporary information + dp.tmp = undefined; + dp.tagId = undefined; + + // Make dsr zero-range for fostered content + // to prevent selser from duplicating this content + // outside the table from where this came. + // + // But, do not zero it out if the node has template encapsulation + // information. That will be disastrous (see bug 52638, 52488). + if ( dp.fostered && dp.dsr && !DU.isFirstEncapsulationWrapperNode(node) ) { + dp.dsr[0] = dp.dsr[1]; + } + + if ( atTopLevel ) { + // Strip data-parsoid from templated content, where not necessary. + if ( tplInfo && + // Keep for the first node. + tplInfo.first !== node && + // We can't remove data-parsoid from inside <references> text, + // as that's the only HTML representation we have left for it. 
+ node.getAttribute('class') !== "mw-reference-text" && + // TODO: We can't remove dp from nodes with stx information + // right now, as the serializer needs that information to know + // which content model the text came from to emit the right + // newline separators. For example, both "a\n\nb" and + // "<p>a</p><p>b/p>" both generate identical html but serialize + // to different wikitext. + !dp.stx + ) { + DU.getNodeData( node ).parsoid = undefined; + node.removeAttribute( "data-parsoid" ); + } + // Store for v2 purposes. + else if ( env.storeDataParsoid ) { + DU.stripDataParsoid( env, node, dp ); + } + } + + DU.saveDataAttribs( node ); +} + +if ( typeof module === "object" ) { + module.exports.cleanupAndSaveDataParsoid = + DU.traverseWithTplOrExtInfo.bind( DU, cleanupAndSaveDataParsoid ); + module.exports.stripEmptyElements = + DU.traverseWithTplOrExtInfo.bind(DU, stripEmptyElements); module.exports.stripMarkerMetas = stripMarkerMetas; } diff --git a/lib/dom.linter.js b/lib/dom.linter.js index 96d0470..276c16d 100644 --- a/lib/dom.linter.js +++ b/lib/dom.linter.js @@ -28,11 +28,10 @@ * https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec#Transclusion_content */ -function logTransclusions(env, c) { - - var dmw = DU.getDataMw(c); +function logTransclusions( env, node, dp, tplInfo ) { + var dmw = DU.getDataMw(node); if ( dmw ) { - var dsr = DU.getDataParsoid(c).dsr; + var dsr = tplInfo.dsr; if (dmw.parts) { var parts = dmw.parts, lintObj; if (typeof parts[0] === 'string' || typeof parts[parts.length - 1] === 'string') { @@ -53,7 +52,6 @@ } } } - return; } /* @@ -64,14 +62,14 @@ * 2. Unclosed Start Tags * 3. 
Stripped Tags */ -function logTreeBuilderFixup(env, c, dp, tmpl) { +function logTreeBuilderFixup(env, c, dp, tplInfo) { var cNodeName = c.nodeName.toLowerCase(), dsr = dp.dsr, lintObj, inTransclusion = false; - if (tmpl) { - dsr = tmpl.dsr; + if (tplInfo) { + dsr = tplInfo.dsr; inTransclusion = true; } @@ -94,13 +92,13 @@ DU.hasLiteralHTMLMarker(dp) && dsr) ) { - if (dp.autoInsertedEnd === true && (tmpl || dsr[2]>0) ) { + if (dp.autoInsertedEnd === true && (tplInfo || dsr[2]>0) ) { lintObj = { src:env.page.src, dsr:dsr, tip:'Add End Tag to Fix this', inTransclusion:inTransclusion}; env.log('lint/missing-end-tag', lintObj); } - if (dp.autoInsertedStart === true && (tmpl || dsr[3]>0) ) { + if (dp.autoInsertedStart === true && (tplInfo || dsr[3]>0) ) { lintObj = { src:env.page.src, dsr:dsr, tip:'Add Start Tag to Fix this', inTransclusion:inTransclusion}; env.log('lint/missing-start-tag', lintObj); @@ -119,7 +117,7 @@ * * Here foo gets Ignored and is found in the data-parsoid of <tr> tags. */ -function logIgnoredTableAttr(env, c, dp, tmpl) { +function logIgnoredTableAttr(env, c, dp, tplInfo) { var dsr, inTransclusion = false; if (DU.hasNodeName(c, "table")) { var fc = c.firstChild; @@ -141,8 +139,8 @@ } if (wc) { - if ( tmpl ) { - dsr = tmpl.dsr; + if ( tplInfo ) { + dsr = tplInfo.dsr; inTransclusion = true; } else { dsr = dp.dsr; @@ -172,24 +170,24 @@ * * Here 'foo' gets fostered out. 
*/ -function logFosteredContent(env, c, dp, tmpl, nextSibling){ - - var dsr, inTransclusion = false; - var fosteredSRC = c.innerHTML; - while (nextSibling && !DU.hasNodeName(nextSibling,'table')) { +function logFosteredContent( env, node, dp, tplInfo ) { + var fosteredSRC = node.innerHTML, + nextSibling = node.nextSibling; + while ( nextSibling && !DU.hasNodeName(nextSibling, 'table') ) { fosteredSRC += nextSibling.innerHTML; - if ( nextSibling === tmpl.last ) { - tmpl.done = true; + if ( tplInfo && nextSibling === tplInfo.last ) { + tplInfo.clear = true; } nextSibling = nextSibling.nextSibling; } - if (tmpl) { - dsr = tmpl.dsr; + var dsr, inTransclusion = false; + if ( tplInfo ) { + dsr = tplInfo.dsr; inTransclusion = true; } else { dsr = DU.getDataParsoid(nextSibling).dsr; } - var lintObj = { src:fosteredSRC, dsr:dsr, inTransclusion:inTransclusion }; + var lintObj = { src: fosteredSRC, dsr: dsr, inTransclusion: inTransclusion }; env.log('lint/fostered', lintObj); return nextSibling; } @@ -200,13 +198,13 @@ * See - http://www.w3.org/TR/html5/obsolete.html#non-conforming-features * */ -function logObsoleteHTMLTags(env, c, dp, tmpl) { +function logObsoleteHTMLTags(env, c, dp, tplInfo) { var dsr = dp.dsr, inTransclusion = false; var re = /^(BIG|CENTER|FONT|STRIKE|TT)$/; - if (tmpl) { - dsr = tmpl.dsr; + if (tplInfo) { + dsr = tplInfo.dsr; inTransclusion = true; } @@ -222,15 +220,15 @@ * See - https://www.mediawiki.org/wiki/Help:Images#Syntax * */ -function logBogusImageOptions(env, c, dp, tmpl) { +function logBogusImageOptions(env, c, dp, tplInfo) { if(DU.isGeneratedFigure(c)) { var optlist = dp.optList; optlist.forEach(function (item) { if (item.ck === "bogus") { var dsr, inTransclusion = false; - if ( tmpl ) { - dsr = tmpl.dsr; + if ( tplInfo ) { + dsr = tplInfo.dsr; inTransclusion = true; } else { dsr = dp.dsr; @@ -242,65 +240,41 @@ } } -function logWikitextFixups(node, env, options, atTopLevel, tmpl) { +function logWikitextFixups( node, env, atTopLevel, tplInfo ) 
{ // For now, don't run linter in subpipelines. // Only on the final DOM for the top-level page. - if (!atTopLevel) { + if ( !atTopLevel || !DU.isElt( node ) ) { return; } - var c = node.firstChild; + var dp = DU.getDataParsoid( node ); - while (c) { - var nextSibling = c.nextSibling, - dp = DU.getDataParsoid(c); - - // Store info from the first node of an about id group. - // Nested templates aren't an issue because we expand top-level - // templates with the mediawiki api. - if ( !tmpl && DU.isTplOrExtToplevelNode(c) ) { - var about = c.getAttribute('about'); - tmpl = { - last: DU.getAboutSiblings(c, about).last(), - dsr: dp.dsr, - done: false - }; - - // Log transclusions with more than one part - logTransclusions(env, c); - } - - if (DU.isElt(c)) { - - // Log Tree Builder fixups - logTreeBuilderFixup(env, c, dp, tmpl); - - // Log Ignored Table Attributes - logIgnoredTableAttr(env, c, dp, tmpl); - - // Log obsolete HTML tags - logObsoleteHTMLTags(env, c, dp, tmpl); - - // Log bogus image options - logBogusImageOptions(env, c, dp, tmpl); - - if (dp.fostered) { - // Log Fostered content - nextSibling = logFosteredContent(env, c, dp, tmpl, nextSibling); - } else if (c.childNodes.length > 0) { - // Process subtree - logWikitextFixups(c, env, options, atTopLevel, tmpl); - } - } - - if ( tmpl && (c === tmpl.last || tmpl.done) ) { - tmpl = null; - } - - c = nextSibling; + if ( tplInfo && tplInfo.first === node ) { + // Log transclusions with more than one part + logTransclusions( env, node, dp, tplInfo ); } + + // Log Tree Builder fixups + logTreeBuilderFixup( env, node, dp, tplInfo ); + + // Log Ignored Table Attributes + logIgnoredTableAttr( env, node, dp, tplInfo ); + + // Log obsolete HTML tags + logObsoleteHTMLTags( env, node, dp, tplInfo ); + + // Log bogus image options + logBogusImageOptions( env, node, dp, tplInfo ); + + var next; + if ( dp.fostered ) { + // Log Fostered content + next = logFosteredContent( env, node, dp, tplInfo, next ); + } + return next; } 
if (typeof module === "object") { - module.exports.logWikitextFixups = logWikitextFixups; + module.exports.logWikitextFixups = + DU.traverseWithTplOrExtInfo.bind( DU, logWikitextFixups ); } diff --git a/lib/mediawiki.DOMPostProcessor.js b/lib/mediawiki.DOMPostProcessor.js index 6420b8e..fea4127 100644 --- a/lib/mediawiki.DOMPostProcessor.js +++ b/lib/mediawiki.DOMPostProcessor.js @@ -11,7 +11,6 @@ DU = require('./mediawiki.DOMUtils.js').DOMUtils, dumpDOM = require('./dom.dumper.js').dumpDOM, CleanUp = require('./dom.cleanup.js'), - cleanupAndSaveDataParsoid = CleanUp.cleanupAndSaveDataParsoid, computeDSR = require('./dom.computeDSR.js').computeDSR, processRefs = require('./dom.processRefs.js').processRefs, handleLinkNeighbours = require('./dom.t.handleLinkNeighbours.js').handleLinkNeighbours, @@ -23,11 +22,9 @@ migrateTemplateMarkerMetas = require('./dom.migrateTemplateMarkerMetas.js').migrateTemplateMarkerMetas, migrateTrailingNLs = require('./dom.migrateTrailingNLs.js').migrateTrailingNLs, TableFixups = require('./dom.t.TableFixups.js'), - stripMarkerMetas = CleanUp.stripMarkerMetas, unpackDOMFragments = require('./dom.t.unpackDOMFragments.js').unpackDOMFragments, wrapTemplates = require('./dom.wrapTemplates.js').wrapTemplates, - cleanUpTemplates = require('./dom.cleanUpTemplates.js'), - lintWikitextFixup = require('./dom.linter.js').logWikitextFixups; + logWikitextFixup = require('./dom.linter.js').logWikitextFixups; // map from mediawiki metadata names to RDFa property names var metadataMap = { @@ -188,17 +185,18 @@ env.conf.parsoid.nativeExtensions.cite)); // Strip empty elements from template content - this.processors.push(cleanUpTemplates.stripEmptyElements); + this.processors.push( CleanUp.stripEmptyElements ); - if (env.conf.parsoid.linting) { - this.processors.push(lintWikitextFixup); - } + if ( env.conf.parsoid.linting ) { + this.processors.push( logWikitextFixup ); + } var domVisitor2 = new DOMTraverser(env), tableFixer = new 
TableFixups.TableFixups(env); // 1. Strip marker metas -- removes left over marker metas (ex: metas // nested in expanded tpl/extension output). - domVisitor2.addHandler( 'meta', stripMarkerMetas.bind(null, env.conf.parsoid.rtTestMode) ); + domVisitor2.addHandler( 'meta', + CleanUp.stripMarkerMetas.bind( null, env.conf.parsoid.rtTestMode ) ); // 2. Fix up DOM for li-hack. domVisitor2.addHandler( 'li', handleLIHack.bind( null, env ) ); // 3. Fix up issues from templated table cells and table cell attributes @@ -210,12 +208,7 @@ // Save data.parsoid into data-parsoid html attribute. // Make this its own thing so that any changes to the DOM // don't affect other handlers that run alongside it. - var domVisitor3 = new DOMTraverser(env); - domVisitor3.addHandler( null, cleanupAndSaveDataParsoid.bind( null, env ) ); - this.processors.push(domVisitor3.traverse.bind(domVisitor3)); - - // Remove data-parsoid from transcluded content - this.processors.push(cleanUpTemplates.removeDataParsoid); + this.processors.push( CleanUp.cleanupAndSaveDataParsoid ); } // Inherit from EventEmitter diff --git a/lib/mediawiki.DOMUtils.js b/lib/mediawiki.DOMUtils.js index 6e66f41..99375f7 100644 --- a/lib/mediawiki.DOMUtils.js +++ b/lib/mediawiki.DOMUtils.js @@ -260,19 +260,19 @@ if ( !isElt(node) ) { return; } - var data = this.getNodeData( node ); - for (var key in data) { + var data = DU.getNodeData( node ); + Object.keys( data ).forEach(function( key ) { if ( key.match( /^tmp_/ ) !== null ) { - continue; + return; } var val = data[key]; if ( val && val.constructor === String ) { node.setAttribute('data-' + key, val); } else if (val instanceof Object) { - this.setJSONAttribute(node, 'data-' + key, val); + DU.setJSONAttribute(node, 'data-' + key, val); } // Else: throw error? - } + }); }, // Load and stores the data as JSON attributes on the nodes. 
@@ -840,55 +840,58 @@ } }, - traverseTplOrExtNodes: function (cb, node, env, options, atTopLevel, tplInfo) { - // Don't bother with sub-pipelines - if (!atTopLevel || !node) { - return; - } + traverseWithTplOrExtInfo: function( func, node, env, options, atTopLevel, tplInfo ) { + var next, possibleNext, about, typeOf; + while ( node ) { + next = node.nextSibling; - var c = node.firstChild; - while (c) { - var next = c.nextSibling; - - if (DU.isElt(c)) { - // Identify template/extension content (not interested in "mw:Param" nodes). - // We are interested in the very first node. - if (this.isTplOrExtToplevelNode(c) && - /(^|\s)mw:(Extension|Transclusion)/.test(c.getAttribute("typeof"))) - { - // We know that tplInfo will be null here since we don't - // mark up nested transclusions. - var about = c.getAttribute('about'); + if ( DU.isElt( node ) ) { + typeOf = node.getAttribute("typeof"); + // Identify the first template/extension node. + // Strictly speaking, the !tplInfo check isn't necessary since + // we don't have nested transclusions. + if ( !tplInfo && DU.isTplOrExtToplevelNode(node) && + // No case for Params yet. + /(^|\s)mw:(Extension|Transclusion)/.test( typeOf ) + ) { + about = node.getAttribute("about"); tplInfo = { - first: c, - last: DU.getAboutSiblings(c, about).last(), - // Set next to change the next node to be traversed - next: null, - // Set done to stop traversing - done: false + first: node, + last: DU.getAboutSiblings( node, about ).last(), + dsr: DU.getDataParsoid( node ).dsr, + // Give funcs a chance to clear the tplInfo after + // processing. 
+ clear: false }; } - - if (tplInfo) { - cb(c, tplInfo, options); - } - - if (!(tplInfo && (tplInfo.done || tplInfo.next))) { - // If not done, process subtree - this.traverseTplOrExtNodes(cb, c, env, options, atTopLevel, tplInfo); - } - - if (tplInfo && (tplInfo.last === c || tplInfo.done)) { - // Clear tplInfo - tplInfo = null; - } } - if (tplInfo && tplInfo.next) { - c = tplInfo.next; - } else { - c = next; + // Possibly skip to processing another sibling. + // node.nextSibling returns a node or null, so explicitly test + // against undefined below. + possibleNext = func( node, env, atTopLevel, tplInfo ); + + // We may have walked passed the last about sibling or want to + // ignore the template info in future processing. + if ( tplInfo && tplInfo.clear ) { + tplInfo = null; } + + if ( possibleNext !== undefined ) { + next = possibleNext; + } else if ( DU.isElt( node ) ) { + DU.traverseWithTplOrExtInfo( func, node.firstChild, env, options, atTopLevel, tplInfo ); + } + + // Clear the template info after reaching the last about sibling. + // The case for clearing (or walking pass the last about sibling + // in the subtree seems dubiously useful and error prone, consider + // leaving it out.) + if ( tplInfo && ( tplInfo.last === node || tplInfo.clear ) ) { + tplInfo = null; + } + + node = next; } }, @@ -2051,7 +2054,7 @@ DU.addNormalizedAttribute( node, "id", uid, origId ); } docDp.ids[uid] = dp; - delete DU.getNodeData( node ).parsoid; + DU.getNodeData( node ).parsoid = undefined; // It would be better to instrument all the load sites. node.removeAttribute( "data-parsoid" ); }, diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index 3a58913..044d669 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -517,7 +517,7 @@ add("wt2wt", "Ref: 5. body should accept generic wikitext", "A <ref>This is a '''[[bolded link]]''' and this is a {{echo|transclusion}}\n</ref>\n\n<references />"); add("wt2wt", "Ref: 6. 
indent-pres should not be output in ref-body", "A <ref>foo\n bar\n baz\n</ref>\n\n<references />"); add("wt2wt", "Ref: 7. No p-wrapping in ref-body", "A <ref>foo\nbar\nbaz\nbooz\n</ref>\n\n<references />"); -add("wt2wt", "Ref: 8. transclusion wikitext has lower precedence", "A <ref>foo {{echo|</ref> B C}}\n\n<references />"); +add("wt2wt", "Ref: 8. transclusion wikitext has lower precedence", "A <ref>foo <nowiki>{{</nowiki>echo|</ref> B C}}\n\n<references />"); add("wt2wt", "Ref: 9. unclosed comments should not leak out of ref-body", "A <ref>foo <!----></ref> B C\n<references />"); add("wt2wt", "Ref: 10. Unclosed HTML tags should not leak out of ref-body", "A <ref><b> foo </ref> B C\n\n<references />"); add("wt2wt", "Ref: 19. ref-tags with identical name encodings should get identical indexes", "1 <ref name=\"a & b\">foo</ref> 2 <ref name=\"a &amp; b\" />\n\n<references />"); @@ -869,8 +869,8 @@ add("html2html", "Gallery override link with absolute external link (bug 34852)", "<p data-parsoid='{\"dsr\":[0,4,0,0]}'>\t\t* </p><div style=\"width: 155px\" data-parsoid='{\"stx\":\"html\",\"dsr\":[4,239,26,6]}'>\n\t\t\t<div class=\"thumb\" style=\"width: 150px;\" data-parsoid='{\"stx\":\"html\",\"dsr\":[34,182,41,6]}'><div style=\"margin:68px auto;\" data-parsoid='{\"stx\":\"html\",\"dsr\":[75,176,31,6]}'><img src=\"http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg\" alt=\"120px-Foobar.jpg\" rel=\"mw:externalImage\" data-parsoid='{\"dsr\":[106,170,null,null]}'/></div></div>\n\t\t\t<div class=\"gallerytext\" data-parsoid='{\"stx\":\"html\",\"dsr\":[186,230,25,6]}'>\n<p data-parsoid='{\"dsr\":[212,219,0,0]}'>caption</p>\n\n\t\t\t</div>\n\t\t</div>\n"); add("html2html", "Gallery override link with malicious javascript (bug 34852)", "<p data-parsoid='{\"dsr\":[0,4,0,0]}'>\t\t* </p><div style=\"width: 155px\" data-parsoid='{\"stx\":\"html\",\"dsr\":[4,239,26,6]}'>\n\t\t\t<div class=\"thumb\" style=\"width: 150px;\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[34,182,41,6]}'><div style=\"margin:68px auto;\" data-parsoid='{\"stx\":\"html\",\"dsr\":[75,176,31,6]}'><img src=\"http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg\" alt=\"120px-Foobar.jpg\" rel=\"mw:externalImage\" data-parsoid='{\"dsr\":[106,170,null,null]}'/></div></div>\n\t\t\t<div class=\"gallerytext\" data-parsoid='{\"stx\":\"html\",\"dsr\":[186,230,25,6]}'>\n<p data-parsoid='{\"dsr\":[212,219,0,0]}'>caption</p>\n\n\t\t\t</div>\n\t\t</div>\n"); add("html2html", "Gallery with invalid title as link (bug 43964)", "<p data-parsoid='{\"dsr\":[0,4,0,0]}'>\t\t* </p><div style=\"width: 155px\" data-parsoid='{\"stx\":\"html\",\"dsr\":[4,229,26,6]}'>\n\t\t\t<div class=\"thumb\" style=\"width: 150px;\" data-parsoid='{\"stx\":\"html\",\"dsr\":[34,182,41,6]}'><div style=\"margin:68px auto;\" data-parsoid='{\"stx\":\"html\",\"dsr\":[75,176,31,6]}'><img src=\"http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg\" alt=\"120px-Foobar.jpg\" rel=\"mw:externalImage\" data-parsoid='{\"dsr\":[106,170,null,null]}'/></div></div>\n\t\t\t<div class=\"gallerytext\" data-parsoid='{\"stx\":\"html\",\"dsr\":[186,220,25,6]}'>\t\t\t</div>\n\t\t</div>\n"); -add("html2html", "Ref: 19. 
ref-tags with identical name encodings should get identical indexes", "<p data-parsoid='{\"dsr\":[0,60,0,0]}'>1 <span about=\"#mwt3\" class=\"reference\" id=\"cite_ref-a_.26_b_1-0\" rel=\"dc:references\" typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[2,29,18,6]}' data-mw='{\"name\":\"ref\",\"body\":{\"id\":\"mw-reference-text-cite_note-a_.26_b-1\"},\"attrs\":{\"name\":\"a & b\"}}'><a href=\"#cite_note-a_.26_b-1\">[1]</a></span> 2 <span about=\"#mwt4\" class=\"reference\" id=\"cite_ref-a_.26amp.3B_b_2-0\" rel=\"dc:references\" typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[32,60,28,0]}' data-mw='{\"name\":\"ref\",\"attrs\":{\"name\":\"a &amp;amp; b\"}}'><a href=\"#cite_note-a_.26amp.3B_b-2\">[2]</a></span></p>\n\n<ol class=\"references\" typeof=\"mw:Extension/references\" about=\"#mwt6\" data-parsoid='{\"dsr\":[62,76,2,2]}' data-mw='{\"name\":\"references\",\"attrs\":{}}'><li about=\"#cite_note-a_.26_b-1\" id=\"cite_note-a_.26_b-1\"><span rel=\"mw:referencedBy\"><a href=\"#cite_ref-a_.26_b_1-0\">↑</a></span> <span id=\"mw-reference-text-cite_note-a_.26_b-1\" class=\"mw-reference-text\" data-parsoid=\"{}\">foo</span></li><li about=\"#cite_note-a_.26amp.3B_b-2\" id=\"cite_note-a_.26amp.3B_b-2\" data-parsoid=\"{}\"><span rel=\"mw:referencedBy\" data-parsoid=\"{}\"><a href=\"#cite_ref-a_.26amp.3B_b_2-0\" data-parsoid=\"{}\">↑</a></span> <span id=\"mw-reference-text-cite_note-a_.26amp.3B_b-2\" class=\"mw-reference-text\" data-parsoid=\"{}\"></span></li></ol>"); -add("html2html", "References: 5. 
ref tags in references should be processed while ignoring all other content", "<p data-parsoid='{\"dsr\":[0,44,0,0]}'>A <span about=\"#mwt2\" class=\"reference\" id=\"cite_ref-a_1-0\" rel=\"dc:references\" typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[2,18,16,0]}' data-mw='{\"name\":\"ref\",\"attrs\":{\"name\":\"a\"}}'><a href=\"#cite_note-a-1\">[1]</a></span>\nB <span about=\"#mwt4\" class=\"reference\" id=\"cite_ref-b_2-0\" rel=\"dc:references\" typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[21,44,14,6]}' data-mw='{\"name\":\"ref\",\"body\":{\"id\":\"mw-reference-text-cite_note-b-2\"},\"attrs\":{\"name\":\"b\"}}'><a href=\"#cite_note-b-2\">[2]</a></span></p>\n\n<ol class=\"references\" typeof=\"mw:Extension/references\" about=\"#mwt6\" data-parsoid='{\"dsr\":[46,96,2,2]}' data-mw='{\"name\":\"references\",\"body\":{\"extsrc\":\"<ref name=\\\"a\\\">foo</ref>\",\"html\":\"\\n<span about=\\\"#mwt8\\\" class=\\\"reference\\\" rel=\\\"dc:references\\\" typeof=\\\"mw:Extension/ref\\\" data-parsoid='{\\\"dsr\\\":[59,82,14,6]}' data-mw='{\\\"name\\\":\\\"ref\\\",\\\"body\\\":{\\\"id\\\":\\\"mw-reference-text-cite_note-a-1\\\"},\\\"attrs\\\":{\\\"name\\\":\\\"a\\\"}}'><a href=\\\"#cite_note-a-1\\\">[1]</a></span>\\n\"},\"attrs\":{}}'><li about=\"#cite_note-a-1\" id=\"cite_note-a-1\"><span rel=\"mw:referencedBy\"><a href=\"#cite_ref-a_1-0\">↑</a></span> <span id=\"mw-reference-text-cite_note-a-1\" class=\"mw-reference-text\" data-parsoid=\"{}\">foo</span></li><li about=\"#cite_note-b-2\" id=\"cite_note-b-2\" data-parsoid=\"{}\"><span rel=\"mw:referencedBy\" data-parsoid=\"{}\"><a href=\"#cite_ref-b_2-0\" data-parsoid=\"{}\">↑</a></span> <span id=\"mw-reference-text-cite_note-b-2\" class=\"mw-reference-text\" data-parsoid=\"{}\">bar</span></li></ol>"); +add("html2html", "Ref: 19. 
ref-tags with identical name encodings should get identical indexes", "<p data-parsoid='{\"dsr\":[0,60,0,0]}'>1 <span about=\"#mwt3\" class=\"reference\" id=\"cite_ref-a_.26_b_1-0\" rel=\"dc:references\" typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[2,29,18,6]}' data-mw='{\"name\":\"ref\",\"body\":{\"id\":\"mw-reference-text-cite_note-a_.26_b-1\"},\"attrs\":{\"name\":\"a & b\"}}'><a href=\"#cite_note-a_.26_b-1\">[1]</a></span> 2 <span about=\"#mwt4\" class=\"reference\" id=\"cite_ref-a_.26amp.3B_b_2-0\" rel=\"dc:references\" typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[32,60,28,0]}' data-mw='{\"name\":\"ref\",\"attrs\":{\"name\":\"a &amp;amp; b\"}}'><a href=\"#cite_note-a_.26amp.3B_b-2\">[2]</a></span></p>\n\n<ol class=\"references\" typeof=\"mw:Extension/references\" about=\"#mwt6\" data-parsoid='{\"dsr\":[62,76,2,2]}' data-mw='{\"name\":\"references\",\"attrs\":{}}'><li about=\"#cite_note-a_.26_b-1\" id=\"cite_note-a_.26_b-1\"><span rel=\"mw:referencedBy\"><a href=\"#cite_ref-a_.26_b_1-0\">↑</a></span> <span id=\"mw-reference-text-cite_note-a_.26_b-1\" class=\"mw-reference-text\" data-parsoid=\"{}\">foo</span></li><li about=\"#cite_note-a_.26amp.3B_b-2\" id=\"cite_note-a_.26amp.3B_b-2\"><span rel=\"mw:referencedBy\"><a href=\"#cite_ref-a_.26amp.3B_b_2-0\">↑</a></span> <span id=\"mw-reference-text-cite_note-a_.26amp.3B_b-2\" class=\"mw-reference-text\" data-parsoid=\"{}\"></span></li></ol>"); +add("html2html", "References: 5. 
ref tags in references should be processed while ignoring all other content", "<p data-parsoid='{\"dsr\":[0,44,0,0]}'>A <span about=\"#mwt2\" class=\"reference\" id=\"cite_ref-a_1-0\" rel=\"dc:references\" typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[2,18,16,0]}' data-mw='{\"name\":\"ref\",\"attrs\":{\"name\":\"a\"}}'><a href=\"#cite_note-a-1\">[1]</a></span>\nB <span about=\"#mwt4\" class=\"reference\" id=\"cite_ref-b_2-0\" rel=\"dc:references\" typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[21,44,14,6]}' data-mw='{\"name\":\"ref\",\"body\":{\"id\":\"mw-reference-text-cite_note-b-2\"},\"attrs\":{\"name\":\"b\"}}'><a href=\"#cite_note-b-2\">[2]</a></span></p>\n\n<ol class=\"references\" typeof=\"mw:Extension/references\" about=\"#mwt6\" data-parsoid='{\"dsr\":[46,96,2,2]}' data-mw='{\"name\":\"references\",\"body\":{\"extsrc\":\"<ref name=\\\"a\\\">foo</ref>\",\"html\":\"\\n<span about=\\\"#mwt8\\\" class=\\\"reference\\\" rel=\\\"dc:references\\\" typeof=\\\"mw:Extension/ref\\\" data-parsoid='{\\\"dsr\\\":[59,82,14,6]}' data-mw='{\\\"name\\\":\\\"ref\\\",\\\"body\\\":{\\\"id\\\":\\\"mw-reference-text-cite_note-a-1\\\"},\\\"attrs\\\":{\\\"name\\\":\\\"a\\\"}}'><a href=\\\"#cite_note-a-1\\\">[1]</a></span>\\n\"},\"attrs\":{}}'><li about=\"#cite_note-a-1\" id=\"cite_note-a-1\"><span rel=\"mw:referencedBy\"><a href=\"#cite_ref-a_1-0\">↑</a></span> <span id=\"mw-reference-text-cite_note-a-1\" class=\"mw-reference-text\" data-parsoid=\"{}\">foo</span></li><li about=\"#cite_note-b-2\" id=\"cite_note-b-2\"><span rel=\"mw:referencedBy\"><a href=\"#cite_ref-b_2-0\">↑</a></span> <span id=\"mw-reference-text-cite_note-b-2\" class=\"mw-reference-text\" data-parsoid=\"{}\">bar</span></li></ol>"); add("html2html", "Entities in ref name", "<p data-parsoid='{\"dsr\":[0,38,0,0]}'><span about=\"#mwt2\" class=\"reference\" id=\"cite_ref-test_.26amp.3B_me_1-0\" rel=\"dc:references\" typeof=\"mw:Extension/ref\" data-parsoid='{\"dsr\":[0,38,30,6]}' 
data-mw='{\"name\":\"ref\",\"body\":{\"id\":\"mw-reference-text-cite_note-test_.26amp.3B_me-1\"},\"attrs\":{\"name\":\"test &amp;amp; me\"}}'><a href=\"#cite_note-test_.26amp.3B_me-1\">[1]</a></span></p>\n<ol class=\"references\" typeof=\"mw:Extension/references\" about=\"#mwt4\" data-parsoid='{\"dsr\":[39,53,2,2]}' data-mw='{\"name\":\"references\",\"attrs\":{}}'><li about=\"#cite_note-test_.26amp.3B_me-1\" id=\"cite_note-test_.26amp.3B_me-1\"><span rel=\"mw:referencedBy\"><a href=\"#cite_ref-test_.26amp.3B_me_1-0\">↑</a></span> <span id=\"mw-reference-text-cite_note-test_.26amp.3B_me-1\" class=\"mw-reference-text\" data-parsoid=\"{}\">hi</span></li></ol>"); add("html2html", "Empty LI and TR nodes should not be stripped from top-level content", "<ul data-parsoid='{\"dsr\":[0,9,0,0]}'><li data-parsoid='{\"dsr\":[0,3,1,0]}'> a</li>\n<li data-parsoid='{\"dsr\":[4,5,1,0]}'></li>\n<li data-parsoid='{\"dsr\":[6,9,1,0]}'> b</li></ul>\n\n<table data-parsoid='{\"dsr\":[11,26,2,2]}'>\n\n<tbody data-parsoid='{\"dsr\":[15,24,0,0]}'><tr data-parsoid='{\"startTagSrc\":\"|-\",\"autoInsertedEnd\":true,\"dsr\":[15,22,2,0]}'>\n<td data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[18,22,1,0]}'>foo</td></tr>\n\n</tbody></table>\n"); add("html2html", "Headings: 6a. 
Heading chars in SOL context (with trailing spaces)", "<p data-parsoid='{\"dsr\":[0,20,0,0]}'><span typeof=\"mw:Nowiki\" data-parsoid='{\"dsr\":[0,20,8,9]}'>=a=</span></p>\n\n<p data-parsoid='{\"dsr\":[22,42,0,0]}'><span typeof=\"mw:Nowiki\" data-parsoid='{\"dsr\":[22,42,8,9]}'>=a=</span></p> \n\n<p data-parsoid='{\"dsr\":[45,65,0,0]}'><span typeof=\"mw:Nowiki\" data-parsoid='{\"dsr\":[45,65,8,9]}'>=a=</span></p>\t\n\n<p data-parsoid='{\"dsr\":[68,88,0,0]}'><span typeof=\"mw:Nowiki\" data-parsoid='{\"dsr\":[68,88,8,9]}'>=a=</span></p> \t\n"); -- To view, visit https://gerrit.wikimedia.org/r/201087 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I4c6452e49563cf3923a5bb69485f1ea7d0c13e33 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits