GWicke has submitted this change and it was merged. Change subject: Fixed DSR errors introduced by DOM spec changes ......................................................................
Fixed DSR errors introduced by DOM spec changes * DSR computation used DOM Spec markers on A-tags to detect source wikitext syntax in computing dsr tag widths. This code went out of sync with DOM spec changes. * This patch introduces helpers to detect a-tag source syntax and adds additional data-parsoid markers for rfc/pmid/isbn uses. * Eliminated lower-case html tag names in mediawiki.wikitext.constants.js which got rid of the only lower-case HTML tag names in that file. * No change in parser test results but 25 fewer DSR warnings and eliminates NaN entries. Change-Id: Idf0a15d64739da5d82f0632864987ff547daded6 --- M js/lib/dom.computeDSR.js M js/lib/dom.migrateTemplateMarkerMetas.js M js/lib/mediawiki.wikitext.constants.js M js/lib/pegTokenizer.pegjs.txt 4 files changed, 64 insertions(+), 44 deletions(-) diff --git a/js/lib/dom.computeDSR.js b/js/lib/dom.computeDSR.js index 49ac60d..ff4f287 100644 --- a/js/lib/dom.computeDSR.js +++ b/js/lib/dom.computeDSR.js @@ -5,6 +5,25 @@ Util = require('./mediawiki.Util.js').Util, dumpDOM = require('./dom.dumper.js').dumpDOM; +// Helper function to detect when an A-node uses [[..]] style wikilink syntax +// mw:ExtLink rel-type is not sufficient anymore since [[..]] style links can +// also be tagged ext-links +function usesWikiLinkSyntax(aNode, dp) { + return aNode.getAttribute("rel") === "mw:WikiLink" || + (dp.stx && dp.stx !== "url" && dp.stx !== "protocol"); +} + +function usesExtLinkSyntax(aNode, dp) { + return aNode.getAttribute("rel") === "mw:ExtLink" && + (!dp.stx || (dp.stx !== "url" && dp.stx !== "protocol")); +} + +function usesURLLinkSyntax(aNode, dp) { + return aNode.getAttribute("rel") === "mw:ExtLink" && + dp.stx && + (dp.stx === "url" || dp.stx === "protocol"); +} + /* ------------------------------------------------------------------------ * TSR = "Tag Source Range". Start and end offsets giving the location * where the tag showed up in the original source. 
@@ -79,8 +98,7 @@ if (!dp) { return null; } else { - var aType = node.getAttribute("rel"); - if (aType === "mw:WikiLink" && + if (usesWikiLinkSyntax(node, dp) && !DU.isExpandedAttrsMetaType(node.getAttribute("typeof"))) { if (dp.stx === "piped") { @@ -93,8 +111,10 @@ } else { return [2, 2]; } - } else if (aType === "mw:ExtLink" && dp.tsr && dp.stx !== 'url') { + } else if (dp.tsr && usesExtLinkSyntax(node, dp)) { return [dp.targetOff - dp.tsr[0], 1]; + } else if (usesURLLinkSyntax(node, dp)) { + return [0, 0]; } else { return null; } @@ -111,19 +131,19 @@ etWidth = widths[1]; } } else { - var nodeName = node.nodeName.toLowerCase(); + var nodeName = node.nodeName; // 'tr' tags not in the original source have zero width - if (nodeName === 'tr' && !dp.startTagSrc) { + if (nodeName === 'TR' && !dp.startTagSrc) { stWidth = 0; etWidth = 0; } else { var wtTagWidth = Consts.WT_TagWidths[nodeName]; if (stWidth === null) { // we didn't have a tsr to tell us how wide this tag was. - if (nodeName === 'a') { + if (nodeName === 'A') { wtTagWidth = computeATagWidth(node, dp); stWidth = wtTagWidth ? wtTagWidth[0] : null; - } else if (nodeName === 'li' || nodeName === 'dd') { + } else if (nodeName === 'LI' || nodeName === 'DD') { stWidth = computeListEltWidth(node, nodeName); } else if (wtTagWidth) { stWidth = wtTagWidth[0]; @@ -364,13 +384,13 @@ * we don't have to worry about the above decisions and checks. * ----------------------------------------------------------------- */ - if (DU.hasNodeName(child, "a") && - child.getAttribute("rel") === "mw:WikiLink" && + if (child.nodeName === 'A' && + usesWikiLinkSyntax(child, dp) && dp.stx !== "piped") { /* ------------------------------------------------------------- * This check here eliminates artifical DSR mismatches on content - * text of the a-node because of entity expansion, etc. + * text of the A-node because of entity expansion, etc. 
* * Ex: [[7%25 solution]] will be rendered as: * <a href=....>7% solution</a> diff --git a/js/lib/dom.migrateTemplateMarkerMetas.js b/js/lib/dom.migrateTemplateMarkerMetas.js index c93c8c0..c6d27ab 100644 --- a/js/lib/dom.migrateTemplateMarkerMetas.js +++ b/js/lib/dom.migrateTemplateMarkerMetas.js @@ -33,7 +33,7 @@ // We can migrate the meta-tag across this node's end-tag barrier only // if that end-tag is zero-width. - tagWidth = Consts.WT_TagWidths[node.nodeName.toLowerCase()]; + tagWidth = Consts.WT_TagWidths[node.nodeName]; if (tagWidth && tagWidth[0] === 0 && !DU.isLiteralHTMLNode(node)) { node.parentNode.insertBefore(firstChild, node); } @@ -45,7 +45,7 @@ // We can migrate the meta-tag across this node's end-tag barrier only // if that end-tag is zero-width. - tagWidth = Consts.WT_TagWidths[node.nodeName.toLowerCase()]; + tagWidth = Consts.WT_TagWidths[node.nodeName]; if (tagWidth && tagWidth[1] === 0 && !DU.isLiteralHTMLNode(node)) { node.parentNode.insertBefore(lastChild, node.nextSibling); } diff --git a/js/lib/mediawiki.wikitext.constants.js b/js/lib/mediawiki.wikitext.constants.js index f10b6c8..7fea8c1 100644 --- a/js/lib/mediawiki.wikitext.constants.js +++ b/js/lib/mediawiki.wikitext.constants.js @@ -210,34 +210,34 @@ // Known wikitext tag widths -- these are known statically // but other widths are computed or updated based on actual wikitext usage WT_TagWidths: { - "body" : [0,0], - "html" : [0,0], - "head" : [0,0], - "p" : [0,0], - "meta" : [0,0], - "tbody" : [0,0], - "pre" : [1,0], - "ol" : [0,0], - "ul" : [0,0], - "dl" : [0,0], - "li" : [1,0], - "dt" : [1,0], - "dd" : [1,0], - "h1" : [1,1], - "h2" : [2,2], - "h3" : [3,3], - "h4" : [4,4], - "h5" : [5,5], - "h6" : [6,6], - "hr" : [4,0], - "table" : [2,2], - "tr" : [null,0], - "td" : [null,0], - "th" : [null,0], - "b" : [3,3], - "i" : [2,2], - "br" : [0,0], - "figure": [2,2] + "BODY" : [0,0], + "HTML" : [0,0], + "HEAD" : [0,0], + "P" : [0,0], + "META" : [0,0], + "TBODY" : [0,0], + "PRE" : [1,0], + 
"OL" : [0,0], + "UL" : [0,0], + "DL" : [0,0], + "LI" : [1,0], + "DT" : [1,0], + "DD" : [1,0], + "H1" : [1,1], + "H2" : [2,2], + "H3" : [3,3], + "H4" : [4,4], + "H5" : [5,5], + "H6" : [6,6], + "HR" : [4,0], + "TABLE" : [2,2], + "TR" : [null,0], + "TD" : [null,0], + "TH" : [null,0], + "B" : [3,3], + "I" : [2,2], + "BR" : [0,0], + "FIGURE": [2,2] }, // HTML tags whose wikitext equivalents are zero-width. @@ -256,10 +256,10 @@ Object.keys(WikitextConstants.WT_TagWidths).forEach(function(tag) { // This special case can be fixed by maybe removing them WT_TagWidths. // They may no longer be necessary -- to be investigated in another patch. - if (tag !== 'html' && tag !== 'head' && tag !== 'body') { + if (tag !== 'HTML' && tag !== 'HEAD' && tag !== 'BODY') { var widths = WikitextConstants.WT_TagWidths[tag]; if (widths[0] === 0 && widths[1] === 0) { - zeroWidthTags.push(tag.toUpperCase()); + zeroWidthTags.push(tag); } } }); diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt index d250d4b..5418acb 100644 --- a/js/lib/pegTokenizer.pegjs.txt +++ b/js/lib/pegTokenizer.pegjs.txt @@ -861,7 +861,7 @@ new KV('mw:content', [ref, identifier].join(' ')), new KV( 'typeof', 'mw:ExtLink/' + ref ) ], - {tsr: [pos0, pos]}) + {stx: "protocol", tsr: [pos0, pos]}) ]; } @@ -893,7 +893,7 @@ new KV('mw:content', 'ISBN ' + isbn), new KV('typeof', 'mw:WikiLink/ISBN') ], - {tsr: [pos0, pos]}) + {stx: "protocol", tsr: [pos0, pos]}) ]; } -- To view, visit https://gerrit.wikimedia.org/r/101299 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Idf0a15d64739da5d82f0632864987ff547daded6 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org 
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits