GWicke has submitted this change and it was merged.

Change subject: Fixed DSR errors introduced by DOM spec changes
......................................................................


Fixed DSR errors introduced by DOM spec changes

* DSR computation used DOM Spec markers on A-tags to detect
  source wikitext syntax in computing dsr tag widths. This code
  went out of sync with DOM spec changes.

* This patch introduces helpers to detect a-tag source syntax
  and adds additional data-parsoid markers for rfc/pmid/isbn
  uses.

* Converted the WT_TagWidths keys in
  mediawiki.wikitext.constants.js to upper-case HTML tag names
  so they can be looked up directly with node.nodeName; these
  were the only lower-case HTML tag names in that file.

* No change in parser test results, but there are 25 fewer DSR
  warnings and the NaN entries are eliminated.

Change-Id: Idf0a15d64739da5d82f0632864987ff547daded6
---
M js/lib/dom.computeDSR.js
M js/lib/dom.migrateTemplateMarkerMetas.js
M js/lib/mediawiki.wikitext.constants.js
M js/lib/pegTokenizer.pegjs.txt
4 files changed, 64 insertions(+), 44 deletions(-)



diff --git a/js/lib/dom.computeDSR.js b/js/lib/dom.computeDSR.js
index 49ac60d..ff4f287 100644
--- a/js/lib/dom.computeDSR.js
+++ b/js/lib/dom.computeDSR.js
@@ -5,6 +5,25 @@
        Util = require('./mediawiki.Util.js').Util,
        dumpDOM = require('./dom.dumper.js').dumpDOM;
 
+// Helper function to detect when an A-node uses [[..]] style wikilink syntax
+// mw:ExtLink rel-type is not sufficient anymore since [[..]] style links can
+// also be tagged ext-links
+function usesWikiLinkSyntax(aNode, dp) {
+       return aNode.getAttribute("rel") === "mw:WikiLink" ||
+               (dp.stx && dp.stx !== "url" && dp.stx !== "protocol");
+}
+
+function usesExtLinkSyntax(aNode, dp) {
+       return aNode.getAttribute("rel") === "mw:ExtLink" &&
+               (!dp.stx || (dp.stx !== "url" && dp.stx !== "protocol"));
+}
+
+function usesURLLinkSyntax(aNode, dp) {
+       return aNode.getAttribute("rel") === "mw:ExtLink" &&
+               dp.stx &&
+               (dp.stx === "url" || dp.stx === "protocol");
+}
+
 /* ------------------------------------------------------------------------
  * TSR = "Tag Source Range".  Start and end offsets giving the location
  * where the tag showed up in the original source.
@@ -79,8 +98,7 @@
                if (!dp) {
                        return null;
                } else {
-                       var aType = node.getAttribute("rel");
-                       if (aType === "mw:WikiLink" &&
+                       if (usesWikiLinkSyntax(node, dp) &&
                                
!DU.isExpandedAttrsMetaType(node.getAttribute("typeof")))
                        {
                                if (dp.stx === "piped") {
@@ -93,8 +111,10 @@
                                } else {
                                        return [2, 2];
                                }
-                       } else if (aType === "mw:ExtLink" && dp.tsr && dp.stx 
!== 'url') {
+                       } else if (dp.tsr && usesExtLinkSyntax(node, dp)) {
                                return [dp.targetOff - dp.tsr[0], 1];
+                       } else if (usesURLLinkSyntax(node, dp)) {
+                               return [0, 0];
                        } else {
                                return null;
                        }
@@ -111,19 +131,19 @@
                                etWidth = widths[1];
                        }
                } else {
-                       var nodeName = node.nodeName.toLowerCase();
+                       var nodeName = node.nodeName;
                        // 'tr' tags not in the original source have zero width
-                       if (nodeName === 'tr' && !dp.startTagSrc) {
+                       if (nodeName === 'TR' && !dp.startTagSrc) {
                                stWidth = 0;
                                etWidth = 0;
                        } else {
                                var wtTagWidth = Consts.WT_TagWidths[nodeName];
                                if (stWidth === null) {
                                        // we didn't have a tsr to tell us how 
wide this tag was.
-                                       if (nodeName === 'a') {
+                                       if (nodeName === 'A') {
                                                wtTagWidth = 
computeATagWidth(node, dp);
                                                stWidth = wtTagWidth ? 
wtTagWidth[0] : null;
-                                       } else if (nodeName === 'li' || 
nodeName === 'dd') {
+                                       } else if (nodeName === 'LI' || 
nodeName === 'DD') {
                                                stWidth = 
computeListEltWidth(node, nodeName);
                                        } else if (wtTagWidth) {
                                                stWidth = wtTagWidth[0];
@@ -364,13 +384,13 @@
                                 * we don't have to worry about the above 
decisions and checks.
                                 * 
----------------------------------------------------------------- */
 
-                               if (DU.hasNodeName(child, "a") &&
-                                       child.getAttribute("rel") === 
"mw:WikiLink" &&
+                               if (child.nodeName === 'A' &&
+                                       usesWikiLinkSyntax(child, dp) &&
                                        dp.stx !== "piped")
                                {
                                        /* 
-------------------------------------------------------------
                                         * This check here eliminates artifical 
DSR mismatches on content
-                                        * text of the a-node because of entity 
expansion, etc.
+                                        * text of the A-node because of entity 
expansion, etc.
                                         *
                                         * Ex: [[7%25 solution]] will be 
rendered as:
                                         *    <a href=....>7% solution</a>
diff --git a/js/lib/dom.migrateTemplateMarkerMetas.js 
b/js/lib/dom.migrateTemplateMarkerMetas.js
index c93c8c0..c6d27ab 100644
--- a/js/lib/dom.migrateTemplateMarkerMetas.js
+++ b/js/lib/dom.migrateTemplateMarkerMetas.js
@@ -33,7 +33,7 @@
 
                        // We can migrate the meta-tag across this node's 
end-tag barrier only
                        // if that end-tag is zero-width.
-                       tagWidth = 
Consts.WT_TagWidths[node.nodeName.toLowerCase()];
+                       tagWidth = Consts.WT_TagWidths[node.nodeName];
                        if (tagWidth && tagWidth[0] === 0 && 
!DU.isLiteralHTMLNode(node)) {
                                node.parentNode.insertBefore(firstChild, node);
                        }
@@ -45,7 +45,7 @@
 
                        // We can migrate the meta-tag across this node's 
end-tag barrier only
                        // if that end-tag is zero-width.
-                       tagWidth = 
Consts.WT_TagWidths[node.nodeName.toLowerCase()];
+                       tagWidth = Consts.WT_TagWidths[node.nodeName];
                        if (tagWidth && tagWidth[1] === 0 && 
!DU.isLiteralHTMLNode(node)) {
                                node.parentNode.insertBefore(lastChild, 
node.nextSibling);
                        }
diff --git a/js/lib/mediawiki.wikitext.constants.js 
b/js/lib/mediawiki.wikitext.constants.js
index f10b6c8..7fea8c1 100644
--- a/js/lib/mediawiki.wikitext.constants.js
+++ b/js/lib/mediawiki.wikitext.constants.js
@@ -210,34 +210,34 @@
        // Known wikitext tag widths -- these are known statically
        // but other widths are computed or updated based on actual wikitext 
usage
        WT_TagWidths: {
-               "body"  : [0,0],
-               "html"  : [0,0],
-               "head"  : [0,0],
-               "p"     : [0,0],
-               "meta"  : [0,0],
-               "tbody" : [0,0],
-               "pre"   : [1,0],
-               "ol"    : [0,0],
-               "ul"    : [0,0],
-               "dl"    : [0,0],
-               "li"    : [1,0],
-               "dt"    : [1,0],
-               "dd"    : [1,0],
-               "h1"    : [1,1],
-               "h2"    : [2,2],
-               "h3"    : [3,3],
-               "h4"    : [4,4],
-               "h5"    : [5,5],
-               "h6"    : [6,6],
-               "hr"    : [4,0],
-               "table" : [2,2],
-               "tr"    : [null,0],
-               "td"    : [null,0],
-               "th"    : [null,0],
-               "b"     : [3,3],
-               "i"     : [2,2],
-               "br"    : [0,0],
-               "figure": [2,2]
+               "BODY"  : [0,0],
+               "HTML"  : [0,0],
+               "HEAD"  : [0,0],
+               "P"     : [0,0],
+               "META"  : [0,0],
+               "TBODY" : [0,0],
+               "PRE"   : [1,0],
+               "OL"    : [0,0],
+               "UL"    : [0,0],
+               "DL"    : [0,0],
+               "LI"    : [1,0],
+               "DT"    : [1,0],
+               "DD"    : [1,0],
+               "H1"    : [1,1],
+               "H2"    : [2,2],
+               "H3"    : [3,3],
+               "H4"    : [4,4],
+               "H5"    : [5,5],
+               "H6"    : [6,6],
+               "HR"    : [4,0],
+               "TABLE" : [2,2],
+               "TR"    : [null,0],
+               "TD"    : [null,0],
+               "TH"    : [null,0],
+               "B"     : [3,3],
+               "I"     : [2,2],
+               "BR"    : [0,0],
+               "FIGURE": [2,2]
        },
 
        // HTML tags whose wikitext equivalents are zero-width.
@@ -256,10 +256,10 @@
 Object.keys(WikitextConstants.WT_TagWidths).forEach(function(tag) {
        // This special case can be fixed by maybe removing them WT_TagWidths.
        // They may no longer be necessary -- to be investigated in another 
patch.
-       if (tag !== 'html' && tag !== 'head' && tag !== 'body') {
+       if (tag !== 'HTML' && tag !== 'HEAD' && tag !== 'BODY') {
                var widths = WikitextConstants.WT_TagWidths[tag];
                if (widths[0] === 0 && widths[1] === 0) {
-                       zeroWidthTags.push(tag.toUpperCase());
+                       zeroWidthTags.push(tag);
                }
        }
 });
diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index d250d4b..5418acb 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -861,7 +861,7 @@
            new KV('mw:content', [ref, identifier].join(' ')),
            new KV( 'typeof', 'mw:ExtLink/' + ref )
         ],
-        {tsr: [pos0, pos]})
+        {stx: "protocol", tsr: [pos0, pos]})
     ];
 }
 
@@ -893,7 +893,7 @@
            new KV('mw:content', 'ISBN ' + isbn),
            new KV('typeof', 'mw:WikiLink/ISBN')
         ],
-        {tsr: [pos0, pos]})
+        {stx: "protocol", tsr: [pos0, pos]})
     ];
 }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/101299
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Idf0a15d64739da5d82f0632864987ff547daded6
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: GWicke <gwi...@wikimedia.org>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to