Subramanya Sastry has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/379430 )

Change subject: Detect misnested tags that have different behavior in HTML5 vs 
HTML4
......................................................................

Detect misnested tags that have different behavior in HTML5 vs HTML4

* This will make a difference when Tidy is replaced by Remex and
  will also make Parsoid rendering more compatible with Tidy.

Bug: T176363
Change-Id: I93d779eba9b7738c309b8a8a4a89f337bb4ac168
---
M lib/utils/DOMUtils.js
M lib/wt2html/pp/handlers/linter.js
M tests/mocha/linter.js
3 files changed, 76 insertions(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/30/379430/1

diff --git a/lib/utils/DOMUtils.js b/lib/utils/DOMUtils.js
index e073bde..5de498f 100644
--- a/lib/utils/DOMUtils.js
+++ b/lib/utils/DOMUtils.js
@@ -1244,6 +1244,12 @@
                return next;
        },
 
+       hasFollowingContent: function(node) {
+               return !DU.isBody(node) && (
+                       DU.nextNonSepSibling(node) || 
DU.hasFollowingContent(node.parentNode)
+               );
+       },
+
        numNonDeletedChildNodes: function(node) {
                var n = 0;
                var child = node.firstChild;
diff --git a/lib/wt2html/pp/handlers/linter.js 
b/lib/wt2html/pp/handlers/linter.js
index 8e95440..bd6fb5b 100644
--- a/lib/wt2html/pp/handlers/linter.js
+++ b/lib/wt2html/pp/handlers/linter.js
@@ -17,6 +17,24 @@
 var Util = require('../../../utils/Util.js').Util;
 var Consts = require('../../../config/WikitextConstants.js').WikitextConstants;
 
+var tagsWithChangedMisnestingBehavior;
+function getTagsWithChangedMisnestingBehavior() {
+       if (!tagsWithChangedMisnestingBehavior) {
+               tagsWithChangedMisnestingBehavior = new Set();
+               Consts.HTML.HTML5Tags.forEach(function(t) {
+                       if (Consts.Sanitizer.TagWhiteList.has(t) &&
+                               !Consts.HTML.BlockTags.has(t) &&
+                               !Consts.HTML.FormattingTags.has(t) &&
+                               !Consts.HTML.VoidTags.has(t)
+                       ) {
+                               tagsWithChangedMisnestingBehavior.add(t);
+                       }
+               });
+       }
+
+       return tagsWithChangedMisnestingBehavior;
+}
+
 /*
  * Log Transclusion with more than one parts
  * Ex - {{table-start}}
@@ -172,16 +190,22 @@
                                templateInfo: templateInfo,
                                params: { name: cNodeName },
                        };
-                       var adjNode = getNextMatchingNode(c, c);
-                       if (adjNode) {
-                               var adjDp = DU.getDataParsoid(adjNode);
-                               if (!adjDp.tmp) {
-                                       adjDp.tmp = {};
+                       if 
(getTagsWithChangedMisnestingBehavior().has(c.nodeName) &&
+                               DU.hasFollowingContent(c)
+                       ) {
+                               env.log('lint/html5-misnesting', lintObj);
+                       } else {
+                               var adjNode = getNextMatchingNode(c, c);
+                               if (adjNode) {
+                                       var adjDp = DU.getDataParsoid(adjNode);
+                                       if (!adjDp.tmp) {
+                                               adjDp.tmp = {};
+                                       }
+                                       adjDp.tmp.linted = true;
+                                       env.log('lint/misnested-tag', lintObj);
+                               } else if (DU.hasLiteralHTMLMarker(dp)) {
+                                       env.log('lint/missing-end-tag', 
lintObj);
                                }
-                               adjDp.tmp.linted = true;
-                               env.log('lint/misnested-tag', lintObj);
-                       } else if (DU.hasLiteralHTMLMarker(dp)) {
-                               env.log('lint/missing-end-tag', lintObj);
                        }
                }
 
diff --git a/tests/mocha/linter.js b/tests/mocha/linter.js
index f1ad362..d206801 100644
--- a/tests/mocha/linter.js
+++ b/tests/mocha/linter.js
@@ -600,4 +600,41 @@
                        return expectEmptyResults(wt, { tweakEnv: tweakEnv });
                });
        });
+
+       describe('HTML5 MISNESTED TAGS', function() {
+               it('should not trigger html5 misnesting if there is no 
following content', function() {
+                       return parseWT('<del>foo\nbar').then(function(result) {
+                               result.should.have.length(1);
+                               result[0].should.have.a.property("type", 
"missing-end-tag");
+                               result[0].should.have.a.property("params");
+                               result[0].params.should.have.a.property("name", 
"del");
+                       });
+               });
+               it('should trigger html5 misnesting correctly', function() {
+                       return parseWT('<del>foo\n\nbar').then(function(result) 
{
+                               result.should.have.length(1);
+                               result[0].should.have.a.property("type", 
"html5-misnesting");
+                               result[0].dsr.should.deep.equal([ 0, 8, 5, 0 ]);
+                               result[0].should.have.a.property("params");
+                               result[0].params.should.have.a.property("name", 
"del");
+                       });
+               });
+               it('should trigger html5 misnesting for span', function() {
+                       return 
parseWT('<span>foo\n\nbar').then(function(result) {
+                               result.should.have.length(1);
+                               result[0].should.have.a.property("type", 
"html5-misnesting");
+                               result[0].dsr.should.deep.equal([ 0, 9, 6, 0 ]);
+                               result[0].should.have.a.property("params");
+                               result[0].params.should.have.a.property("name", 
"span");
+                       });
+               });
+               it('should not trigger html5 misnesting for formatting tags', 
function() {
+                       return 
parseWT('<small>foo\n\nbar').then(function(result) {
+                               result.should.have.length(1);
+                               result[0].should.have.a.property("type", 
"missing-end-tag");
+                               result[0].should.have.a.property("params");
+                               result[0].params.should.have.a.property("name", 
"small");
+                       });
+               });
+       });
 });

-- 
To view, visit https://gerrit.wikimedia.org/r/379430
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I93d779eba9b7738c309b8a8a4a89f337bb4ac168
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to