jenkins-bot has submitted this change and it was merged.
(https://gerrit.wikimedia.org/r/393908)

Change subject: Properly handle short headings
......................................................................


Properly handle short headings

This is a minor issue, but it causes some test case failures which would
otherwise get blamed on I12b2a148f7170d20bd9aacd3b5b8ee1965859592.

Bug: T21910
Change-Id: I11926f2d2365755794d8f8f6647b1f0b02b827ab
---
M lib/wt2html/pegTokenizer.pegjs
M tests/parserTests-blacklist.js
M tests/parserTests.txt
3 files changed, 68 insertions(+), 10 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs
index 3dfa339..3f3edd6 100644
--- a/lib/wt2html/pegTokenizer.pegjs
+++ b/lib/wt2html/pegTokenizer.pegjs
@@ -489,14 +489,29 @@
     r:(
      s:$'='+ // moved in here to make s accessible to inner action
      & { return stops.inc('h'); }
-     c:nested_block_line
-     e:$'='+
+     ce:(
+       nested_block_line
+       $'='+
+     )?
      endTPos:("" { return endOffset(); })
      spc:(spaces / comment)*
+     & { stops.dec('h'); return ce || s.length > 2; }
      &eolf
      {
-        stops.dec('h');
-        var level = Math.min(s.length, e.length);
+        var c;
+        var e;
+        var level;
+        if (ce) {
+            c = ce[0];
+            e = ce[1];
+            level = Math.min(s.length, e.length);
+        } else {
+            // split up equal signs into two equal parts, with at least
+            // one character in the middle.
+            level = Math.floor((s.length - 1) / 2);
+            c = '='.repeat(s.length - 2 * level);
+            s = e = '='.repeat(level);
+        }
         level = Math.min(6, level);
         // convert surplus equals into text
         if (s.length > level) {
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 9b7e84c..e01d540 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -146,7 +146,6 @@
 add("wt2html", "TOC with wgMaxTocLevel=3 (T8204)", "<h2 id=\"title_1\" 
data-parsoid='{\"dsr\":[0,13,2,2]}'> title 1 </h2>\n<h3 id=\"title_1.1\" 
data-parsoid='{\"dsr\":[14,31,3,3]}'> title 1.1 </h3>\n<h4 id=\"title_1.1.1\" 
data-parsoid='{\"dsr\":[32,53,4,4]}'> title 1.1.1 </h4>\n<h3 id=\"title_1.2\" 
data-parsoid='{\"dsr\":[54,71,3,3]}'> title 1.2 </h3>\n<h2 id=\"title_2\" 
data-parsoid='{\"dsr\":[72,85,2,2]}'> title 2 </h2>\n<h3 id=\"title_2.1\" 
data-parsoid='{\"dsr\":[86,103,3,3]}'> title 2.1 </h3>");
 add("wt2html", "TOC with wgMaxTocLevel=3 and two level four headings (T8204)", 
"<h2 id=\"Section_1\" data-parsoid='{\"dsr\":[0,13,2,2]}'>Section 1</h2>\n<h3 
id=\"Section_1.1\" data-parsoid='{\"dsr\":[14,31,3,3]}'>Section 1.1</h3>\n<h4 
id=\"Section_1.1.1\" data-parsoid='{\"dsr\":[32,53,4,4]}'>Section 
1.1.1</h4>\n<h4 id=\"Section_1.1.1.1\" 
data-parsoid='{\"dsr\":[54,77,4,4]}'>Section 1.1.1.1</h4>\n<h2 id=\"Section_2\" 
data-parsoid='{\"dsr\":[78,91,2,2]}'>Section 2</h2>");
 add("wt2html", "TOC regression (T14077)", "<meta property=\"mw:PageProp/toc\" 
data-parsoid='{\"src\":\"__TOC__\",\"magicSrc\":\"__TOC__\",\"dsr\":[0,7,null,null]}'/>\n<h2
 id=\"title_1\" data-parsoid='{\"dsr\":[8,21,2,2]}'> title 1 </h2>\n<h3 
id=\"title_1.1\" data-parsoid='{\"dsr\":[22,39,3,3]}'> title 1.1 </h3>\n<h2 
id=\"title_2\" data-parsoid='{\"dsr\":[40,53,2,2]}'> title 2 </h2>");
-add("wt2html", "Short headings with trailing space should match behavior of 
Parser::doHeadings (T21910)", "<p data-parsoid='{\"dsr\":[0,100,0,0]}'>=== 
\nThe line above must have a trailing space!\n=== <!--\n--> <!-- -->\nBut just 
in case it doesn't...</p>");
 add("wt2html", "Header with special characters (T27462)", "<p 
data-parsoid='{\"dsr\":[0,72,0,0]}'>The tooltips shall not show entities to the 
user (ie. be double escaped)</p>\n\n<h2 id=\"text_.3E_text\" 
data-parsoid='{\"dsr\":[74,91,2,2]}'> text > text </h2>\n<p 
data-parsoid='{\"dsr\":[92,101,0,0]}'>section 1</p>\n\n<h2 id=\"text_.3C_text\" 
data-parsoid='{\"dsr\":[103,120,2,2]}'> text &lt; text </h2>\n<p 
data-parsoid='{\"dsr\":[121,130,0,0]}'>section 2</p>\n\n<h2 
id=\"text_.26_text\" data-parsoid='{\"dsr\":[132,149,2,2]}'> text &amp; text 
</h2>\n<p data-parsoid='{\"dsr\":[150,159,0,0]}'>section 3</p>\n\n<h2 
id=\"text_.27_text\" data-parsoid='{\"dsr\":[161,178,2,2]}'> text ' text 
</h2>\n<p data-parsoid='{\"dsr\":[179,188,0,0]}'>section 4</p>\n\n<h2 
id=\"text_.22_text\" data-parsoid='{\"dsr\":[190,207,2,2]}'> text \" text 
</h2>\n<p data-parsoid='{\"dsr\":[208,217,0,0]}'>section 5</p>");
 add("wt2html", "Header with space, plus and underscore as entity", "<p 
data-parsoid='{\"dsr\":[0,34,0,0]}'>Id should not contain + for 
spaces</p>\n\n<h2 id=\"Space_between_Text\" 
data-parsoid='{\"dsr\":[36,60,2,2]}'> Space between Text </h2>\n<p 
data-parsoid='{\"dsr\":[61,70,0,0]}'>section 1</p>\n\n<h2 
id=\"Space-Entity_between_Text\" data-parsoid='{\"dsr\":[72,111,2,2]}'> 
Space-Entity<span typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#32;\",\"srcContent\":\" 
\",\"dsr\":[87,92,null,null]}'> </span>between<span typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#32;\",\"srcContent\":\" 
\",\"dsr\":[99,104,null,null]}'> </span>Text </h2>\n<p 
data-parsoid='{\"dsr\":[112,121,0,0]}'>section 2</p>\n\n<h2 
id=\"Plus.2Bbetween.2BText\" data-parsoid='{\"dsr\":[123,146,2,2]}'> 
Plus+between+Text </h2>\n<p data-parsoid='{\"dsr\":[147,156,0,0]}'>section 
3</p>\n\n<h2 id=\"Plus-Entity.2Bbetween.2BText\" 
data-parsoid='{\"dsr\":[158,196,2,2]}'> Plus-Entity<span typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#43;\",\"srcContent\":\"+\",\"dsr\":[172,177,null,null]}'>+</span>between<span
 typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#43;\",\"srcContent\":\"+\",\"dsr\":[184,189,null,null]}'>+</span>Text
 </h2>\n<p data-parsoid='{\"dsr\":[197,206,0,0]}'>section 4</p>\n\n<h2 
id=\"Underscore_between_Text\" data-parsoid='{\"dsr\":[208,237,2,2]}'> 
Underscore_between_Text </h2>\n<p 
data-parsoid='{\"dsr\":[238,247,0,0]}'>section 5</p>\n\n<h2 
id=\"Underscore-Entity_between_Text\" data-parsoid='{\"dsr\":[249,293,2,2]}'> 
Underscore-Entity<span typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#95;\",\"srcContent\":\"_\",\"dsr\":[269,274,null,null]}'>_</span>between<span
 typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#95;\",\"srcContent\":\"_\",\"dsr\":[281,286,null,null]}'>_</span>Text
 </h2>\n<p data-parsoid='{\"dsr\":[294,303,0,0]}'>section 6</p>\n\n<p 
data-parsoid='{\"dsr\":[305,501,0,0]}'><a rel=\"mw:WikiLink\" 
href=\"./Main_Page#Space_between_Text\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Space_between_Text\"},\"sa\":{\"href\":\"#Space
 between Text\"},\"dsr\":[305,328,2,2]}'>#Space between Text</a>\n<a 
rel=\"mw:WikiLink\" href=\"./Main_Page#Space-Entity_between_Text\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Space-Entity_between_Text\"},\"sa\":{\"href\":\"#Space-Entity&amp;#32;between&amp;#32;Text\"},\"dsr\":[329,367,2,2]}'>#Space-Entity
 between Text</a>\n<a rel=\"mw:WikiLink\" 
href=\"./Main_Page#Plus.2Bbetween.2BText\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Plus.2Bbetween.2BText\"},\"sa\":{\"href\":\"#Plus+between+Text\"},\"dsr\":[368,390,2,2]}'>#Plus+between+Text</a>\n<a
 rel=\"mw:WikiLink\" href=\"./Main_Page#Plus-Entity.2Bbetween.2BText\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Plus-Entity.2Bbetween.2BText\"},\"sa\":{\"href\":\"#Plus-Entity&amp;#43;between&amp;#43;Text\"},\"dsr\":[391,428,2,2]}'>#Plus-Entity+between+Text</a>\n<a
 rel=\"mw:WikiLink\" href=\"./Main_Page#Underscore_between_Text\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Underscore_between_Text\"},\"sa\":{\"href\":\"#Underscore_between_Text\"},\"dsr\":[429,457,2,2]}'>#Underscore_between_Text</a>\n<a
 rel=\"mw:WikiLink\" href=\"./Main_Page#Underscore-Entity_between_Text\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Underscore-Entity_between_Text\"},\"sa\":{\"href\":\"#Underscore-Entity&amp;#95;between&amp;#95;Text\"},\"dsr\":[458,501,2,2]}'>#Underscore-Entity_between_Text</a></p>");
 add("wt2html", "Headers with excess '=' characters\n(Are similar tests 
necessary beyond the 1st level?)", "<h1 id=\"foo.3D\" 
data-parsoid='{\"dsr\":[0,6,1,1]}'>foo=</h1>\n<h1 id=\".3Dfoo\" 
data-parsoid='{\"dsr\":[7,13,1,1]}'>=foo</h1>\n<h1 id=\"italic_heading.3D\" 
data-parsoid='{\"dsr\":[14,35,1,1]}'><i 
data-parsoid='{\"dsr\":[15,25,2,2]}'>italic</i> heading=</h1>\n<h1 
id=\".3Ditalic_heading\" data-parsoid='{\"dsr\":[36,57,1,1]}'>=<i 
data-parsoid='{\"dsr\":[38,48,2,2]}'>italic</i> heading</h1>");
@@ -862,7 +861,6 @@
 add("html2wt", "__NOEDITSECTION__ keyword", "== Section 1 ==\n\n== Section 2 
==\n");
 add("html2wt", "Link inside a section heading", "== Section with a [[wiki/Main 
Page|link]] in it ==\n");
 add("html2wt", "TOC regression (T14077)", "<div id=\"toc\" class=\"toc\"><div 
class=\"toctitle\">\n== Contents ==\n</div>\n\n* [[#title_1|<span 
class=\"tocnumber\">1</span> <span class=\"toctext\">title 1</span>]]\n\n** 
[[#title_1.1|<span class=\"tocnumber\">1.1</span> <span class=\"toctext\">title 
1.1</span>]]\n* [[#title_2|<span class=\"tocnumber\">2</span> <span 
class=\"toctext\">title 2</span>]]\n\n</div>\n\n== title 1 ==\n\n=== title 1.1 
===\n\n== title 2 ==\n");
-add("html2wt", "Short headings with trailing space should match behavior of 
Parser::doHeadings (T21910)", "= = =\nThe line above must have a trailing 
space!\n\n= = =\nBut just in case it doesn't...\n");
 add("html2wt", "Header with special characters (T27462)", "The tooltips shall 
not show entities to the user (ie. be double escaped)\n\n<div id=\"toc\" 
class=\"toc\"><div class=\"toctitle\">\n== Contents ==\n</div>\n\n* 
[[#text_.3E_text|<span class=\"tocnumber\">1</span> <span 
class=\"toctext\">text > text</span>]]\n* [[#text_.3C_text|<span 
class=\"tocnumber\">2</span> <span class=\"toctext\">text < text</span>]]\n* 
[[#text_.26_text|<span class=\"tocnumber\">3</span> <span 
class=\"toctext\">text & text</span>]]\n* [[#text_.27_text|<span 
class=\"tocnumber\">4</span> <span class=\"toctext\">text ' text</span>]]\n* 
[[#text_.22_text|<span class=\"tocnumber\">5</span> <span 
class=\"toctext\">text \" text</span>]]\n\n</div>\n\n== text > text ==\nsection 
1\n\n== text < text ==\nsection 2\n\n== text & text ==\nsection 3\n\n== text ' 
text ==\nsection 4\n\n== text \" text ==\nsection 5\n");
 add("html2wt", "Header with space, plus and underscore as entity", "Id should 
not contain + for spaces\n\n<div id=\"toc\" class=\"toc\"><div 
class=\"toctitle\">\n== Contents ==\n</div>\n\n* [[#Space_between_Text|<span 
class=\"tocnumber\">1</span> <span class=\"toctext\">Space between 
Text</span>]]\n* [[#Space-Entity_between_Text|<span 
class=\"tocnumber\">2</span> <span class=\"toctext\">Space-Entity between 
Text</span>]]\n* [[#Plus.2Bbetween.2BText|<span class=\"tocnumber\">3</span> 
<span class=\"toctext\">Plus+between+Text</span>]]\n* 
[[#Plus-Entity.2Bbetween.2BText|<span class=\"tocnumber\">4</span> <span 
class=\"toctext\">Plus-Entity+between+Text</span>]]\n* 
[[#Underscore_between_Text|<span class=\"tocnumber\">5</span> <span 
class=\"toctext\">Underscore_between_Text</span>]]\n* 
[[#Underscore-Entity_between_Text|<span class=\"tocnumber\">6</span> <span 
class=\"toctext\">Underscore-Entity_between_Text</span>]]\n\n</div>\n\n== Space 
between Text ==\nsection 1\n\n== Space-Entity between Text ==\nsection 2\n\n== 
Plus+between+Text ==\nsection 3\n\n== Plus-Entity+between+Text ==\nsection 
4\n\n== Underscore_between_Text ==\nsection 5\n\n== 
Underscore-Entity_between_Text ==\nsection 6\n\n[[#Space_between_Text|#Space 
between Text]]\n[[#Space-Entity_between_Text|#Space-Entity between 
Text]]\n[[#Plus.2Bbetween.2BText|#Plus+between+Text]]\n[[#Plus-Entity.2Bbetween.2BText|#Plus-Entity+between+Text]]\n[[#Underscore_between_Text|#Underscore_between_Text]]\n[[#Underscore-Entity_between_Text|#Underscore-Entity_between_Text]]\n");
 add("html2wt", "Headers with excess '=' characters\n(Are similar tests 
necessary beyond the 1st level?)", "<div id=\"toc\" class=\"toc\"><div 
class=\"toctitle\">\n== Contents ==\n</div>\n\n* [[#foo.3D|<span 
class=\"tocnumber\">1</span> <span class=\"toctext\">foo=</span>]]\n* 
[[#.3Dfoo|<span class=\"tocnumber\">2</span> <span 
class=\"toctext\">=foo</span>]]\n* [[#italic_heading.3D|<span 
class=\"tocnumber\">3</span> <span class=\"toctext\">''italic'' 
heading=</span>]]\n* [[#.3Ditalic_heading|<span class=\"tocnumber\">4</span> 
<span class=\"toctext\">=''italic'' heading</span>]]\n\n</div>\n\n= foo= =\n\n= 
=foo =\n\n= ''italic'' heading= =\n\n= =''italic'' heading =\n");
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index b1faa82..3d6e6a9 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -16866,21 +16866,30 @@
 <p><a rel="mw:ExtLink" href="http://example.com">http://example.com</a> 
<figure-inline class="mw-default-size" typeof="mw:Image"><a 
href="./File:Foobar.jpg"><img resource="./File:Foobar.jpg" 
src="//example.com/images/3/3a/Foobar.jpg" data-file-width="1941" 
data-file-height="220" data-file-type="bitmap" height="220" 
width="1941"/></a></figure-inline></p>
 !!end
 
+# Parsoid doesn't wt2wt this cleanly because it adds <nowiki>s.
 !! test
 Short headings with trailing space should match behavior of Parser::doHeadings 
(T21910)
+!! options
+parsoid=wt2html,html2html
 !! wikitext
 === 
 The line above must have a trailing space!
 === <!--
 --> <!-- -->
 But just in case it doesn't...
-!! html
+!! html/php
 <h1><span class="mw-headline" id=".3D">=</span><span 
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a 
href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit 
section: =">edit</a><span class="mw-editsection-bracket">]</span></span></h1>
 <p>The line above must have a trailing space!
 </p>
 <h1><span class="mw-headline" id=".3D_2">=</span><span 
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a 
href="/index.php?title=Parser_test&amp;action=edit&amp;section=2" title="Edit 
section: =">edit</a><span class="mw-editsection-bracket">]</span></span></h1>
 <p>But just in case it doesn't...
 </p>
+!! html/parsoid
+<h1 id=".3D">=</h1> 
+<p>The line above must have a trailing space!</p>
+<h1 id=".3D_2">=</h1> <!--
+--> <!-- -->
+<p>But just in case it doesn't...</p>
 !! end
 
 !! test
@@ -24845,17 +24854,53 @@
 !! options
 parsoid=html2wt
 !! html/parsoid
-<p>===
-=foo= x
+<p>=foo= x
 =foo= <s></s>
 </p>
 !! wikitext
-===
 =foo= x
 =foo= <s></s>
+!! html/php
+<p>=foo= x
+=foo= <s></s>
+</p>
 !!end
 
 !! test
+Headings: 4c. Short headings (1)
+!! options
+parsoid=html2wt
+!! html/parsoid
+<p>===
+</p>
+!! wikitext
+<nowiki>===</nowiki>
+!! html/php
+<p>===
+</p>
+!! end
+
+# in the html2wt direction we emit '= = =' or '=<nowiki>=</nowiki>='
+!! test
+Headings: 4d. Short headings (2)
+!! options
+parsoid=wt2html,html2html
+!! wikitext
+===
+====
+=====
+!! html/php
+<h1><span class="mw-headline" id=".3D">=</span><span 
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a 
href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit 
section: =">edit</a><span class="mw-editsection-bracket">]</span></span></h1>
+<h1><span class="mw-headline" id=".3D.3D">==</span><span 
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a 
href="/index.php?title=Parser_test&amp;action=edit&amp;section=2" title="Edit 
section: ==">edit</a><span class="mw-editsection-bracket">]</span></span></h1>
+<h2><span class="mw-headline" id=".3D_2">=</span><span 
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a 
href="/index.php?title=Parser_test&amp;action=edit&amp;section=3" title="Edit 
section: =">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+
+!! html/parsoid
+<h1 id=".3D">=</h1>
+<h1 id=".3D.3D">==</h1>
+<h2 id=".3D_2">=</h2>
+!! end
+
+!! test
 Headings: 5. Empty headings
 !! options
 parsoid=html2wt

-- 
To view, visit https://gerrit.wikimedia.org/r/393908
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I11926f2d2365755794d8f8f6647b1f0b02b827ab
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: C. Scott Ananian <canan...@wikimedia.org>
Gerrit-Reviewer: Sbailey <sbai...@wikimedia.org>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to