[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Properly handle short headings

2017-11-29 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/393908 )

Change subject: Properly handle short headings
......................................................................


Properly handle short headings

This is a minor issue, but it causes some test case failures which would
otherwise get blamed on I12b2a148f7170d20bd9aacd3b5b8ee1965859592.

Bug: T21910
Change-Id: I11926f2d2365755794d8f8f6647b1f0b02b827ab
---
M lib/wt2html/pegTokenizer.pegjs
M tests/parserTests-blacklist.js
M tests/parserTests.txt
3 files changed, 68 insertions(+), 10 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs
index 3dfa339..3f3edd6 100644
--- a/lib/wt2html/pegTokenizer.pegjs
+++ b/lib/wt2html/pegTokenizer.pegjs
@@ -489,14 +489,29 @@
 r:(
  s:$'='+ // moved in here to make s accessible to inner action
  & { return stops.inc('h'); }
- c:nested_block_line
- e:$'='+
+ ce:(
+   nested_block_line
+   $'='+
+ )?
  endTPos:("" { return endOffset(); })
  spc:(spaces / comment)*
+ & { stops.dec('h'); return ce || s.length > 2; }
  
  {
-stops.dec('h');
-var level = Math.min(s.length, e.length);
+var c;
+var e;
+var level;
+if (ce) {
+c = ce[0];
+e = ce[1];
+level = Math.min(s.length, e.length);
+} else {
+// split up equal signs into two equal parts, with at least
+// one character in the middle.
+level = Math.floor((s.length - 1) / 2);
+c = '='.repeat(s.length - 2 * level);
+s = e = '='.repeat(level);
+}
 level = Math.min(6, level);
 // convert surplus equals into text
 if (s.length > level) {
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 9b7e84c..e01d540 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -146,7 +146,6 @@
 add("wt2html", "TOC with wgMaxTocLevel=3 (T8204)", " title 1 \n title 1.1 \n title 1.1.1 \n title 1.2 \n title 2 \n title 2.1 ");
 add("wt2html", "TOC with wgMaxTocLevel=3 and two level four headings (T8204)", 
"Section 1\nSection 1.1\nSection 
1.1.1\nSection 1.1.1.1\nSection 2");
 add("wt2html", "TOC regression (T14077)", "\n title 1 \n title 1.1 \n title 2 ");
-add("wt2html", "Short headings with trailing space should match behavior of 
Parser::doHeadings (T21910)", "=== 
\nThe line above must have a trailing space!\n===  \nBut just 
in case it doesn't...");
 add("wt2html", "Header with special characters (T27462)", "The tooltips shall not show entities to the 
user (ie. be double escaped)\n\n text > text \nsection 1\n\n text  text \nsection 2\n\n text  text 
\nsection 3\n\n text ' text 
\nsection 4\n\n text \" text 
\nsection 5");
 add("wt2html", "Header with space, plus and underscore as entity", "Id should not contain + for 
spaces\n\n Space between Text \nsection 1\n\n 
Space-Entity between Text \nsection 2\n\n 
Plus+between+Text \nsection 
3\n\n Plus-Entity+between+Text
 \nsection 4\n\n 
Underscore_between_Text \nsection 5\n\n 
Underscore-Entity_between_Text
 \nsection 6\n\n#Space between Text\n#Space-Entity
 between Text\n#Plus+between+Text\n#Plus-Entity+between+Text\n#Underscore_between_Text\n#Underscore-Entity_between_Text");
 add("wt2html", "Headers with excess '=' characters\n(Are similar tests 
necessary beyond the 1st level?)", "foo=\n=foo\nitalic heading=\n=italic heading");
@@ -862,7 +861,6 @@
 add("html2wt", "__NOEDITSECTION__ keyword", "== Section 1 ==\n\n== Section 2 
==\n");
 add("html2wt", "Link inside a section heading", "== Section with a [[wiki/Main 
Page|link]] in it ==\n");
 add("html2wt", "TOC regression (T14077)", "\n== Contents ==\n\n\n* [[#title_1|1 title 1]]\n\n** 
[[#title_1.1|1.1 title 
1.1]]\n* [[#title_2|2 title 2]]\n\n\n\n== title 1 ==\n\n=== title 1.1 
===\n\n== title 2 ==\n");
-add("html2wt", "Short headings with trailing space should match behavior of 
Parser::doHeadings (T21910)", "= = =\nThe line above must have a trailing 
space!\n\n= = =\nBut just in case it doesn't...\n");
 add("html2wt", "Header with special characters (T27462)", "The tooltips shall 
not show entities to the user (ie. be double escaped)\n\n\n== Contents ==\n\n\n* 
[[#text_.3E_text|1 text > text]]\n* [[#text_.3C_text|2 text < text]]\n* 
[[#text_.26_text|3 text & text]]\n* [[#text_.27_text|4 text ' text]]\n* 
[[#text_.22_text|5 text \" text]]\n\n\n\n== text > text ==\nsection 
1\n\n== text < text ==\nsection 2\n\n== text & text ==\nsection 3\n\n== text ' 
text ==\nsection 4\n\n== text \" text ==\nsection 5\n");
 add("html2wt", "Header with space, plus and underscore as entity", "Id should 
not contain + for spaces\n\n\n== Contents ==\n\n\n* [[#Space_between_Text|1 Space between 
Text]]\n* [[#Space-Entity_between_Text|2 Space-Entity 

[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Properly handle short headings

2017-11-28 Thread C. Scott Ananian (Code Review)
C. Scott Ananian has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/393908 )

Change subject: Properly handle short headings
......................................................................

Properly handle short headings

This is a minor issue, but it causes some test case failures which would
otherwise get blamed on I12b2a148f7170d20bd9aacd3b5b8ee1965859592.

Bug: T21910
Change-Id: I11926f2d2365755794d8f8f6647b1f0b02b827ab
---
M lib/wt2html/pegTokenizer.pegjs
M tests/parserTests-blacklist.js
M tests/parserTests.txt
3 files changed, 64 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/08/393908/1

diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs
index 3dfa339..3f7dfcd 100644
--- a/lib/wt2html/pegTokenizer.pegjs
+++ b/lib/wt2html/pegTokenizer.pegjs
@@ -489,14 +489,25 @@
 r:(
  s:$'='+ // moved in here to make s accessible to inner action
  & { return stops.inc('h'); }
- c:nested_block_line
- e:$'='+
+ ce:(
+   nested_block_line
+   $'='+
+ )?
  endTPos:("" { return endOffset(); })
  spc:(spaces / comment)*
+ & { stops.dec('h'); return ce || s.length > 2; }
  
  {
-stops.dec('h');
-var level = Math.min(s.length, e.length);
+var c = ce ? ce[0] : '';
+var e = ce ? ce[1] : '';
+var level;
+if (!ce) {
+  // split up heading
+  level = (s.length - 1) >>> 1;
+  c = '='.repeat(s.length - 2*level);
+  s = e = '='.repeat(level);
+}
+level = Math.min(s.length, e.length);
 level = Math.min(6, level);
 // convert surplus equals into text
 if (s.length > level) {
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 9b7e84c..e01d540 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -146,7 +146,6 @@
 add("wt2html", "TOC with wgMaxTocLevel=3 (T8204)", " title 1 \n title 1.1 \n title 1.1.1 \n title 1.2 \n title 2 \n title 2.1 ");
 add("wt2html", "TOC with wgMaxTocLevel=3 and two level four headings (T8204)", 
"Section 1\nSection 1.1\nSection 
1.1.1\nSection 1.1.1.1\nSection 2");
 add("wt2html", "TOC regression (T14077)", "\n title 1 \n title 1.1 \n title 2 ");
-add("wt2html", "Short headings with trailing space should match behavior of 
Parser::doHeadings (T21910)", "=== 
\nThe line above must have a trailing space!\n===  \nBut just 
in case it doesn't...");
 add("wt2html", "Header with special characters (T27462)", "The tooltips shall not show entities to the 
user (ie. be double escaped)\n\n text > text \nsection 1\n\n text  text \nsection 2\n\n text  text 
\nsection 3\n\n text ' text 
\nsection 4\n\n text \" text 
\nsection 5");
 add("wt2html", "Header with space, plus and underscore as entity", "Id should not contain + for 
spaces\n\n Space between Text \nsection 1\n\n 
Space-Entity between Text \nsection 2\n\n 
Plus+between+Text \nsection 
3\n\n Plus-Entity+between+Text
 \nsection 4\n\n 
Underscore_between_Text \nsection 5\n\n 
Underscore-Entity_between_Text
 \nsection 6\n\n#Space between Text\n#Space-Entity
 between Text\n#Plus+between+Text\n#Plus-Entity+between+Text\n#Underscore_between_Text\n#Underscore-Entity_between_Text");
 add("wt2html", "Headers with excess '=' characters\n(Are similar tests 
necessary beyond the 1st level?)", "foo=\n=foo\nitalic heading=\n=italic heading");
@@ -862,7 +861,6 @@
 add("html2wt", "__NOEDITSECTION__ keyword", "== Section 1 ==\n\n== Section 2 
==\n");
 add("html2wt", "Link inside a section heading", "== Section with a [[wiki/Main 
Page|link]] in it ==\n");
 add("html2wt", "TOC regression (T14077)", "\n== Contents ==\n\n\n* [[#title_1|1 title 1]]\n\n** 
[[#title_1.1|1.1 title 
1.1]]\n* [[#title_2|2 title 2]]\n\n\n\n== title 1 ==\n\n=== title 1.1 
===\n\n== title 2 ==\n");
-add("html2wt", "Short headings with trailing space should match behavior of 
Parser::doHeadings (T21910)", "= = =\nThe line above must have a trailing 
space!\n\n= = =\nBut just in case it doesn't...\n");
 add("html2wt", "Header with special characters (T27462)", "The tooltips shall 
not show entities to the user (ie. be double escaped)\n\n\n== Contents ==\n\n\n* 
[[#text_.3E_text|1 text > text]]\n* [[#text_.3C_text|2 text < text]]\n* 
[[#text_.26_text|3 text & text]]\n* [[#text_.27_text|4 text ' text]]\n* 
[[#text_.22_text|5 text \" text]]\n\n\n\n== text > text ==\nsection 
1\n\n== text < text ==\nsection 2\n\n== text & text ==\nsection 3\n\n== text ' 
text ==\nsection 4\n\n== text \" text ==\nsection 5\n");
 add("html2wt", "Header with space, plus and underscore as entity", "Id should 
not contain + for spaces\n\n\n== Contents ==\n\n\n* [[#Space_between_Text|1 Space between 
Text]]\n* [[#Space-Entity_between_Text|2 Space-Entity between 
Text]]\n* [[#Plus.2Bbetween.2BText|3 
Plus+between+Text]]\n* 
[[#Plus-Entity.2Bbetween.2BText|4