[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Properly handle short headings
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/393908 ) Change subject: Properly handle short headings .. Properly handle short headings This is a minor issue, but it causes some test case failures which would otherwise get blamed on I12b2a148f7170d20bd9aacd3b5b8ee1965859592. Bug: T21910 Change-Id: I11926f2d2365755794d8f8f6647b1f0b02b827ab --- M lib/wt2html/pegTokenizer.pegjs M tests/parserTests-blacklist.js M tests/parserTests.txt 3 files changed, 68 insertions(+), 10 deletions(-) Approvals: Subramanya Sastry: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs index 3dfa339..3f3edd6 100644 --- a/lib/wt2html/pegTokenizer.pegjs +++ b/lib/wt2html/pegTokenizer.pegjs @@ -489,14 +489,29 @@ r:( s:$'='+ // moved in here to make s accessible to inner action & { return stops.inc('h'); } - c:nested_block_line - e:$'='+ + ce:( + nested_block_line + $'='+ + )? endTPos:("" { return endOffset(); }) spc:(spaces / comment)* + & { stops.dec('h'); return ce || s.length > 2; } { -stops.dec('h'); -var level = Math.min(s.length, e.length); +var c; +var e; +var level; +if (ce) { +c = ce[0]; +e = ce[1]; +level = Math.min(s.length, e.length); +} else { +// split up equal signs into two equal parts, with at least +// one character in the middle. +level = Math.floor((s.length - 1) / 2); +c = '='.repeat(s.length - 2 * level); +s = e = '='.repeat(level); +} level = Math.min(6, level); // convert surplus equals into text if (s.length > level) { diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index 9b7e84c..e01d540 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -146,7 +146,6 @@ add("wt2html", "TOC with wgMaxTocLevel=3 (T8204)", " title 1 \n title 1.1 \n title 1.1.1 \n title 1.2 \n title 2 \n title 2.1 "); add("wt2html", "TOC with wgMaxTocLevel=3 and two level four headings (T8204)", "Section 1\nSection 1.1\nSection 1.1.1\nSection 1.1.1.1\nSection 2"); add("wt2html", "TOC regression (T14077)", "\n title 1 \n title 1.1 \n title 2 "); -add("wt2html", "Short headings with trailing space should match behavior of Parser::doHeadings (T21910)", "=== \nThe line above must have a trailing space!\n=== \nBut just in case it doesn't..."); add("wt2html", "Header with special characters (T27462)", "The tooltips shall not show entities to the user (ie. be double escaped)\n\n text > text \nsection 1\n\n text text \nsection 2\n\n text text \nsection 3\n\n text ' text \nsection 4\n\n text \" text \nsection 5"); add("wt2html", "Header with space, plus and underscore as entity", "Id should not contain + for spaces\n\n Space between Text \nsection 1\n\n Space-Entity between Text \nsection 2\n\n Plus+between+Text \nsection 3\n\n Plus-Entity+between+Text \nsection 4\n\n Underscore_between_Text \nsection 5\n\n Underscore-Entity_between_Text \nsection 6\n\n#Space between Text\n#Space-Entity between Text\n#Plus+between+Text\n#Plus-Entity+between+Text\n#Underscore_between_Text\n#Underscore-Entity_between_Text"); add("wt2html", "Headers with excess '=' characters\n(Are similar tests necessary beyond the 1st level?)", "foo=\n=foo\nitalic heading=\n=italic heading"); @@ -862,7 +861,6 @@ add("html2wt", "__NOEDITSECTION__ keyword", "== Section 1 ==\n\n== Section 2 ==\n"); add("html2wt", "Link inside a section heading", "== Section with a [[wiki/Main Page|link]] in it ==\n"); add("html2wt", "TOC regression (T14077)", "\n== Contents ==\n\n\n* [[#title_1|1 title 1]]\n\n** [[#title_1.1|1.1 title 1.1]]\n* [[#title_2|2 title 2]]\n\n\n\n== title 1 ==\n\n=== title 1.1 ===\n\n== title 2 ==\n"); -add("html2wt", "Short headings with trailing space should match behavior of Parser::doHeadings (T21910)", "= = =\nThe line above must have a trailing space!\n\n= = =\nBut just in case it doesn't...\n"); add("html2wt", "Header with special characters (T27462)", "The tooltips shall not show entities to the user (ie. be double escaped)\n\n\n== Contents ==\n\n\n* [[#text_.3E_text|1 text > text]]\n* [[#text_.3C_text|2 text < text]]\n* [[#text_.26_text|3 text & text]]\n* [[#text_.27_text|4 text ' text]]\n* [[#text_.22_text|5 text \" text]]\n\n\n\n== text > text ==\nsection 1\n\n== text < text ==\nsection 2\n\n== text & text ==\nsection 3\n\n== text ' text ==\nsection 4\n\n== text \" text ==\nsection 5\n"); add("html2wt", "Header with space, plus and underscore as entity", "Id should not contain + for spaces\n\n\n== Contents ==\n\n\n* [[#Space_between_Text|1 Space between Text]]\n* [[#Space-Entity_between_Text|2 Space-Entity
[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Properly handle short headings
C. Scott Ananian has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/393908 ) Change subject: Properly handle short headings .. Properly handle short headings This is a minor issue, but it causes some test case failures which would otherwise get blamed on I12b2a148f7170d20bd9aacd3b5b8ee1965859592. Bug: T21910 Change-Id: I11926f2d2365755794d8f8f6647b1f0b02b827ab --- M lib/wt2html/pegTokenizer.pegjs M tests/parserTests-blacklist.js M tests/parserTests.txt 3 files changed, 64 insertions(+), 10 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/08/393908/1 diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs index 3dfa339..3f7dfcd 100644 --- a/lib/wt2html/pegTokenizer.pegjs +++ b/lib/wt2html/pegTokenizer.pegjs @@ -489,14 +489,25 @@ r:( s:$'='+ // moved in here to make s accessible to inner action & { return stops.inc('h'); } - c:nested_block_line - e:$'='+ + ce:( + nested_block_line + $'='+ + )? endTPos:("" { return endOffset(); }) spc:(spaces / comment)* + & { stops.dec('h'); return ce || s.length > 2; } { -stops.dec('h'); -var level = Math.min(s.length, e.length); +var c = ce ? ce[0] : ''; +var e = ce ? ce[1] : ''; +var level; +if (!ce) { + // split up heading + level = (s.length - 1) >>> 1; + c = '='.repeat(s.length - 2*level); + s = e = '='.repeat(level); +} +level = Math.min(s.length, e.length); level = Math.min(6, level); // convert surplus equals into text if (s.length > level) { diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index 9b7e84c..e01d540 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -146,7 +146,6 @@ add("wt2html", "TOC with wgMaxTocLevel=3 (T8204)", " title 1 \n title 1.1 \n title 1.1.1 \n title 1.2 \n title 2 \n title 2.1 "); add("wt2html", "TOC with wgMaxTocLevel=3 and two level four headings (T8204)", "Section 1\nSection 1.1\nSection 1.1.1\nSection 1.1.1.1\nSection 2"); add("wt2html", "TOC regression (T14077)", "\n title 1 \n title 1.1 \n title 2 "); -add("wt2html", "Short headings with trailing space should match behavior of Parser::doHeadings (T21910)", "=== \nThe line above must have a trailing space!\n=== \nBut just in case it doesn't..."); add("wt2html", "Header with special characters (T27462)", "The tooltips shall not show entities to the user (ie. be double escaped)\n\n text > text \nsection 1\n\n text text \nsection 2\n\n text text \nsection 3\n\n text ' text \nsection 4\n\n text \" text \nsection 5"); add("wt2html", "Header with space, plus and underscore as entity", "Id should not contain + for spaces\n\n Space between Text \nsection 1\n\n Space-Entity between Text \nsection 2\n\n Plus+between+Text \nsection 3\n\n Plus-Entity+between+Text \nsection 4\n\n Underscore_between_Text \nsection 5\n\n Underscore-Entity_between_Text \nsection 6\n\n#Space between Text\n#Space-Entity between Text\n#Plus+between+Text\n#Plus-Entity+between+Text\n#Underscore_between_Text\n#Underscore-Entity_between_Text"); add("wt2html", "Headers with excess '=' characters\n(Are similar tests necessary beyond the 1st level?)", "foo=\n=foo\nitalic heading=\n=italic heading"); @@ -862,7 +861,6 @@ add("html2wt", "__NOEDITSECTION__ keyword", "== Section 1 ==\n\n== Section 2 ==\n"); add("html2wt", "Link inside a section heading", "== Section with a [[wiki/Main Page|link]] in it ==\n"); add("html2wt", "TOC regression (T14077)", "\n== Contents ==\n\n\n* [[#title_1|1 title 1]]\n\n** [[#title_1.1|1.1 title 1.1]]\n* [[#title_2|2 title 2]]\n\n\n\n== title 1 ==\n\n=== title 1.1 ===\n\n== title 2 ==\n"); -add("html2wt", "Short headings with trailing space should match behavior of Parser::doHeadings (T21910)", "= = =\nThe line above must have a trailing space!\n\n= = =\nBut just in case it doesn't...\n"); add("html2wt", "Header with special characters (T27462)", "The tooltips shall not show entities to the user (ie. be double escaped)\n\n\n== Contents ==\n\n\n* [[#text_.3E_text|1 text > text]]\n* [[#text_.3C_text|2 text < text]]\n* [[#text_.26_text|3 text & text]]\n* [[#text_.27_text|4 text ' text]]\n* [[#text_.22_text|5 text \" text]]\n\n\n\n== text > text ==\nsection 1\n\n== text < text ==\nsection 2\n\n== text & text ==\nsection 3\n\n== text ' text ==\nsection 4\n\n== text \" text ==\nsection 5\n"); add("html2wt", "Header with space, plus and underscore as entity", "Id should not contain + for spaces\n\n\n== Contents ==\n\n\n* [[#Space_between_Text|1 Space between Text]]\n* [[#Space-Entity_between_Text|2 Space-Entity between Text]]\n* [[#Plus.2Bbetween.2BText|3 Plus+between+Text]]\n* [[#Plus-Entity.2Bbetween.2BText|4