jenkins-bot has submitted this change and it was merged.
Change subject: "!!" should only break "!"-based <th> on the line they start
......................................................................
"!!" should only break "!"-based <th> on the line they start
* Fixes the semantic diff rt'ing kowiki/마이티_모
* A similar issue exists with "||" as in,
{|
|a
b||c
|}
but that's not for the faint-hearted.
Bug: T91411
Change-Id: I28e42342fb0aeb89316574780df0cba7bdaf721f
---
M lib/mediawiki.tokenizer.utils.js
M lib/pegTokenizer.pegjs.txt
M lib/wts.ConstrainedText.js
M lib/wts.escapeWikitext.js
M tests/parserTests.txt
5 files changed, 99 insertions(+), 11 deletions(-)
Approvals:
Subramanya Sastry: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/mediawiki.tokenizer.utils.js b/lib/mediawiki.tokenizer.utils.js
index 76c59af..3b8cdd3 100644
--- a/lib/mediawiki.tokenizer.utils.js
+++ b/lib/mediawiki.tokenizer.utils.js
@@ -206,7 +206,7 @@
counters.tableCellArg))
) && input.substr(pos, 5) === '{{!}}';
case "!":
- return stops.onStack('th') &&
+ return stops.onStack('th') !== false &&
!stops.onCount('templatedepth') &&
input[pos + 1] === "!";
case "}":
diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index 918c337..55c3424 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -1714,6 +1714,7 @@
table_heading_tags
= "!"
+ & { return stops.push('th', endOffset()); }
th:table_heading_tag
ths:( pp:("!!" / pipe_pipe) tht:table_heading_tag {
var da = tht[0].dataAttribs;
@@ -1727,19 +1728,32 @@
return tht;
}
)* {
+ stops.pop('th');
th[0].dataAttribs.tsr[0]--; // include "!"
return th.concat(ths);
}
+ / & { return stops.onStack('th') !== false ? stops.pop('th') : false; }
table_heading_tag
- = & { return stops.push('th', true); }
- arg:row_syntax_table_args?
- tagEndPos:({return endOffset();})
- c:nested_block_in_table* {
- stops.pop('th');
+ = arg:row_syntax_table_args?
+ tagEndPos:({ return endOffset(); })
+ c:( & {
+ // This SyntaxStop is only true until we hit the end of the line.
+ if (stops.onStack('th') !== false) {
+ if (/\n/.test(input.substring(stops.onStack('th'), endOffset()))) {
+ // There's been a newline. Remove the break and continue
+ // tokenizing nested_block_in_tables.
+ stops.pop('th');
+ } else if (input.substr(endOffset() + 1, 2) === '!!') {
+ // Ok, no newlines and we broke for a double exclamation.
+ // Stop here and let the next table_heading_tag proceed.
+ return false;
+ }
+ }
+ return true;
+ } d:nested_block_in_table { return d; } )* {
return tu.buildTableTokens("th", "!", arg, [startOffset(), tagEndPos],
endOffset(), c);
}
- / & { return stops.pop('th'); }
table_end_tag
= sc:(space / comment)* startPos:({ return endOffset(); }) p:pipe b:"}" {
diff --git a/lib/wts.ConstrainedText.js b/lib/wts.ConstrainedText.js
index 13662b2..0227d97 100644
--- a/lib/wts.ConstrainedText.js
+++ b/lib/wts.ConstrainedText.js
@@ -18,7 +18,7 @@
var util = require('util');
/**
- * This adds neccessary escapes to a line of chunks. We provide
+ * This adds necessary escapes to a line of chunks. We provide
* the `ConstrainedText#escape` function with its left and right
* context, and it can determine what escapes are needed.
*
diff --git a/lib/wts.escapeWikitext.js b/lib/wts.escapeWikitext.js
index 7c5ca2c..355a9c6 100644
--- a/lib/wts.escapeWikitext.js
+++ b/lib/wts.escapeWikitext.js
@@ -161,8 +161,13 @@
// So, no matter where in the DOM subtree of the <th> node
// that text shows up in, we have to unconditionally escape
// the !! and | characters.
-
- return text.match(/!!|\|/);
+ //
+ // That is, so long as it serializes to the same line as the
+ // heading was started.
+ var line = state.currLine.chunks.reduce(function(prev, curr) {
+ return prev + curr.text;
+ }, state.currLine.text);
+ return line.match(/^\s*!/) && text.match(/^[^\n]*!!|\|/);
};
WEHP.wikilinkHandler = function(state, text) {
@@ -725,7 +730,7 @@
// Detect if we have open brackets or heading chars -- we use 'processed' flag
// as a performance opt. to run this detection only if/when required.
//
- // FIXME: Even so, it is reset after after every emitted text chunk.
+ // FIXME: Even so, it is reset after every emitted text chunk.
// Could be optimized further by figuring out a way to only test
// newer chunks, but not sure if it is worth the trouble and complexity
var cl = state.currLine;
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 1d6e5cd..e0acd0a 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -1507,6 +1507,30 @@
{{echo|a <nowiki>}</nowiki>}}
!! end
+!! test
+Cases where "!!" needs nowiki protection
+!! options
+parsoid=html2wt
+!! html
+<table>
+<tr><th>this needs protection !! here</th></tr>
+</table>
+
+<table>
+<tr><th>this does not need
+protection !! here</th></tr>
+</table>
+!! wikitext
+{|
+!<nowiki>this needs protection !! here</nowiki>
+|}
+
+{|
+!this does not need
+protection !! here
+|}
+!! end
+
###
### Comments
###
@@ -5699,6 +5723,51 @@
!! end
!! test
+Accept "!!" in table headings after newline
+!! wikitext
+{|
+!a
+b!!c
+|}
+!! html/php
+<table>
+<tr>
+<th>a
+<p>b!!c
+</p>
+</th></tr></table>
+
+!! html/parsoid
+<table>
+<tbody><tr><th>a
+<p>b!!c</p></th></tr>
+</tbody></table>
+!! end
+
+!! test
+Accept "!!" in table data of mixed wikitext / html syntax
+!! wikitext
+{|
+!a
+<tr><td>b!!c</td></tr>
+|}
+!! html+tidy
+<table>
+<tr>
+<th>a</th>
+</tr>
+<tr>
+<td>b!!c</td>
+</tr>
+</table>
+!! html/parsoid
+<table>
+<tbody><tr><th>a</th></tr>
+<tr data-parsoid='{"stx":"html"}'><td data-parsoid='{"stx":"html"}'>b!!c</td></tr>
+</tbody></table>
+!! end
+
+!! test
Accept empty attributes in td/th cells (td/th cells starting with leading ||)
!! wikitext
{|
--
To view, visit https://gerrit.wikimedia.org/r/216015
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I28e42342fb0aeb89316574780df0cba7bdaf721f
Gerrit-PatchSet: 7
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits