jenkins-bot has submitted this change and it was merged.

Change subject: "!!" should only break "!"-based <th> on the line they start
......................................................................


"!!" should only break "!"-based <th> on the line they start

 * Fixes the semantic diff rt'ing kowiki/마이티_모

 * A similar issue exists with "||" as in,

   {|
   |a
   b||c
   |}

   but that's not for the faint-hearted.

Bug: T91411
Change-Id: I28e42342fb0aeb89316574780df0cba7bdaf721f
---
M lib/mediawiki.tokenizer.utils.js
M lib/pegTokenizer.pegjs.txt
M lib/wts.ConstrainedText.js
M lib/wts.escapeWikitext.js
M tests/parserTests.txt
5 files changed, 99 insertions(+), 11 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/mediawiki.tokenizer.utils.js b/lib/mediawiki.tokenizer.utils.js
index 76c59af..3b8cdd3 100644
--- a/lib/mediawiki.tokenizer.utils.js
+++ b/lib/mediawiki.tokenizer.utils.js
@@ -206,7 +206,7 @@
                                                counters.tableCellArg))
                                ) && input.substr(pos, 5) === '{{!}}';
                        case "!":
-                               return stops.onStack('th') &&
+                               return stops.onStack('th') !== false &&
                                        !stops.onCount('templatedepth') &&
                                        input[pos + 1] === "!";
                        case "}":
diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index 918c337..55c3424 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -1714,6 +1714,7 @@
 
 table_heading_tags
   = "!"
+    & { return stops.push('th', endOffset()); }
     th:table_heading_tag
     ths:( pp:("!!" / pipe_pipe) tht:table_heading_tag {
             var da = tht[0].dataAttribs;
@@ -1727,19 +1728,32 @@
             return tht;
           }
     )* {
+        stops.pop('th');
         th[0].dataAttribs.tsr[0]--; // include "!"
         return th.concat(ths);
     }
+    / & { return stops.onStack('th') !== false ? stops.pop('th') : false; }
 
 table_heading_tag
-  = & { return stops.push('th', true); }
-    arg:row_syntax_table_args?
-    tagEndPos:({return endOffset();})
-    c:nested_block_in_table* {
-        stops.pop('th');
+  = arg:row_syntax_table_args?
+    tagEndPos:({ return endOffset(); })
+    c:( & {
+      // This SyntaxStop is only true until we hit the end of the line.
+      if (stops.onStack('th') !== false) {
+        if (/\n/.test(input.substring(stops.onStack('th'), endOffset()))) {
+          // There's been a newline. Remove the break and continue
+          // tokenizing nested_block_in_tables.
+          stops.pop('th');
+        } else if (input.substr(endOffset() + 1, 2) === '!!') {
+          // Ok, no newlines and we broke for a double exclamation.
+          // Stop here and let the next table_heading_tag proceed.
+          return false;
+        }
+      }
+      return true;
+    } d:nested_block_in_table { return d; } )* {
         return tu.buildTableTokens("th", "!", arg, [startOffset(), tagEndPos], endOffset(), c);
     }
-    / & { return stops.pop('th'); }
 
 table_end_tag
   = sc:(space / comment)* startPos:({ return endOffset(); }) p:pipe b:"}" {
diff --git a/lib/wts.ConstrainedText.js b/lib/wts.ConstrainedText.js
index 13662b2..0227d97 100644
--- a/lib/wts.ConstrainedText.js
+++ b/lib/wts.ConstrainedText.js
@@ -18,7 +18,7 @@
 var util = require('util');
 
 /**
- * This adds neccessary escapes to a line of chunks.  We provide
+ * This adds necessary escapes to a line of chunks.  We provide
  * the `ConstrainedText#escape` function with its left and right
  * context, and it can determine what escapes are needed.
  *
diff --git a/lib/wts.escapeWikitext.js b/lib/wts.escapeWikitext.js
index 7c5ca2c..355a9c6 100644
--- a/lib/wts.escapeWikitext.js
+++ b/lib/wts.escapeWikitext.js
@@ -161,8 +161,13 @@
        // So, no matter where in the DOM subtree of the <th> node
        // that text shows up in, we have to unconditionally escape
        // the !! and | characters.
-
-       return text.match(/!!|\|/);
+       //
+       // That is, so long as it serializes to the same line as the
+       // heading was started.
+       var line = state.currLine.chunks.reduce(function(prev, curr) {
+               return prev + curr.text;
+       }, state.currLine.text);
+       return line.match(/^\s*!/) && text.match(/^[^\n]*!!|\|/);
 };
 
 WEHP.wikilinkHandler = function(state, text) {
@@ -725,7 +730,7 @@
                // Detect if we have open brackets or heading chars -- we use 'processed' flag
                // as a performance opt. to run this detection only if/when required.
                //
-               // FIXME: Even so, it is reset after after every emitted text chunk.
+               // FIXME: Even so, it is reset after every emitted text chunk.
                // Could be optimized further by figuring out a way to only test
                // newer chunks, but not sure if it is worth the trouble and complexity
                var cl = state.currLine;
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 1d6e5cd..e0acd0a 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -1507,6 +1507,30 @@
 {{echo|a <nowiki>}</nowiki>}}
 !! end
 
+!! test
+Cases where "!!" needs nowiki protection
+!! options
+parsoid=html2wt
+!! html
+<table>
+<tr><th>this needs protection !! here</th></tr>
+</table>
+
+<table>
+<tr><th>this does not need
+protection !! here</th></tr>
+</table>
+!! wikitext
+{|
+!<nowiki>this needs protection !! here</nowiki>
+|}
+
+{|
+!this does not need
+protection !! here
+|}
+!! end
+
 ###
 ### Comments
 ###
@@ -5699,6 +5723,51 @@
 !! end
 
 !! test
+Accept "!!" in table headings after newline
+!! wikitext
+{|
+!a
+b!!c
+|}
+!! html/php
+<table>
+<tr>
+<th>a
+<p>b!!c
+</p>
+</th></tr></table>
+
+!! html/parsoid
+<table>
+<tbody><tr><th>a
+<p>b!!c</p></th></tr>
+</tbody></table>
+!! end
+
+!! test
+Accept "!!" in table data of mixed wikitext / html syntax
+!! wikitext
+{|
+!a
+<tr><td>b!!c</td></tr>
+|}
+!! html+tidy
+<table>
+<tr>
+<th>a</th>
+</tr>
+<tr>
+<td>b!!c</td>
+</tr>
+</table>
+!! html/parsoid
+<table>
+<tbody><tr><th>a</th></tr>
+<tr data-parsoid='{"stx":"html"}'><td data-parsoid='{"stx":"html"}'>b!!c</td></tr>
+</tbody></table>
+!! end
+
+!! test
 Accept empty attributes in td/th cells (td/th cells starting with leading ||)
 !! wikitext
 {|

-- 
To view, visit https://gerrit.wikimedia.org/r/216015
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I28e42342fb0aeb89316574780df0cba7bdaf721f
Gerrit-PatchSet: 7
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to