Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/132144

Change subject: Fix regression in tokenizer
......................................................................

Fix regression in tokenizer

 * Introduced in 2dad972c78134de0270152d1e10a96cb2a294eae.

 * When a matched expression has consumed tokens and we explicitly
   abort, be sure to backtrack to the reported position.

 * Cleans up the large diff when round tripping
   cawiki/2011_en_els_vols_espacials.

Change-Id: Icb0dd6ca5d2c49f6e4543c5dc4bafb863903bf3a
---
M lib/pegTokenizer.pegjs.txt
1 file changed, 21 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/44/132144/1

diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index d193d46..a39bccd 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -349,6 +349,7 @@
     link:(wl:wikilink { return wl[0]; })
 {
     if (!link || link.constructor === String) {
+        peg$currPos = peg$reportedPos;
         return peg$FAILED;
     }
     // Set the wikilink's tsr to be zero length. That token is synthetic and
@@ -382,6 +383,7 @@
     if ( options.env.conf.wiki.getMagicWordMatcher( 'redirect' ).test( rw ) ) {
         return sp.join('') + rw;
     }
+    peg$currPos = peg$reportedPos;
     return peg$FAILED;
 }
 
@@ -842,6 +844,7 @@
     } catch ( e ) {
         // Reject the match, and allow other fall-back productions to have a
         // go at it.
+        peg$currPos = peg$reportedPos;
         return peg$FAILED;
     }
 }
@@ -1222,7 +1225,12 @@
 pre_tag_name =
   tag:[prePRE]+ {
       tag = tag.join('');
-      return tag.toLowerCase() === "pre" ? tag : peg$FAILED;
+      if (tag.toLowerCase() === "pre") {
+          return tag;
+      } else {
+          peg$currPos = peg$reportedPos;
+          return peg$FAILED;
+      }
   }
 
 // An indented pre block that is surrounded with pre tags. The pre tags are
@@ -1442,7 +1450,7 @@
             dummyText[0] === dp.skipChar) {
             return true;
          } else {
-            peg$currPos -= dummyText.length;
+            peg$currPos = peg$reportedPos;
             return peg$FAILED;
          }
       }
@@ -1517,7 +1525,12 @@
 nowiki_tag_name =
   tag:[nowikNOWIK]+ {
       tag = tag.join('');
-      return tag.toLowerCase() === "nowiki" ? tag : peg$FAILED;
+      if (tag.toLowerCase() === "nowiki") {
+          return tag;
+      } else {
+          peg$currPos = peg$reportedPos;
+          return peg$FAILED;
+      }
   }
 
 nowiki
@@ -1788,6 +1801,7 @@
             return [buildXMLTag(name, lcName, attribs, end, selfclose, [peg$reportedPos, peg$currPos])];
         } else {
             // abort match if tag is not block-level
+            peg$currPos = peg$reportedPos;
             return peg$FAILED;
         }
     }
@@ -2306,6 +2320,7 @@
          }
          return new EndTagTk(name, [], dp);
      } else {
+         peg$currPos = peg$reportedPos;
          return peg$FAILED;
      }
   }
@@ -2319,6 +2334,7 @@
              tagContent = restOfInput.match(new RegExp("^(.|\n)*?(</\\s*" + incl + "\\s*>)", "m"));
 
          if (!tagContent) {
+            peg$currPos = peg$reportedPos;
             return peg$FAILED;
          }
 
@@ -2351,12 +2367,13 @@
 
          return new TagTk(name, [], dp);
      } else {
+         peg$currPos = peg$reportedPos;
          return peg$FAILED;
      }
   }) dummyText:('#'+ / '_'+) {
       var dp = inclTag.dataAttribs;
       if (dummyText.length !== dp.skipLen || dummyText[0] !== dp.skipChar) {
-          peg$currPos -= dummyText.length;
+          peg$currPos = peg$reportedPos;
           return peg$FAILED;
       }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/132144
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Icb0dd6ca5d2c49f6e4543c5dc4bafb863903bf3a
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to