Arlolra has uploaded a new change for review. https://gerrit.wikimedia.org/r/132144
Change subject: Fix regression in tokenizer
......................................................................

Fix regression in tokenizer

 * Introduced in 2dad972c78134de0270152d1e10a96cb2a294eae.
 * When a matched expression has consumed tokens and we explicitly abort,
   be sure to backtrack to the reported position.
 * Cleans up the large diff when round tripping
   cawiki/2011_en_els_vols_espacials.

Change-Id: Icb0dd6ca5d2c49f6e4543c5dc4bafb863903bf3a
---
M lib/pegTokenizer.pegjs.txt
1 file changed, 21 insertions(+), 4 deletions(-)

  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/44/132144/1

diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index d193d46..a39bccd 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -349,6 +349,7 @@
   link:(wl:wikilink { return wl[0]; })
   {
       if (!link || link.constructor === String) {
+          peg$currPos = peg$reportedPos;
           return peg$FAILED;
       }
       // Set the wikilink's tsr to be zero length. That token is synthetic and
@@ -382,6 +383,7 @@
       if ( options.env.conf.wiki.getMagicWordMatcher( 'redirect' ).test( rw ) ) {
           return sp.join('') + rw;
       }
+      peg$currPos = peg$reportedPos;
       return peg$FAILED;
   }
@@ -842,6 +844,7 @@
       } catch ( e ) {
           // Reject the match, and allow other fall-back productions to have a
           // go at it.
+          peg$currPos = peg$reportedPos;
           return peg$FAILED;
       }
   }
@@ -1222,7 +1225,12 @@
 pre_tag_name =
   tag:[prePRE]+ {
       tag = tag.join('');
-      return tag.toLowerCase() === "pre" ? tag : peg$FAILED;
+      if (tag.toLowerCase() === "pre") {
+          return tag;
+      } else {
+          peg$currPos = peg$reportedPos;
+          return peg$FAILED;
+      }
   }
 // An indented pre block that is surrounded with pre tags. The pre tags are
@@ -1442,7 +1450,7 @@
           dummyText[0] === dp.skipChar) {
           return true;
       } else {
-          peg$currPos -= dummyText.length;
+          peg$currPos = peg$reportedPos;
           return peg$FAILED;
       }
   }
@@ -1517,7 +1525,12 @@
 nowiki_tag_name =
   tag:[nowikNOWIK]+ {
       tag = tag.join('');
-      return tag.toLowerCase() === "nowiki" ? tag : peg$FAILED;
+      if (tag.toLowerCase() === "nowiki") {
+          return tag;
+      } else {
+          peg$currPos = peg$reportedPos;
+          return peg$FAILED;
+      }
   }
 nowiki
@@ -1788,6 +1801,7 @@
           return [buildXMLTag(name, lcName, attribs, end, selfclose, [peg$reportedPos, peg$currPos])];
       } else {
           // abort match if tag is not block-level
+          peg$currPos = peg$reportedPos;
           return peg$FAILED;
       }
   }
@@ -2306,6 +2320,7 @@
           }
           return new EndTagTk(name, [], dp);
       } else {
+          peg$currPos = peg$reportedPos;
           return peg$FAILED;
       }
   }
@@ -2319,6 +2334,7 @@
       tagContent = restOfInput.match(new RegExp("^(.|\n)*?(</\\s*" + incl + "\\s*>)", "m"));
       if (!tagContent) {
+          peg$currPos = peg$reportedPos;
           return peg$FAILED;
       }
@@ -2351,12 +2367,13 @@
           return new TagTk(name, [], dp);
       } else {
+          peg$currPos = peg$reportedPos;
           return peg$FAILED;
       }
   })
   dummyText:('#'+ / '_'+) {
       var dp = inclTag.dataAttribs;
       if (dummyText.length !== dp.skipLen || dummyText[0] !== dp.skipChar) {
-          peg$currPos -= dummyText.length;
+          peg$currPos = peg$reportedPos;
           return peg$FAILED;
       }

--
To view, visit https://gerrit.wikimedia.org/r/132144
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Icb0dd6ca5d2c49f6e4543c5dc4bafb863903bf3a
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits