jenkins-bot has submitted this change and it was merged.

Change subject: Fix regression in tokenizer
......................................................................

Fix regression in tokenizer * Introduced in 2dad972c78134de0270152d1e10a96cb2a294eae. * When a matched expression has consumed tokens and we explicitly abort, be sure to backtrack to the reported position. * Cleans up the large diff when round tripping cawiki/2011_en_els_vols_espacials. Change-Id: Icb0dd6ca5d2c49f6e4543c5dc4bafb863903bf3a --- M lib/pegTokenizer.pegjs.txt 1 file changed, 24 insertions(+), 7 deletions(-) Approvals: Subramanya Sastry: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt index d193d46..e854b52 100644 --- a/lib/pegTokenizer.pegjs.txt +++ b/lib/pegTokenizer.pegjs.txt @@ -349,6 +349,7 @@ link:(wl:wikilink { return wl[0]; }) { if (!link || link.constructor === String) { + peg$currPos = peg$reportedPos; return peg$FAILED; } // Set the wikilink's tsr to be zero length. That token is synthetic and @@ -382,6 +383,7 @@ if ( options.env.conf.wiki.getMagicWordMatcher( 'redirect' ).test( rw ) ) { return sp.join('') + rw; } + peg$currPos = peg$reportedPos; return peg$FAILED; } @@ -660,7 +662,7 @@ spc ]); } - / & { stops.dec('h'); return false; } { return peg$FAILED; } + / & { stops.dec('h'); return false; } ) { return r; } comment @@ -842,6 +844,7 @@ } catch ( e ) { // Reject the match, and allow other fall-back productions to have a // go at it. + peg$currPos = peg$reportedPos; return peg$FAILED; } } @@ -1222,7 +1225,12 @@ pre_tag_name = tag:[prePRE]+ { tag = tag.join(''); - return tag.toLowerCase() === "pre" ? tag : peg$FAILED; + if (tag.toLowerCase() === "pre") { + return tag; + } else { + peg$currPos = peg$reportedPos; + return peg$FAILED; + } } // An indented pre block that is surrounded with pre tags. 
The pre tags are @@ -1442,7 +1450,7 @@ dummyText[0] === dp.skipChar) { return true; } else { - peg$currPos -= dummyText.length; + peg$currPos = peg$reportedPos; return peg$FAILED; } } @@ -1453,7 +1461,7 @@ // Ex: <ref />foo, <ref />#foo, <ref />_foo return (!currExtTag || !currExtTag.dataAttribs.extLikeTag || - currExtTag.dataAttribs.skipLen === 0) ? true : peg$FAILED; + currExtTag.dataAttribs.skipLen === 0); } ) { if ( Array.isArray(t2) ) { @@ -1517,7 +1525,12 @@ nowiki_tag_name = tag:[nowikNOWIK]+ { tag = tag.join(''); - return tag.toLowerCase() === "nowiki" ? tag : peg$FAILED; + if (tag.toLowerCase() === "nowiki") { + return tag; + } else { + peg$currPos = peg$reportedPos; + return peg$FAILED; + } } nowiki @@ -1577,7 +1590,7 @@ // <nowiki></pre></nowiki> pre_break = & "</pre>" { //console.log( stops.counters ); - return stops.counters.pre > 0 || peg$FAILED; + return stops.counters.pre > 0 ? undefined : peg$FAILED; } nowiki_content @@ -1788,6 +1801,7 @@ return [buildXMLTag(name, lcName, attribs, end, selfclose, [peg$reportedPos, peg$currPos])]; } else { // abort match if tag is not block-level + peg$currPos = peg$reportedPos; return peg$FAILED; } } @@ -2306,6 +2320,7 @@ } return new EndTagTk(name, [], dp); } else { + peg$currPos = peg$reportedPos; return peg$FAILED; } } @@ -2319,6 +2334,7 @@ tagContent = restOfInput.match(new RegExp("^(.|\n)*?(</\\s*" + incl + "\\s*>)", "m")); if (!tagContent) { + peg$currPos = peg$reportedPos; return peg$FAILED; } @@ -2351,12 +2367,13 @@ return new TagTk(name, [], dp); } else { + peg$currPos = peg$reportedPos; return peg$FAILED; } }) dummyText:('#'+ / '_'+) { var dp = inclTag.dataAttribs; if (dummyText.length !== dp.skipLen || dummyText[0] !== dp.skipChar) { - peg$currPos -= dummyText.length; + peg$currPos = peg$reportedPos; return peg$FAILED; } -- To view, visit https://gerrit.wikimedia.org/r/132144 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: 
Icb0dd6ca5d2c49f6e4543c5dc4bafb863903bf3a Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Cscott <canan...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits