Arlolra has uploaded a new change for review. https://gerrit.wikimedia.org/r/96678
Change subject: Add an HTML_PRE state to the PreHandler state machine. ...................................................................... Add an HTML_PRE state to the PreHandler state machine. * This removes the need for an inPre property. * This was suggested, to avoid confusion, in https://gerrit.wikimedia.org/r/#/c/96407/2/js/lib/ext.core.PreHandler.js * Relevant commits are: 20c6afe7f51d14e978f348f376d58e28a9d2b3df 81ab0f9f1d9a5a0830f1d14a98e10a959d173620 Change-Id: Iaff9b0d00e2dd052c7500c47fe3a98c96cb5108e --- M js/lib/ext.core.PreHandler.js 1 file changed, 53 insertions(+), 41 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/78/96678/1 diff --git a/js/lib/ext.core.PreHandler.js b/js/lib/ext.core.PreHandler.js index ddbf044..cbc519a 100644 --- a/js/lib/ext.core.PreHandler.js +++ b/js/lib/ext.core.PreHandler.js @@ -1,7 +1,7 @@ "use strict"; /* -------------------------------------------------------------------------- - PRE-handling relies on the following 5-state FSM. + PRE-handling relies on the following 6-state FSM. ------ States @@ -16,6 +16,8 @@ (depending on whether we see a white-space tok or not) IGNORE -- nothing to do for the rest of the line. + HTML_PRE -- we're already in an html pre. 
+ ----------- Transitions ----------- @@ -28,9 +30,10 @@ + --------------+-----------------+---------------+--------------------------+ | SOL | --- nl --> | SOL | purge | | SOL | --- eof --> | SOL | purge | - | SOL | --- ws --> | PRE|SOL | save ws token|purge(#,##)| + | SOL | --- ws --> | PRE | save whitespace token(##)| | SOL | --- sol-tr --> | SOL | TOKS << tok | | SOL | --- other --> | IGNORE | purge | + | SOL | --- pre --> | HTML_PRE | purge (#) | + --------------+-----------------+---------------+--------------------------+ | PRE | --- nl --> | SOL | purge | | PRE | html-blk tag | IGNORE | purge | @@ -53,11 +56,14 @@ + --------------+-----------------+---------------+--------------------------+ | IGNORE | --- nl --> | SOL | purge | | IGNORE | --- eof --> | SOL | purge | + | IGNORE | --- pre --> | HTML_PRE | purge (#) | + + --------------+-----------------+---------------+--------------------------+ + | HTML_PRE | --- pre --> | IGNORE | purge | + --------------+-----------------+---------------+--------------------------+ # We're being careful to avoid a situation where we generate a pre when we're - already inside a pre. If we've seen an open pre tag (marked as inPre), stay - in SOL and purge. Otherwise, save the whitespace token and transition to PRE. + already inside a html pre. If we've seen an open pre tag, transition to + HTML_PRE and purge. ## In these states, check if the whitespace token is a single space or has additional chars (white-space or non-whitespace) -- if yes, slice it off @@ -67,14 +73,13 @@ var Util = require('./mediawiki.Util.js').Util, defines = require('./mediawiki.parser.defines.js'); + // define some constructor shortcuts var CommentTk = defines.CommentTk, EOFTk = defines.EOFTk, TagTk = defines.TagTk, SelfclosingTagTk = defines.SelfclosingTagTk, EndTagTk = defines.EndTagTk; - -var init; // forward declaration. 
function isPre( token, tag ) { return token.constructor === tag && token.isHTMLTag() && token.name.toUpperCase() === "PRE"; @@ -92,7 +97,9 @@ "PreHandler:onNewline", this.nlRank, 'newline'); this.manager.addTransform(this.onEnd.bind(this), "PreHandler:onEnd", this.endRank, 'end'); - init(this, true); + this.manager.addTransform(this.onAny.bind(this), + "PreHandler:onAny", this.anyRank, 'any'); + this.init(); } } @@ -108,6 +115,7 @@ PreHandler.STATE_PRE_COLLECT = 3; PreHandler.STATE_MULTILINE_PRE = 4; PreHandler.STATE_IGNORE = 5; +PreHandler.STATE_HTML_PRE = 6; // debug string output of FSM states PreHandler.STATE_STR = { @@ -115,30 +123,25 @@ 2: 'pre ', 3: 'pre_collect', 4: 'multiline ', - 5: 'ignore ' + 5: 'ignore ', + 6: 'html_pre ' }; -init = function(handler, addAnyHandler) { - handler.state = PreHandler.STATE_SOL; - handler.lastNlTk = null; +PreHandler.prototype.init = function() { + this.state = PreHandler.STATE_SOL; + this.lastNlTk = null; // Initialize to zero to deal with indent-pre // on the very first line where there is no // preceding newline to initialize this. - handler.preTSR = 0; - handler.tokens = []; - handler.preWSToken = null; - handler.multiLinePreWSToken = null; - handler.solTransparentTokens = []; - if (addAnyHandler) { - handler.manager.addTransform(handler.onAny.bind(handler), - "PreHandler:onAny", handler.anyRank, 'any'); - } - handler.inPre = false; + this.preTSR = 0; + this.tokens = []; + this.preWSToken = null; + this.multiLinePreWSToken = null; + this.solTransparentTokens = []; }; PreHandler.prototype.moveToIgnoreState = function() { this.state = PreHandler.STATE_IGNORE; - this.manager.removeTransform(this.anyRank, 'any'); }; PreHandler.prototype.popLastNL = function(ret) { @@ -246,8 +249,12 @@ case PreHandler.STATE_IGNORE: ret = [token]; ret.rank = this.skipRank; // prevent this from being processed again - init(this, true); // Reset! + this.init(); // Reset! 
this.preTSR = initPreTSR(token); + break; + + case PreHandler.STATE_HTML_PRE: + ret = [ token ]; break; } @@ -260,15 +267,13 @@ }; PreHandler.prototype.onEnd = function (token, manager, cb) { - this.inPre = false; - if (this.state !== PreHandler.STATE_IGNORE) { console.error("!ERROR! Not IGNORE! Cannot get here: " + this.state + "; " + JSON.stringify(token)); - init(this, false); + this.init(); return {tokens: [token]}; } - init(this, true); + this.init(); return {tokens: [token]}; }; @@ -289,20 +294,9 @@ PreHandler.prototype.onAny = function ( token, manager, cb ) { - if ( isPre( token, TagTk ) ) { - this.inPre = true; - } else if ( isPre( token, EndTagTk ) ) { - this.inPre = false; - } - if (this.trace) { if (this.debug) { console.warn("----------"); } console.warn("T:pre:any: " + PreHandler.STATE_STR[this.state] + " : " + JSON.stringify(token)); - } - - if (this.state === PreHandler.STATE_IGNORE) { - console.error("!ERROR! IGNORE! Cannot get here: " + JSON.stringify(token)); - return {tokens: null}; } var ret = null; @@ -311,6 +305,8 @@ switch (this.state) { case PreHandler.STATE_SOL: case PreHandler.STATE_PRE: + case PreHandler.STATE_HTML_PRE: + case PreHandler.STATE_IGNORE: ret = this.getResultAndReset(token); break; @@ -321,12 +317,11 @@ } // reset for next use of this pipeline! 
- this.inPre = false; - init(this, false); + this.init(); } else { switch (this.state) { case PreHandler.STATE_SOL: - if ((tc === String) && token.match(/^ /) && !this.inPre) { + if ((tc === String) && token.match(/^ /)) { ret = this.tokens; this.tokens = []; this.preWSToken = token[0]; @@ -341,6 +336,9 @@ // update pre-tsr since we haven't transitioned to PRE yet this.preTSR = getUpdatedPreTSR(this.preTSR, token); this.tokens.push(token); + } else if ( isPre( token, TagTk ) ) { + ret = this.getResultAndReset( token ); + this.state = PreHandler.STATE_HTML_PRE; } else { ret = this.getResultAndReset(token); this.moveToIgnoreState(); @@ -401,6 +399,20 @@ this.moveToIgnoreState(); } break; + + case PreHandler.STATE_IGNORE: + if ( isPre( token, TagTk ) ) { + this.state = PreHandler.STATE_HTML_PRE; + } + ret = [ token ]; + break; + + case PreHandler.STATE_HTML_PRE: + if ( isPre( token, EndTagTk ) ) { + this.moveToIgnoreState(); + } + ret = [ token ]; + break; } } -- To view, visit https://gerrit.wikimedia.org/r/96678 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iaff9b0d00e2dd052c7500c47fe3a98c96cb5108e Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits