MarkTraceur has uploaded a new change for review. https://gerrit.wikimedia.org/r/51784
Change subject: Be a little smarter when escaping wikitext ...................................................................... Be a little smarter when escaping wikitext Not related to any bug, but this might be a more desirable way to escape wikitext in text nodes we serialize. Test case: echo "[maybe some '''text''' in the non-link]" | node parse --html2wt Before: <nowiki>[maybe some '''text''' in the non-link]</nowiki> Now: [maybe some <nowiki>'''</nowiki>text<nowiki>'''</nowiki> in the non-link] One other possible solution is to wrap nowiki around the first wikitext token and emit the </nowiki> only after the last wikitext token in the text we're processing. Also, we could probably use this method in other places; I only tested it with links (as above), so I only added it in one spot. Change-Id: I420e545ca5f83aa00e1a8c7db81b4dd40a246cb8 --- M js/lib/mediawiki.WikitextSerializer.js 1 file changed, 31 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/84/51784/1 diff --git a/js/lib/mediawiki.WikitextSerializer.js b/js/lib/mediawiki.WikitextSerializer.js index 5b19afb..054e4df 100644 --- a/js/lib/mediawiki.WikitextSerializer.js +++ b/js/lib/mediawiki.WikitextSerializer.js @@ -146,7 +146,10 @@ tokens.push(chunk[ci]); } }); - p.on('end', function(){ }); + p.on('end', function(){ + // Save the tokens so the calling function can use them if necessary. + state.resultTokens = tokens; + }); // The code below will break if use async tokenization. p.processSync( prefixedText ); @@ -484,6 +487,32 @@ return ["<nowiki>", match[1], "</nowiki>", match[2]].join(''); } +/** + * Use the tokens generated by some text to figure out the portions that + * actually need to be escaped. 
+ * + * @param {string} text + * @param {Token[]} tokens + * @returns {string} + */ +function intelligentlyEscapedText( text, tokens ) { + var i, token, tsr, escaped = ''; + + for ( i = 0; i < tokens.length; i++ ) { + token = tokens[i]; + if ( typeof token === 'string' ) { + escaped += token; + } else if ( token.name === undefined ) { + escaped += token; + } else { + tsr = token.dataAttribs.tsr; + escaped += '<nowiki>' + text.substring( tsr[0], tsr[1] ) + '</nowiki>'; + } + } + + return escaped; +} + WSP.escapeWikiText = function ( state, text ) { // console.warn("---EWT:ALL1---"); // console.warn("t: " + text); @@ -555,7 +584,7 @@ // Use the tokenizer to see if we have any wikitext tokens if (this.wteHandlers.hasWikitextTokens(state, sol, text) || hasTildes) { // console.warn("---EWT:DBG1---"); - return escapedText(text); + return intelligentlyEscapedText( text, state.resultTokens ); } else if (state.currLine.numPieces > 1) { // console.warn("---EWT:DBG2---"); // Last resort -- process current line text ignoring all embedded tags -- To view, visit https://gerrit.wikimedia.org/r/51784 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I420e545ca5f83aa00e1a8c7db81b4dd40a246cb8 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Parsoid Gerrit-Branch: master Gerrit-Owner: MarkTraceur <mtrac...@member.fsf.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits