Arlolra has uploaded a new change for review. https://gerrit.wikimedia.org/r/210821
Change subject: html2wt should not need access to original source ...................................................................... html2wt should not need access to original source Bug: T96923 Change-Id: I104982e061abf20c63a6342caa5600a090c9ced7 --- M lib/dom.cleanup.js M lib/mediawiki.WikitextSerializer.js M lib/wts.SerializerState.js M lib/wts.TagHandlers.js 4 files changed, 25 insertions(+), 32 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/21/210821/1 diff --git a/lib/dom.cleanup.js b/lib/dom.cleanup.js index e888aab..1af70c0 100644 --- a/lib/dom.cleanup.js +++ b/lib/dom.cleanup.js @@ -91,11 +91,14 @@ DU.setDataMw( node, undefined ); } - // Remove dp.src from elements that have valid data-mw and dsr. This - // should reduce data-parsoid bloat. - if ( validDataMW && Util.isValidDSR(dp.dsr) ) { + // Remove dp.src from elements that have valid data-mw and dsr. + // This should reduce data-parsoid bloat. + var validDSR = validDataMW && Util.isValidDSR(dp.dsr); + var isPageProp = (node.nodeName === 'META' && + /^mw\:PageProp\/(.*)$/.test(node.getAttribute('property'))); + if (validDSR && !isPageProp) { dp.src = undefined; - } else if ( tplInfo && tplInfo.first === node && (!atTopLevel || !dp.tsr) ) { + } else if (tplInfo && tplInfo.first === node && (!atTopLevel || !dp.tsr)) { // Transcluded nodes will not have dp.tsr set // and don't need dp.src either. dp.src = undefined; diff --git a/lib/mediawiki.WikitextSerializer.js b/lib/mediawiki.WikitextSerializer.js index 7e3d796..d51f4c5 100644 --- a/lib/mediawiki.WikitextSerializer.js +++ b/lib/mediawiki.WikitextSerializer.js @@ -650,8 +650,7 @@ if (/(?:^|\s)mw:Transclusion(?=$|\s)/.test(typeOf)) { dataMW = DU.getDataMw(node); if (dataMW.parts) { - src = state.serializer._buildTemplateWT(node, - state, dataMW.parts ); + src = state.serializer._buildTemplateWT(node, state, dataMW.parts); } else if (dp.src) { self.env.log("error", "data-mw missing in: " + node.outerHTML); src = dp.src; @@ -659,7 +658,11 @@ throw new Error("Cannot serialize transclusion without data-mw.parts or data-parsoid.src."); } } else if (/(?:^|\s)mw:Param(?=$|\s)/.test(typeOf)) { - src = dp.src; + if (dp.src) { + src = dp.src; + } else { + throw new Error("No source for params."); + } } else if (/(?:^|\s)mw:Extension\/LabeledSectionTransclusion/.test(typeOf)) { // FIXME: Special case for <section> until LST is implemented // natively in Parsoid @@ -698,24 +701,10 @@ throw new Error("Should never reach here"); } - // FIXME: Just adding this here temporarily till we go in and - // clean this up and strip this out if we can verify that data-mw - // is going to be present always when necessary and indicate that - // a missing data-mw is either a parser bug or a client error. - // - // Fallback: should be exercised only in exceptional situations. - if (src === undefined && state.env.page.src && Util.isValidDSR(dp.dsr) && !dp.fostered) { - src = state.getOrigSrc(dp.dsr[0], dp.dsr[1]); - } - if (src !== undefined) { - state.singleLineContext.disable(); - self.emitWikitext(src, state, cb, node); - state.singleLineContext.pop(); - return DU.skipOverEncapsulatedContent(node); - } else { - self.env.log( "error", "Serializing as HTML. No handler for: " + node.outerHTML ); - return self._htmlElementHandler(node, state, cb); - } + state.singleLineContext.disable(); + self.emitWikitext(src, state, cb, node); + state.singleLineContext.pop(); + return DU.skipOverEncapsulatedContent(node); }, sepnls: { // XXX: This is questionable, as the template can expand diff --git a/lib/wts.SerializerState.js b/lib/wts.SerializerState.js index a16c9b7..257677f 100644 --- a/lib/wts.SerializerState.js +++ b/lib/wts.SerializerState.js @@ -181,6 +181,7 @@ }; SSP.getOrigSrc = function(start, end) { + console.assert(this.selserMode); return this.env.page.src.substring(start, end); }; diff --git a/lib/wts.TagHandlers.js b/lib/wts.TagHandlers.js index 9af1be9..6388d1e 100644 --- a/lib/wts.TagHandlers.js +++ b/lib/wts.TagHandlers.js @@ -931,17 +931,15 @@ // Check for property before type so that page properties with templated attrs // roundtrip properly. Ex: {{DEFAULTSORT:{{echo|foo}} }} - if ( property ) { - var switchType = property.match( /^mw\:PageProp\/(.*)$/ ); - if ( switchType ) { + if (property) { + var switchType = property.match(/^mw\:PageProp\/(.*)$/); + if (switchType) { var out = switchType[1]; var cat = out.match(/^(?:category)?(.*)/); if ( cat && Util.magicMasqs.has(cat[1]) ) { - var wtSource = DU.getWTSource(state.env, node); - // Use content so that VE modifications are preserved var contentInfo = state.serializer.serializedAttrVal(node, "content", {}); - if (wtSource) { - out = wtSource.replace(/^([^:]+:)(.*)$/, "$1" + contentInfo.value + "}}"); + if (dp.src) { + out = dp.src.replace(/^([^:]+:)(.*)$/, "$1" + contentInfo.value + "}}"); } else { var magicWord = cat[1].toUpperCase(); state.env.log("warning", cat[1] + ' is missing source. Rendering as ' + magicWord + ' magicword'); @@ -951,6 +949,8 @@ out = state.env.conf.wiki.getMagicWordWT( switchType[1], dp.magicSrc ) || ''; } cb(out, node); + } else { + state.serializer._htmlElementHandler(node, state, cb); } } else if ( type ) { switch ( type ) { -- To view, visit https://gerrit.wikimedia.org/r/210821 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I104982e061abf20c63a6342caa5600a090c9ced7 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits