Cenarium has uploaded a new change for review. https://gerrit.wikimedia.org/r/272306
Change subject: Partial fix for stripped wikitext issue in references data ...................................................................... Partial fix for stripped wikitext issue in references data Ref tags called with the {{#tag:|}} parser function can contain <ref> tags or other core/extension defined tags. When references data is retrieved during parse of the <ref> and <references> tags (to be later saved in the database), the parser will return strip markers in those cases instead of the original wikitext. Recovering the original wikitext is not really possible at this point, so this uses the ParserExtensionSubstitution hook added in I8746b0c9d1ca215c316ebfe5d6b86618f7813577 to recover enough information about the original wikitext to reconstruct wikitext largely equivalent to the original. This only works for <ref> tags and at one level of recursion. So this addresses the largest use case, i.e. references nested in notes. Change-Id: I7ed38d9b38438cdf8647bb05587344ef4e30b3d7 --- M CiteHooks.php M Cite_body.php 2 files changed, 65 insertions(+), 8 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Cite refs/changes/06/272306/1 diff --git a/CiteHooks.php b/CiteHooks.php index 5622c7e..792469a 100644 --- a/CiteHooks.php +++ b/CiteHooks.php @@ -114,15 +114,36 @@ // remove spaces or newlines added at the beginning or end of references $ref['text'] = trim( $ref['text'] ); - // core and extension tags (including <ref>, <nowiki>, etc) inside {{#tag:ref}} calls - // get replaced by the parser with a strip marker before Cite::ref is called and the - // original wikitext cannot be recovered, so remove these useless strip markers + // core and extension tags (including <ref>, <nowiki>, etc) + // inside {{#tag:ref}} calls get replaced by the parser with + // a strip marker before Cite::ref is called, so we try to + // restore as much of the original wikitext as possible + // this only works for <ref> tags one-level deep + $subst = $refData['subst']; + foreach ( $subst as $num => $val ) { + $str = '<' . $val['name']; + foreach ( $val['attributes'] as $attrName => $attrValue ) { + $str .= ' ' . $attrName . '="' . $attrValue . '"'; + } + $str .= '>' . $val['content'] . '</' . $val['name'] . '>'; + $count = 0; + $ref['text'] = str_replace( $val['marker'], $str, $ref['text'], $count ); + if ( $count ) { + // let callers know about the restored wikitext content that + // is not necessarily exactly the same as the original + $ref['markers-restored'] = true; + unset( $refData['subst'][$num] ); + } + } + + // strip markers due to other tags or doubly nested refs may still remain + // so remove them $stripState = new StripState(); $text = $stripState->killMarkers( $ref['text'] ); if ( $ref['text'] !== $text ) { $ref['text'] = $text; // let callers know about the removed strip markers - $ref['markers'] = true; + $ref['markers-removed'] = true; } // return more meaningful count @@ -140,6 +161,7 @@ } } $refData['refs'] = $refs; + unset( $refData['subst'] ); // no longer needed return $refData; } diff --git a/Cite_body.php b/Cite_body.php index 53a8c0a..4bf0a46 100644 --- a/Cite_body.php +++ b/Cite_body.php @@ -1193,10 +1193,7 @@ $savedRefs = $this->mParser->getOutput()->getExtensionData( self::EXT_DATA_KEY ); if ( $savedRefs === null ) { // Initialize array structure - $savedRefs = array( - 'refs' => array(), - 'version' => self::DATA_VERSION_NUMBER, - ); + $savedRefs = self::initializeRefData(); } if ( $this->mBumpRefData ) { // This handles pages with multiple <references/> tags with <ref> tags in between. @@ -1210,6 +1207,43 @@ $savedRefs['refs'][$n][$group] = $this->mRefs[$group]; $this->mParser->getOutput()->setExtensionData( self::EXT_DATA_KEY, $savedRefs ); + } + + public static function saveSubstitutionData( $parser, $name, $attributes, $content, $marker ) { + global $wgCiteStoreReferencesData; + if ( !$wgCiteStoreReferencesData ) { + return; + } + // allow only 'ref' tags + if ( $name !== 'ref' ) { + return; + } + // only save substitution data if we're inside a {{#tag:ref|...}} call + $tagParserFunctionCalls = $parser->getIsInsideTagParserFunctionCall(); + if ( !isset( $tagParserFunctionCalls['ref'] ) ) { + return; + } + + $savedRefs = $parser->getOutput()->getExtensionData( self::EXT_DATA_KEY ); + if ( $savedRefs === null ) { + // Initialize array structure + $savedRefs = self::initializeRefData(); + } + $savedRefs['subst'][] = array( + 'name' => $name, + 'attributes' => $attributes, + 'content' => $content, + 'marker' => $marker, + ); + $parser->getOutput()->setExtensionData( self::EXT_DATA_KEY, $savedRefs ); + } + + private static function initializeRefData() { + return array( + 'refs' => array(), + 'subst' => array(), + 'version' => self::DATA_VERSION_NUMBER, + ); } /** @@ -1245,6 +1279,7 @@ $wgHooks['ParserAfterParse'][] = array( $parser->extCite, 'checkRefsNoReferences', true ); $wgHooks['ParserBeforeTidy'][] = array( $parser->extCite, 'checkRefsNoReferences', false ); $wgHooks['InlineEditorPartialAfterParse'][] = array( $parser->extCite, 'checkAnyCalls' ); + $wgHooks['ParserExtensionSubstitution'][] = array( $parser->extCite, 'saveSubstitutionData' ); Cite::$hooksInstalled = true; } $parser->setHook( 'ref', array( $parser->extCite, 'ref' ) ); -- To view, visit https://gerrit.wikimedia.org/r/272306 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I7ed38d9b38438cdf8647bb05587344ef4e30b3d7 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Cite Gerrit-Branch: master Gerrit-Owner: Cenarium <cenarium.sy...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits