Cenarium has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/272306

Change subject: Partial fix for stripped wikitext issue in references data
......................................................................

Partial fix for stripped wikitext issue in references data

Ref tags called with the {{#tag:|}} parser function can contain
<ref> tags or other core- or extension-defined tags. When references
data is retrieved during the parse of the <ref> and <references> tags
(to be later saved in the database), the parser returns strip
markers in those cases instead of the original wikitext.
Recovering the original wikitext is not really possible at this
point, so this uses the ParserExtensionSubstitution hook added in
I8746b0c9d1ca215c316ebfe5d6b86618f7813577 to recover enough
information about the original wikitext to reconstruct wikitext
largely equivalent to the original.
This only works for <ref> tags, and only at one level of nesting,
so it addresses the largest use case, i.e. references nested
in notes.

Change-Id: I7ed38d9b38438cdf8647bb05587344ef4e30b3d7
---
M CiteHooks.php
M Cite_body.php
2 files changed, 65 insertions(+), 8 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Cite 
refs/changes/06/272306/1

diff --git a/CiteHooks.php b/CiteHooks.php
index 5622c7e..792469a 100644
--- a/CiteHooks.php
+++ b/CiteHooks.php
@@ -114,15 +114,36 @@
                                        // remove spaces or newlines added at 
the beginning or end of references
                                        $ref['text'] = trim( $ref['text'] );
 
-                                       // core and extension tags (including 
<ref>, <nowiki>, etc) inside {{#tag:ref}} calls
-                                       // get replaced by the parser with a 
strip marker before Cite::ref is called and the
-                                       // original wikitext cannot be 
recovered, so remove these useless strip markers
+                                       // core and extension tags (including 
<ref>, <nowiki>, etc)
+                                       // inside {{#tag:ref}} calls get 
replaced by the parser with
+                                       // a strip marker before Cite::ref is 
called, so we try to
+                                       // restore as much of the original 
wikitext as possible
+                                       // this only works for <ref> tags 
one-level deep
+                                       $subst = $refData['subst'];
+                                       foreach ( $subst as $num => $val ) {
+                                               $str = '<' . $val['name'];
+                                               foreach ( $val['attributes'] as 
$attrName => $attrValue ) {
+                                                       $str .= ' ' . $attrName 
. '="' . $attrValue . '"';
+                                               }
+                                               $str .= '>' . $val['content'] . 
'</' . $val['name'] . '>';
+                                               $count = 0;
+                                               $ref['text'] = str_replace( 
$val['marker'], $str, $ref['text'], $count );
+                                               if ( $count ) {
+                                                       // let callers know 
about the restored wikitext content that
+                                                       // is not necessarily 
exactly the same as the original
+                                                       
$ref['markers-restored'] = true;
+                                                       unset( 
$refData['subst'][$num] );
+                                               }
+                                       }
+
+                                       // strip markers due to other tags or 
doubly nested refs may still remain
+                                       // so remove them
                                        $stripState = new StripState();
                                        $text = $stripState->killMarkers( 
$ref['text'] );
                                        if ( $ref['text'] !== $text ) {
                                                $ref['text'] = $text;
                                                // let callers know about the 
removed strip markers
-                                               $ref['markers'] = true;
+                                               $ref['markers-removed'] = true;
                                        }
 
                                        // return more meaningful count
@@ -140,6 +161,7 @@
                        }
                }
                $refData['refs'] = $refs;
+               unset( $refData['subst'] ); // no longer needed
                return $refData;
        }
 
diff --git a/Cite_body.php b/Cite_body.php
index 53a8c0a..4bf0a46 100644
--- a/Cite_body.php
+++ b/Cite_body.php
@@ -1193,10 +1193,7 @@
                $savedRefs = $this->mParser->getOutput()->getExtensionData( 
self::EXT_DATA_KEY );
                if ( $savedRefs === null ) {
                        // Initialize array structure
-                       $savedRefs = array(
-                               'refs' => array(),
-                               'version' => self::DATA_VERSION_NUMBER,
-                       );
+                       $savedRefs = self::initializeRefData();
                }
                if ( $this->mBumpRefData ) {
                        // This handles pages with multiple <references/> tags 
with <ref> tags in between.
@@ -1210,6 +1207,43 @@
                $savedRefs['refs'][$n][$group] = $this->mRefs[$group];
 
                $this->mParser->getOutput()->setExtensionData( 
self::EXT_DATA_KEY, $savedRefs );
+       }
+
+       public static function saveSubstitutionData( $parser, $name, 
$attributes, $content,     $marker ) {
+               global $wgCiteStoreReferencesData;
+               if ( !$wgCiteStoreReferencesData ) {
+                       return;
+               }
+               // allow only 'ref' tags 
+               if ( $name !== 'ref' ) {
+                       return;
+               }
+               // only save substitution data if we're inside a 
{{#tag:ref|...}} call
+               $tagParserFunctionCalls = 
$parser->getIsInsideTagParserFunctionCall();
+               if ( !isset( $tagParserFunctionCalls['ref'] ) ) {
+                       return;
+               }
+
+               $savedRefs = $parser->getOutput()->getExtensionData( 
self::EXT_DATA_KEY );
+               if ( $savedRefs === null ) {
+                       // Initialize array structure
+                       $savedRefs = self::initializeRefData();
+               }
+               $savedRefs['subst'][] = array(
+                       'name' => $name, 
+                       'attributes' => $attributes,
+                       'content' => $content, 
+                       'marker' => $marker,
+               );
+               $parser->getOutput()->setExtensionData( self::EXT_DATA_KEY, 
$savedRefs );
+       }
+
+       private static function initializeRefData() {
+               return array(
+                       'refs' => array(),
+                       'subst' => array(),
+                       'version' => self::DATA_VERSION_NUMBER,
+               );
        }
 
        /**
@@ -1245,6 +1279,7 @@
                        $wgHooks['ParserAfterParse'][] = array( 
$parser->extCite, 'checkRefsNoReferences', true );
                        $wgHooks['ParserBeforeTidy'][] = array( 
$parser->extCite, 'checkRefsNoReferences', false );
                        $wgHooks['InlineEditorPartialAfterParse'][] = array( 
$parser->extCite, 'checkAnyCalls' );
+                       $wgHooks['ParserExtensionSubstitution'][] = array( 
$parser->extCite, 'saveSubstitutionData' );
                        Cite::$hooksInstalled = true;
                }
                $parser->setHook( 'ref', array( $parser->extCite, 'ref' ) );

-- 
To view, visit https://gerrit.wikimedia.org/r/272306
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7ed38d9b38438cdf8647bb05587344ef4e30b3d7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Cite
Gerrit-Branch: master
Gerrit-Owner: Cenarium <cenarium.sy...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to