jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/388365 )

Change subject: Remove nbsp and similar characters from section IDs
......................................................................


Remove nbsp and similar characters from section IDs

Bug: T90902
Change-Id: I71bdb7dd43c3e532287290e3c691d9739da45475
---
M RELEASE-NOTES-1.31
M includes/parser/Parser.php
M tests/parser/parserTests.txt
3 files changed, 39 insertions(+), 0 deletions(-)

Approvals:
  C. Scott Ananian: Looks good to me, but someone else must approve
  jenkins-bot: Verified
  Kaldari: Looks good to me, approved



diff --git a/RELEASE-NOTES-1.31 b/RELEASE-NOTES-1.31
index 4bfcfcb..3688163 100644
--- a/RELEASE-NOTES-1.31
+++ b/RELEASE-NOTES-1.31
@@ -41,6 +41,7 @@
 * …
 
 === Bug fixes in 1.31 ===
+* (T90902) Non-breaking space in header ID breaks anchor
 * …
 
 === Action API changes in 1.31 ===
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index f2e47dc..3548da9 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -4206,6 +4206,9 @@
 
                        # Decode HTML entities
                        $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
+
+                       $safeHeadline = $this->normalizeSectionName( $safeHeadline );
+
                        $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
                        $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
                        $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
@@ -5767,6 +5770,8 @@
                $text = $this->stripSectionName( $text );
                $text = Sanitizer::normalizeSectionNameWhitespace( $text );
                $text = Sanitizer::decodeCharReferences( $text );
+               $text = $this->normalizeSectionName( $text );
+
                return '#' . Sanitizer::escapeIdForLink( $text );
        }
 
@@ -5786,6 +5791,7 @@
                $text = $this->stripSectionName( $text );
                $text = Sanitizer::normalizeSectionNameWhitespace( $text );
                $text = Sanitizer::decodeCharReferences( $text );
+               $text = $this->normalizeSectionName( $text );
 
                if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
                        // ForAttribute() and ForLink() are the same for legacy encoding
@@ -5798,6 +5804,24 @@
        }
 
        /**
+        * Apply the same normalization as code making links to this section would
+        *
+        * @param string $text
+        * @return string
+        */
+       private function normalizeSectionName( $text ) {
+               # T90902: ensure the same normalization is applied for IDs as to links
+               $titleParser = MediaWikiServices::getInstance()->getTitleParser();
+               try {
+
+                       $parts = $titleParser->splitTitleString( "#$text" );
+               } catch ( MalformedTitleException $ex ) {
+                       return $text;
+               }
+               return $parts['fragment'];
+       }
+
+       /**
         * Strips a text string of wikitext for use in a section anchor
         *
         * Accepts a text string and then removes all wikitext from the
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 3c861ea..1204dbd 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -29536,3 +29536,17 @@
 </p><p><a href="#啤酒">#啤酒</a> <a href="#啤酒">#啤酒</a>
 </p>
 !! end
+
+!! test
+T90902: Normalize weird characters in section IDs
+!! config
+wgFragmentMode=[ 'html5', 'legacy' ]
+!! wikitext
+== Foo&nbsp;bar ==
+[[#Foo&nbsp;bar]]
+
+!! html/php
+<h2><span class="mw-headline" id="Foo_bar">Foo&#160;bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: Foo bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p><a href="#Foo_bar">#Foo&#160;bar</a>
+</p>
+!! end

-- 
To view, visit https://gerrit.wikimedia.org/r/388365
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I71bdb7dd43c3e532287290e3c691d9739da45475
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: MaxSem <[email protected]>
Gerrit-Reviewer: C. Scott Ananian <[email protected]>
Gerrit-Reviewer: Jackmcbarn <[email protected]>
Gerrit-Reviewer: Kaldari <[email protected]>
Gerrit-Reviewer: MaxSem <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to