Burthsceh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/113943

Change subject: fix escaping fragment of Title
......................................................................

fix escaping fragment of Title

Some normalizations were added in r18513, r44000 and r55382.
As a result, fragments of Title objects are also normalized,
but headings aren't normalized.
So leave fragments non-normalized.

bug: 18431
Change-Id: I4738195b1a219435b84d372b3b317a888ba0ccbf
---
M includes/Title.php
M tests/phpunit/includes/TitleTest.php
2 files changed, 50 insertions(+), 13 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/43/113943/1

diff --git a/includes/Title.php b/includes/Title.php
index 2862685..225a171 100644
--- a/includes/Title.php
+++ b/includes/Title.php
@@ -1237,7 +1237,7 @@
         * @param string $fragment text
         */
        public function setFragment( $fragment ) {
-               $this->mFragment = str_replace( '_', ' ', substr( $fragment, 1 
) );
+               $this->mFragment = Sanitizer::normalizeSectionNameWhitespace( 
substr( $fragment, 1 ) );
        }
 
        /**
@@ -3209,6 +3209,17 @@
 
                $dbkey = $this->mDbkeyform;
 
+               $fragment = strstr( $dbkey, '#' );
+               if ( false !== $fragment ) {
+                       if ( strpos( $fragment, UTF8_REPLACEMENT ) !== false ) {
+                               # Contained illegal UTF-8 sequences or 
forbidden Unicode chars.
+                               return false;
+                       }
+
+                       $dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( 
$fragment ) );
+                       $this->setFragment( $fragment );
+               }
+
                # Strip Unicode bidi override characters.
                # Sometimes they slip into cut-n-pasted page titles, where the
                # override chars get included in list displays.
@@ -3236,7 +3247,7 @@
                        $dbkey = trim( $dbkey, '_' ); # remove any subsequent 
whitespace
                }
 
-               if ( $dbkey == '' ) {
+               if ( $dbkey == '' && $fragment === false ) {
                        return false;
                }
 
@@ -3276,7 +3287,7 @@
                                        if ( $wgLocalInterwiki !== false
                                                && 0 == strcasecmp( 
$this->mInterwiki, $wgLocalInterwiki )
                                        ) {
-                                               if ( $dbkey == '' ) {
+                                               if ( $dbkey == '' && $fragment 
=== false ) {
                                                        # Can't have an empty 
self-link
                                                        return false;
                                                }
@@ -3298,15 +3309,6 @@
                        }
                        break;
                } while ( true );
-
-               $fragment = strstr( $dbkey, '#' );
-               if ( false !== $fragment ) {
-                       $this->setFragment( $fragment );
-                       $dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( 
$fragment ) );
-                       # remove whitespace again: prevents "Foo_bar_#"
-                       # becoming "Foo_bar_"
-                       $dbkey = preg_replace( '/_*$/', '', $dbkey );
-               }
 
                # Reject illegal characters.
                $rxTc = self::getTitleInvalidRegex();
diff --git a/tests/phpunit/includes/TitleTest.php 
b/tests/phpunit/includes/TitleTest.php
index 078dfef..721f256 100644
--- a/tests/phpunit/includes/TitleTest.php
+++ b/tests/phpunit/includes/TitleTest.php
@@ -83,7 +83,9 @@
                        'remotetestiw: Talk: # anchor',
                        'remotetestiw: #bar',
                        'remotetestiw: Talk:',
-                       'remotetestiw: Talk: Foo'
+                       'remotetestiw: Talk: Foo',
+                       // anchor
+                       'remotetestiw : Talk: F oo#__ 
_a~~~nch&#202A;o r__ _'
                ) as $text ) {
                        $this->assertInstanceOf( 'Title', Title::newFromText( 
$text ), "Valid: $text" );
                }
@@ -511,4 +513,37 @@
                        array( 'User:John_Doe/subOne', 'subOne' ),
                );
        }
+
+       /**
+        * @dataProvider provideGetFragmentForURL
+        * @covers Title::getFragmentForURL
+        */
+       public function testGetFragmentForURL( $title, $expected, $msg = '' ) {
+               $title = Title::newFromText( $title );
+               $this->assertEquals( $expected, $title->getFragmentForURL(), 
$msg );
+       }
+
+       public static function provideGetFragmentForURL() {
+               return array(
+                       # Title, expected fragment, optional message
+                       array( 'Foo', '' ),
+                       // Bug 17006
+                       //array( 'Foo#', '#' ),
+                       //array( '#', '#' ),
+                       array( '#ANCHOR __ _  anchor', '#ANCHOR_anchor' ),
+                       array( '#anchor:(&"18$!+\\', 
'#anchor:.28.26.2218.24.21.2B.5C' ),
+                       array(
+                               
'#a#[]{}<>|::~~~/../  ᠎z',
+                               
'#a.23.5B.5D.7B.7D.3C.3E.7C::.7E.7E.7E.2F...2F.C2.A0.E1.9A.80.E1.A0.8Ez'
+                       ),
+                       array(
+                               "#a \n\r\t\v\f\xE2\x80\x8E\xE2\x80\xAAz",
+                               
'a_.0A.0D.09.EF.BF.BD.EF.BF.BD.E2.80.8E.E2.80.AAz'
+                       ),
+                       array(
+                               
'#a  

   z',
+                               
'#a.E2.80.82.E2.80.8A.E2.80.A8.E2.80.A9.E2.80.AF.E2.81.9F.E3.80.80z'
+                       ),
+               );
+       }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/113943
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I4738195b1a219435b84d372b3b317a888ba0ccbf
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Burthsceh <burths...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to