Burthsceh has uploaded a new change for review. https://gerrit.wikimedia.org/r/113943
Change subject: fix escaping fragment of Title ...................................................................... fix escaping fragment of Title Some normalizations were added in r18513, r44000 and r55382. Fragments of title objects are also normalized. However, headings aren't normalized, so make fragments non-normalized. bug: 18431 Change-Id: I4738195b1a219435b84d372b3b317a888ba0ccbf --- M includes/Title.php M tests/phpunit/includes/TitleTest.php 2 files changed, 50 insertions(+), 13 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/43/113943/1 diff --git a/includes/Title.php b/includes/Title.php index 2862685..225a171 100644 --- a/includes/Title.php +++ b/includes/Title.php @@ -1237,7 +1237,7 @@ * @param string $fragment text */ public function setFragment( $fragment ) { - $this->mFragment = str_replace( '_', ' ', substr( $fragment, 1 ) ); + $this->mFragment = Sanitizer::normalizeSectionNameWhitespace( substr( $fragment, 1 ) ); } /** @@ -3209,6 +3209,17 @@ $dbkey = $this->mDbkeyform; + $fragment = strstr( $dbkey, '#' ); + if ( false !== $fragment ) { + if ( strpos( $fragment, UTF8_REPLACEMENT ) !== false ) { + # Contained illegal UTF-8 sequences or forbidden Unicode chars. + return false; + } + + $dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) ); + $this->setFragment( $fragment ); + } + # Strip Unicode bidi override characters. # Sometimes they slip into cut-n-pasted page titles, where the # override chars get included in list displays. 
@@ -3236,7 +3247,7 @@ $dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace } - if ( $dbkey == '' ) { + if ( $dbkey == '' && $fragment === false ) { return false; } @@ -3276,7 +3287,7 @@ if ( $wgLocalInterwiki !== false && 0 == strcasecmp( $this->mInterwiki, $wgLocalInterwiki ) ) { - if ( $dbkey == '' ) { + if ( $dbkey == '' && $fragment === false ) { # Can't have an empty self-link return false; } @@ -3298,15 +3309,6 @@ } break; } while ( true ); - - $fragment = strstr( $dbkey, '#' ); - if ( false !== $fragment ) { - $this->setFragment( $fragment ); - $dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) ); - # remove whitespace again: prevents "Foo_bar_#" - # becoming "Foo_bar_" - $dbkey = preg_replace( '/_*$/', '', $dbkey ); - } # Reject illegal characters. $rxTc = self::getTitleInvalidRegex(); diff --git a/tests/phpunit/includes/TitleTest.php b/tests/phpunit/includes/TitleTest.php index 078dfef..721f256 100644 --- a/tests/phpunit/includes/TitleTest.php +++ b/tests/phpunit/includes/TitleTest.php @@ -83,7 +83,9 @@ 'remotetestiw: Talk: # anchor', 'remotetestiw: #bar', 'remotetestiw: Talk:', - 'remotetestiw: Talk: Foo' + 'remotetestiw: Talk: Foo', + // anchor + 'remotetestiw : Talk: F oo#__ _a~~~nchÊA;o r__ _' ) as $text ) { $this->assertInstanceOf( 'Title', Title::newFromText( $text ), "Valid: $text" ); } @@ -511,4 +513,37 @@ array( 'User:John_Doe/subOne', 'subOne' ), ); } + + /** + * @dataProvider provideConvertByteClassToUnicodeClass + * @covers Title::getFragmentForURL + */ + public function testGetFragmentForURL( $title, $expected, $msg = '' ) { + $title = Title::newFromText( $title ); + $this->assertEquals( $expected, $title->getFragmentForURL(), $msg ); + } + + public static function provideGetFragmentForURL() { + return array( + # Title, expected base, optional message + array( 'Foo', '' ), + // Bug 17006 + //array( 'Foo#', '#' ), + //array( '#', '#' ), + array( '#ANCHOR __ _ anchor', '#ANCHOR_anchor' ), + array( 
'#anchor:(&"18$!+\\', '#anchor:.28.26.2218.24.21.2B.5C' ), + array( + '#a#[]{}<>|::~~~/../  ᠎z', + '#a.23.5B.5D.7B.7D.3C.3E.7C::.7E.7E.7E.2F...2F.C2.A0.E1.9A.80.E1.A0.8Ez' + ), + array( + "#a \n\r\t\v\f\xE2\x80\x8E\xE2\x80\xAAz", + 'a_.0A.0D.09.EF.BF.BD.EF.BF.BD.E2.80.8E.E2.80.AAz' + ), + array( + '#a  

   z', + '#a.E2.80.82.E2.80.8A.E2.80.A8.E2.80.A9.E2.80.AF.E2.81.9F.E3.80.80z' + ), + ); + } } -- To view, visit https://gerrit.wikimedia.org/r/113943 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I4738195b1a219435b84d372b3b317a888ba0ccbf Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Burthsceh <burths...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits