Thiemo Mättig (WMDE) has uploaded a new change for review. https://gerrit.wikimedia.org/r/148113
Change subject: Make MonthNameUnlocalizer aware of genitive month names ...................................................................... Make MonthNameUnlocalizer aware of genitive month names See Ic7a5797. This includes a major rewrite of the method. Now it tries to unlocalize longer strings first. This should avoid all possible conflicts. Change-Id: I232fd3b5433f04396c53eecf3ad49d025a84ee64 --- M lib/includes/parsers/MonthNameUnlocalizer.php M lib/tests/phpunit/parsers/MonthNameUnlocalizerTest.php 2 files changed, 37 insertions(+), 25 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/13/148113/1 diff --git a/lib/includes/parsers/MonthNameUnlocalizer.php b/lib/includes/parsers/MonthNameUnlocalizer.php index ef6fd9e..055c805 100644 --- a/lib/includes/parsers/MonthNameUnlocalizer.php +++ b/lib/includes/parsers/MonthNameUnlocalizer.php @@ -21,26 +21,27 @@ * @see Unlocalizer::unlocalize() * * @param string $string string to process - * @param string $langCode + * @param string $languageCode * @param ParserOptions $options * * @return string unlocalized string */ - public function unlocalize( $string, $langCode, ParserOptions $options ) { - if( $langCode === 'en' ) { + public function unlocalize( $string, $languageCode, ParserOptions $options ) { + if ( $languageCode === 'en' ) { return $string; } - $lang = Language::factory( $langCode ); + $language = Language::factory( $languageCode ); $en = Language::factory( 'en' ); - $string = $this->unlocalizeMonthNames( $lang, $en, $string ); + $string = $this->unlocalizeMonthNames( $language, $en, $string ); return $string; } /** - * Unlocalizes month names in a string, checking both full month names and abbreviations + * Unlocalizes month names in a string, checking full month names, genitives and abbreviations. + * * @param Language $from * @param Language $to * @param string $string @@ -48,21 +49,30 @@ * @return string */ private function unlocalizeMonthNames( Language $from, Language $to, $string ) { - $initialString = $string; + $replacements = array(); for ( $i = 1; $i <= 12; $i++ ) { - $string = str_replace( $from->getMonthName( $i ), $to->getMonthName( $i ), $string ); + $replace = $to->getMonthName( $i ); + + $replacements[$from->getMonthName( $i )] = $replace; + $replacements[$from->getMonthNameGen( $i )] = $replace; + $replacements[$from->getMonthAbbreviation( $i )] = $replace; } - if( $string !== $initialString ) { - return $string; - } + // Order search strings from longest to shortest + uksort( $replacements, function( $a, $b ) { + return strlen( $b ) - strlen( $a ); + } ); - for ( $i = 1; $i <= 12; $i++ ) { - $string = str_replace( $from->getMonthAbbreviation( $i ), $to->getMonthName( $i ), $string ); + foreach ( $replacements as $search => $replace ) { + $unlocalized = str_replace( $search, $replace, $string, $count ); + + if ( $count === 1 ) { + return $unlocalized; + } } return $string; } -} \ No newline at end of file +} diff --git a/lib/tests/phpunit/parsers/MonthNameUnlocalizerTest.php b/lib/tests/phpunit/parsers/MonthNameUnlocalizerTest.php index 7a47e43..9251c9f 100644 --- a/lib/tests/phpunit/parsers/MonthNameUnlocalizerTest.php +++ b/lib/tests/phpunit/parsers/MonthNameUnlocalizerTest.php @@ -20,28 +20,30 @@ public function provideUnlocalize() { $testCases = array( - - //Should unlocalize dates + // Should unlocalize dates array( '1 Juli 2013', 'de', '1 July 2013' ), + array( '1 Julis 2013', 'de', '1 July 2013' ), array( '1 Januarie 1999', 'af', '1 January 1999' ), array( '1 Jan 1999', 'af', '1 January 1999' ), array( '16 Jenna 1999', 'bar', '16 January 1999' ), - //Shouldn#t do anything if we cant / don#t need to + // Shouldn't do anything if we can't or don't need to array( '1 June 2013', 'en', '1 June 2013' ), array( '1 Jan 2013', 'en', '1 Jan 2013' ), + array( '1 January 1999', 'en', '1 January 1999' ), array( '16 FooBarBarxxx 1999', 'bar', '16 FooBarBarxxx 1999' ), - + array( 'Juli Juli', 'de', 'Juli Juli' ), ); - //Loop through some other languages - $someLangs = array( 'war', 'ceb', 'uk', 'ru', 'de' ); + // Loop through some other languages + $languageCodes = array( 'war', 'ceb', 'uk', 'ru', 'de' ); $en = Language::factory( 'en' ); - foreach( $someLangs as $from ) { + foreach ( $languageCodes as $from ) { $fromLang = Language::factory( $from ); for ( $i = 1; $i <= 12; $i++ ) { $testCases[] = array( $fromLang->getMonthName( $i ), $from, $en->getMonthName( $i ) ); + $testCases[] = array( $fromLang->getMonthNameGen( $i ), $from, $en->getMonthName( $i ) ); $testCases[] = array( $fromLang->getMonthAbbreviation( $i ), $from, $en->getMonthName( $i ) ); } } @@ -53,16 +55,16 @@ * @dataProvider provideUnlocalize * * @param $localized - * @param $lang + * @param $languageCode * @param $expected */ - public function testUnlocalize( $localized, $lang, $expected ) { + public function testUnlocalize( $localized, $languageCode, $expected ) { $monthUnlocalizer = new MonthNameUnlocalizer(); $options = new ParserOptions(); - $actual = $monthUnlocalizer->unlocalize( $localized, $lang, $options ); + $actual = $monthUnlocalizer->unlocalize( $localized, $languageCode, $options ); $this->assertEquals( $expected, $actual ); } -} \ No newline at end of file +} -- To view, visit https://gerrit.wikimedia.org/r/148113 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I232fd3b5433f04396c53eecf3ad49d025a84ee64 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits