Thiemo Mättig (WMDE) has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/148113

Change subject: Make MonthNameUnlocalizer aware of genitive month names
......................................................................

Make MonthNameUnlocalizer aware of genitive month names

See Ic7a5797.

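This includes a major rewrite of unlocalizeMonthNames(). It now tries to
unlocalize longer strings first, which should avoid conflicts where a
short month name or abbreviation is a substring of a longer one (for
example "Juli" inside the genitive "Julis").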

Change-Id: I232fd3b5433f04396c53eecf3ad49d025a84ee64
---
M lib/includes/parsers/MonthNameUnlocalizer.php
M lib/tests/phpunit/parsers/MonthNameUnlocalizerTest.php
2 files changed, 37 insertions(+), 25 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/13/148113/1

diff --git a/lib/includes/parsers/MonthNameUnlocalizer.php b/lib/includes/parsers/MonthNameUnlocalizer.php
index ef6fd9e..055c805 100644
--- a/lib/includes/parsers/MonthNameUnlocalizer.php
+++ b/lib/includes/parsers/MonthNameUnlocalizer.php
@@ -21,26 +21,27 @@
         * @see Unlocalizer::unlocalize()
         *
         * @param string $string string to process
-        * @param string $langCode
+        * @param string $languageCode
         * @param ParserOptions $options
         *
         * @return string unlocalized string
         */
-       public function unlocalize( $string, $langCode, ParserOptions $options 
) {
-               if( $langCode === 'en' ) {
+       public function unlocalize( $string, $languageCode, ParserOptions 
$options ) {
+               if ( $languageCode === 'en' ) {
                        return $string;
                }
 
-               $lang = Language::factory( $langCode );
+               $language = Language::factory( $languageCode );
                $en = Language::factory( 'en' );
 
-               $string = $this->unlocalizeMonthNames( $lang, $en, $string );
+               $string = $this->unlocalizeMonthNames( $language, $en, $string 
);
 
                return $string;
        }
 
        /**
-        * Unlocalizes month names in a string, checking both full month names 
and abbreviations
+        * Unlocalizes month names in a string, checking full month names, 
genitives and abbreviations.
+        *
         * @param Language $from
         * @param Language $to
         * @param string $string
@@ -48,21 +49,30 @@
         * @return string
         */
        private function unlocalizeMonthNames( Language $from, Language $to, 
$string ) {
-               $initialString = $string;
+               $replacements = array();
 
                for ( $i = 1; $i <= 12; $i++ ) {
-                       $string = str_replace( $from->getMonthName( $i ), 
$to->getMonthName( $i ), $string );
+                       $replace = $to->getMonthName( $i );
+
+                       $replacements[$from->getMonthName( $i )] = $replace;
+                       $replacements[$from->getMonthNameGen( $i )] = $replace;
+                       $replacements[$from->getMonthAbbreviation( $i )] = 
$replace;
                }
 
-               if( $string !== $initialString ) {
-                       return $string;
-               }
+               // Order search strings from longest to shortest
+               uksort( $replacements, function( $a, $b ) {
+                       return strlen( $b ) - strlen( $a );
+               } );
 
-               for ( $i = 1; $i <= 12; $i++ ) {
-                       $string = str_replace( $from->getMonthAbbreviation( $i 
), $to->getMonthName( $i ), $string );
+               foreach ( $replacements as $search => $replace ) {
+                       $unlocalized = str_replace( $search, $replace, $string, 
$count );
+
+                       if ( $count === 1 ) {
+                               return $unlocalized;
+                       }
                }
 
                return $string;
        }
 
-}
\ No newline at end of file
+}
diff --git a/lib/tests/phpunit/parsers/MonthNameUnlocalizerTest.php b/lib/tests/phpunit/parsers/MonthNameUnlocalizerTest.php
index 7a47e43..9251c9f 100644
--- a/lib/tests/phpunit/parsers/MonthNameUnlocalizerTest.php
+++ b/lib/tests/phpunit/parsers/MonthNameUnlocalizerTest.php
@@ -20,28 +20,30 @@
 
        public function provideUnlocalize() {
                $testCases = array(
-
-                       //Should unlocalize dates
+                       // Should unlocalize dates
                        array( '1 Juli 2013', 'de', '1 July 2013' ),
+                       array( '1 Julis 2013', 'de', '1 July 2013' ),
                        array( '1 Januarie 1999', 'af', '1 January 1999' ),
                        array( '1 Jan 1999', 'af', '1 January 1999' ),
                        array( '16 Jenna 1999', 'bar', '16 January 1999' ),
 
-                       //Shouldn#t do anything if we cant / don#t need to
+                       // Shouldn't do anything if we can't or don't need to
                        array( '1 June 2013', 'en', '1 June 2013' ),
                        array( '1 Jan 2013', 'en', '1 Jan 2013' ),
+                       array( '1 January 1999', 'en', '1 January 1999' ),
                        array( '16 FooBarBarxxx 1999', 'bar', '16 FooBarBarxxx 
1999' ),
-
+                       array( 'Juli Juli', 'de', 'Juli Juli' ),
                );
 
-               //Loop through some other languages
-               $someLangs = array( 'war', 'ceb', 'uk', 'ru', 'de' );
+               // Loop through some other languages
+               $languageCodes = array( 'war', 'ceb', 'uk', 'ru', 'de' );
                $en = Language::factory( 'en' );
 
-               foreach( $someLangs as $from ) {
+               foreach ( $languageCodes as $from ) {
                        $fromLang = Language::factory( $from );
                        for ( $i = 1; $i <= 12; $i++ ) {
                                $testCases[] = array( $fromLang->getMonthName( 
$i ), $from, $en->getMonthName( $i ) );
+                               $testCases[] = array( 
$fromLang->getMonthNameGen( $i ), $from, $en->getMonthName( $i ) );
                                $testCases[] = array( 
$fromLang->getMonthAbbreviation( $i ), $from, $en->getMonthName( $i ) );
                        }
                }
@@ -53,16 +55,16 @@
         * @dataProvider provideUnlocalize
         *
         * @param $localized
-        * @param $lang
+        * @param $languageCode
         * @param $expected
         */
-       public function testUnlocalize( $localized, $lang, $expected ) {
+       public function testUnlocalize( $localized, $languageCode, $expected ) {
                $monthUnlocalizer = new MonthNameUnlocalizer();
                $options = new ParserOptions();
 
-               $actual = $monthUnlocalizer->unlocalize( $localized, $lang, 
$options );
+               $actual = $monthUnlocalizer->unlocalize( $localized, 
$languageCode, $options );
 
                $this->assertEquals( $expected, $actual );
        }
 
-}
\ No newline at end of file
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/148113
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I232fd3b5433f04396c53eecf3ad49d025a84ee64
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to