jenkins-bot has submitted this change and it was merged.

Change subject: Update groupStatistics.php to use Maintenance class
......................................................................


Update groupStatistics.php to use Maintenance class

Change-Id: I0298d797eecdab6d49f94a5b4a8859d69e008f9f
---
M scripts/groupStatistics.php
1 file changed, 659 insertions(+), 628 deletions(-)

Approvals:
  Nikerabbit: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/groupStatistics.php b/scripts/groupStatistics.php
index 5f5d5f9..3bc225b 100644
--- a/scripts/groupStatistics.php
+++ b/scripts/groupStatistics.php
@@ -1,7 +1,7 @@
 <?php
 /**
  * Commandline script to general statistics about the localisation level of
- * localisation for one or more message groups.
+ * one or more message groups.
  *
  * @file
  * @ingroup Script Stats
@@ -11,176 +11,667 @@
  * @license GPL-2.0+
  */
 
-/**
- * Array of the most spoken languages in the world.
- * Source: http://stats.wikimedia.org/EN/Sitemap.htm.
- *
- * Key value pairs of:
- * [MediaWiki localisation code] => array(
- *    [position in top 50],
- *    [speakers in millions],
- *    [continent where localisation is spoken]
- * )
- *
- */
-$mostSpokenLanguages = array(
-       'en' => array( 1, 1500, 'multiple' ),
-       'zh-hans' => array( 2, 1300, 'asia' ),
-       'zh-hant' => array( 2, 1300, 'asia' ),
-       'hi' => array( 3, 550, 'asia' ),
-       'ar' => array( 4, 530, 'multiple' ),
-       'es' => array( 5, 500, 'multiple' ),
-       'ms' => array( 6, 300, 'asia' ),
-       'pt' => array( 7, 290, 'multiple' ),
-       'pt-br' => array( 7, 290, 'america' ),
-       'ru' => array( 8, 278, 'multiple' ),
-       'id' => array( 9, 250, 'asia' ),
-       'bn' => array( 10, 230, 'asia' ),
-       'fr' => array( 11, 200, 'multiple' ),
-       'de' => array( 12, 185, 'europe' ),
-       'ja' => array( 13, 132, 'asia' ),
-       'fa' => array( 14, 107, 'asia' ),
-       'pnb' => array( 15, 104, 'asia' ), // Most spoken variant
-       'tl' => array( 16, 90, 'asia' ),
-       'mr' => array( 17, 90, 'asia' ),
-       'vi' => array( 18, 80, 'asia' ),
-       'jv' => array( 19, 80, 'asia' ),
-       'te' => array( 20, 80, 'asia' ),
-       'ko' => array( 21, 78, 'asia' ),
-       'wuu' => array( 22, 77, 'asia' ),
-       'arz' => array( 23, 76, 'africa' ),
-       'th' => array( 24, 73, 'asia' ),
-       'yue' => array( 25, 71, 'asia' ),
-       'tr' => array( 26, 70, 'multiple' ),
-       'it' => array( 27, 70, 'europe' ),
-       'ta' => array( 28, 66, 'asia' ),
-       'ur' => array( 29, 60, 'asia' ),
-       'my' => array( 30, 52, 'asia' ),
-       'sw' => array( 31, 50, 'africa' ),
-       'nan' => array( 32, 49, 'asia' ),
-       'kn' => array( 33, 47, 'asia' ),
-       'gu' => array( 34, 46, 'asia' ),
-       'uk' => array( 35, 45, 'europe' ),
-       'pl' => array( 36, 43, 'europe' ),
-       'sd' => array( 37, 41, 'asia' ),
-       'ha' => array( 38, 39, 'africa' ),
-       'ml' => array( 39, 37, 'asia' ),
-       'gan-hans' => array( 40, 35, 'asia' ),
-       'gan-hant' => array( 40, 35, 'asia' ),
-       'hak' => array( 41, 34, 'asia' ),
-       'or' => array( 42, 31, 'asia' ),
-       'ne' => array( 43, 30, 'asia' ),
-       'ro' => array( 44, 28, 'europe' ),
-       'su' => array( 45, 27, 'asia' ),
-       'az' => array( 46, 27, 'asia' ),
-       'nl' => array( 47, 27, 'europe' ),
-       'zu' => array( 48, 26, 'africa' ),
-       'ps' => array( 49, 26, 'asia' ),
-       'ckb' => array( 50, 26, 'asia' ),
-       'ku-latn' => array( 50, 26, 'asia' ),
-);
+// Standard boilerplate to define $IP
+if ( getenv( 'MW_INSTALL_PATH' ) !== false ) {
+       $IP = getenv( 'MW_INSTALL_PATH' );
+} else {
+       $dir = __DIR__;
+       $IP = "$dir/../../..";
+}
+require_once "$IP/maintenance/Maintenance.php";
 
-/**
- * Variable with key-value pairs with a named index and an array of key-value
- * pairs where the key is a MessageGroup ID and the value is a weight of the
- * group in the sum of the values for all the groups in the array.
- *
- * Definitions in this variable can be used to report weighted meta 
localisation
- * scores for the 50 most spoken languages.
- *
- * @todo Allow weighted reporting for all available languges.
- */
-$localisedWeights = array(
-       'wikimedia' => array(
-               'core-0-mostused' => 40,
-               'core' => 30,
-               'ext-0-wikimedia' => 30
-       ),
-       'fundraiser' => array(
-               'ext-di-di' => 16,
-               'ext-di-pfpg' => 84,
-       ),
-       'mediawiki' => array(
-               'core-0-mostused' => 30,
-               'core' => 30,
-               'ext-0-wikimedia' => 20,
-               'ext-0-all' => 20
-       )
-);
+class GroupStatictics extends Maintenance {
+       /**
+        * Array of the most spoken languages in the world.
+        * Source: http://stats.wikimedia.org/EN/Sitemap.htm.
+        *
+        * Key value pairs of:
+        * [MediaWiki localisation code] => array(
+        *    [position in top 50],
+        *    [speakers in millions],
+        *    [continent where localisation is spoken]
+        * )
+        */
+       public $mostSpokenLanguages = array(
+               'en' => array( 1, 1500, 'multiple' ),
+               'zh-hans' => array( 2, 1300, 'asia' ),
+               'zh-hant' => array( 2, 1300, 'asia' ),
+               'hi' => array( 3, 550, 'asia' ),
+               'ar' => array( 4, 530, 'multiple' ),
+               'es' => array( 5, 500, 'multiple' ),
+               'ms' => array( 6, 300, 'asia' ),
+               'pt' => array( 7, 290, 'multiple' ),
+               'pt-br' => array( 7, 290, 'america' ),
+               'ru' => array( 8, 278, 'multiple' ),
+               'id' => array( 9, 250, 'asia' ),
+               'bn' => array( 10, 230, 'asia' ),
+               'fr' => array( 11, 200, 'multiple' ),
+               'de' => array( 12, 185, 'europe' ),
+               'ja' => array( 13, 132, 'asia' ),
+               'fa' => array( 14, 107, 'asia' ),
+               'pnb' => array( 15, 104, 'asia' ), // Most spoken variant
+               'tl' => array( 16, 90, 'asia' ),
+               'mr' => array( 17, 90, 'asia' ),
+               'vi' => array( 18, 80, 'asia' ),
+               'jv' => array( 19, 80, 'asia' ),
+               'te' => array( 20, 80, 'asia' ),
+               'ko' => array( 21, 78, 'asia' ),
+               'wuu' => array( 22, 77, 'asia' ),
+               'arz' => array( 23, 76, 'africa' ),
+               'th' => array( 24, 73, 'asia' ),
+               'yue' => array( 25, 71, 'asia' ),
+               'tr' => array( 26, 70, 'multiple' ),
+               'it' => array( 27, 70, 'europe' ),
+               'ta' => array( 28, 66, 'asia' ),
+               'ur' => array( 29, 60, 'asia' ),
+               'my' => array( 30, 52, 'asia' ),
+               'sw' => array( 31, 50, 'africa' ),
+               'nan' => array( 32, 49, 'asia' ),
+               'kn' => array( 33, 47, 'asia' ),
+               'gu' => array( 34, 46, 'asia' ),
+               'uk' => array( 35, 45, 'europe' ),
+               'pl' => array( 36, 43, 'europe' ),
+               'sd' => array( 37, 41, 'asia' ),
+               'ha' => array( 38, 39, 'africa' ),
+               'ml' => array( 39, 37, 'asia' ),
+               'gan-hans' => array( 40, 35, 'asia' ),
+               'gan-hant' => array( 40, 35, 'asia' ),
+               'hak' => array( 41, 34, 'asia' ),
+               'or' => array( 42, 31, 'asia' ),
+               'ne' => array( 43, 30, 'asia' ),
+               'ro' => array( 44, 28, 'europe' ),
+               'su' => array( 45, 27, 'asia' ),
+               'az' => array( 46, 27, 'asia' ),
+               'nl' => array( 47, 27, 'europe' ),
+               'zu' => array( 48, 26, 'africa' ),
+               'ps' => array( 49, 26, 'asia' ),
+               'ckb' => array( 50, 26, 'asia' ),
+               'ku-latn' => array( 50, 26, 'asia' ),
+       );
 
-/**
- * Code map to map localisation codes to Wikimedia project codes. Only
- * exclusion and remapping is defined here. It is assumed that the first part
- * of the localisation code is the WMF project name otherwise (zh-hans -> zh).
- */
-$wikimediaCodeMap = array(
-       // Codes containing a dash
-       'bat-smg' => 'bat-smg',
-       'cbk-zam' => 'cbk-zam',
-       'map-bms' => 'map-bms',
-       'nds-nl' => 'nds-nl',
-       'roa-rup' => 'roa-rup',
-       'roa-tara' => 'roa-tara',
+       /**
+        * Variable with key-value pairs with a named index and an array of 
key-value
+        * pairs where the key is a MessageGroup ID and the value is a weight 
of the
+        * group in the sum of the values for all the groups in the array.
+        *
+        * Definitions in this variable can be used to report weighted meta 
localisation
+        * scores for the 50 most spoken languages.
+        *
+        * @todo Allow weighted reporting for all available languages.
+        */
+       public $localisedWeights = array(
+               'wikimedia' => array(
+                       'core-0-mostused' => 40,
+                       'core' => 30,
+                       'ext-0-wikimedia' => 30
+               ),
+               'fundraiser' => array(
+                       'ext-di-di' => 16,
+                       'ext-di-pfpg' => 84,
+               ),
+               'mediawiki' => array(
+                       'core-0-mostused' => 30,
+                       'core' => 30,
+                       'ext-0-wikimedia' => 20,
+                       'ext-0-all' => 20
+               )
+       );
 
-       // Remaps
-       'be-tarask' => 'be-x-old',
-       'gsw' => 'als',
-       'ike-cans' => 'iu',
-       'ike-latn' => 'iu',
-       'lzh' => 'zh-classical',
-       'nan' => 'zh-min-nan',
-       'vro' => 'fiu-vro',
-       'yue' => 'zh-yue',
+       /**
+        * Code map to map localisation codes to Wikimedia project codes. Only
+        * exclusion and remapping is defined here. It is assumed that the first part
+        * of the localisation code is the WMF project name otherwise (zh-hans -> zh).
+        */
+       public $wikimediaCodeMap = array(
+               // Codes containing a dash
+               'bat-smg' => 'bat-smg',
+               'cbk-zam' => 'cbk-zam',
+               'map-bms' => 'map-bms',
+               'nds-nl' => 'nds-nl',
+               'roa-rup' => 'roa-rup',
+               'roa-tara' => 'roa-tara',
 
-       // Ignored language codes. See reason.
-       'als' => '', // gsw
-       'be-x-old' => '', // be-tarask
-       'crh' => '', // crh-*
-       'de-at' => '', // de
-       'de-ch' => '', // de
-       'de-formal' => '', // de, not reporting formal form
-       'dk' => '', // da
-       'en-au' => '', // en
-       'en-ca' => '', // no MW code
-       'en-gb' => '', // no MW code
-       'es-419' => '', // no MW code
-       'fiu-vro' => '', // vro
-       'gan' => '', // gan-*
-       'got' => '', // extinct. not reporting formal form
-       'hif' => '', // hif-*
-       'hu-formal' => '', // not reporting
-       'iu' => '', // ike-*
-       'kk' => '', // kk-*
-       'kk-cn' => '', // kk-arab
-       'kk-kz' => '', // kk-cyrl
-       'kk-tr' => '', // kk-latn
-       'ko-kp' => '', // ko
-       'ku' => '', // ku-*
-       'ku-arab' => '', // ckb
-       'nb' => '', // no
-       'nl-be' => '', // no MW code
-       'nl-informal' => '', // nl, not reporting informal form
-       'ruq' => '', // ruq-*
-       'simple' => '', // en
-       'sr' => '', // sr-*
-       'tg' => '', // tg-*
-       'tp' => '', // tokipona
-       'tt' => '', // tt-*
-       'ug' => '', // ug-*
-       'zh' => '', // zh-*
-       'zh-classical' => '', // lzh
-       'zh-cn' => '', // zh
-       'zh-sg' => '', // zh
-       'zh-hk' => '', // zh
-       'zh-min-nan' => '', //
-       'zh-mo' => '', // zh
-       'zh-my' => '', // zh
-       'zh-tw' => '', // zh
-       'zh-yue' => '', // yue
-);
+               // Remaps
+               'be-tarask' => 'be-x-old',
+               'gsw' => 'als',
+               'ike-cans' => 'iu',
+               'ike-latn' => 'iu',
+               'lzh' => 'zh-classical',
+               'nan' => 'zh-min-nan',
+               'vro' => 'fiu-vro',
+               'yue' => 'zh-yue',
 
-$optionsWithArgs = array( 'groups', 'output', 'skiplanguages', 'legenddetail', 
'legendsummary' );
-require __DIR__ . '/cli.inc';
+               // Ignored language codes. See reason.
+               'als' => '', // gsw
+               'be-x-old' => '', // be-tarask
+               'crh' => '', // crh-*
+               'de-at' => '', // de
+               'de-ch' => '', // de
+               'de-formal' => '', // de, not reporting formal form
+               'dk' => '', // da
+               'en-au' => '', // en
+               'en-ca' => '', // no MW code
+               'en-gb' => '', // no MW code
+               'es-419' => '', // no MW code
+               'fiu-vro' => '', // vro
+               'gan' => '', // gan-*
+               'got' => '', // extinct. not reporting formal form
+               'hif' => '', // hif-*
+               'hu-formal' => '', // not reporting
+               'iu' => '', // ike-*
+               'kk' => '', // kk-*
+               'kk-cn' => '', // kk-arab
+               'kk-kz' => '', // kk-cyrl
+               'kk-tr' => '', // kk-latn
+               'ko-kp' => '', // ko
+               'ku' => '', // ku-*
+               'ku-arab' => '', // ckb
+               'nb' => '', // no
+               'nl-be' => '', // no MW code
+               'nl-informal' => '', // nl, not reporting informal form
+               'ruq' => '', // ruq-*
+               'simple' => '', // en
+               'sr' => '', // sr-*
+               'tg' => '', // tg-*
+               'tp' => '', // tokipona
+               'tt' => '', // tt-*
+               'ug' => '', // ug-*
+               'zh' => '', // zh-*
+               'zh-classical' => '', // lzh
+               'zh-cn' => '', // zh
+               'zh-sg' => '', // zh
+               'zh-hk' => '', // zh
+               'zh-min-nan' => '', //
+               'zh-mo' => '', // zh
+               'zh-my' => '', // zh
+               'zh-tw' => '', // zh
+               'zh-yue' => '', // yue
+       );
+
+       /**
+        * Sets the script description and registers its command line options.
+        *
+        * Options registered with the fourth addOption() argument set to true
+        * take a value; all other options are plain switches.
+        */
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = 'Script to generate statistics about the localisation ' .
+                       'level of one or more message groups';
+               $this->addOption(
+                       'groups',
+                       '(optional) Comma separated list of groups',
+                       false, /*required*/
+                       true /*has arg*/
+               );
+               $this->addOption(
+                       'output',
+                       '(optional) csv: Comma Separated Values, wiki: MediaWiki syntax, ' .
+                               'text: Text with tabs. Default: default',
+                       false, /*required*/
+                       true /*has arg*/
+               );
+               $this->addOption(
+                       'skiplanguages',
+                       '(optional) Comma separated list of languages to be skipped',
+                       false, /*required*/
+                       true /*has arg*/
+               );
+               $this->addOption(
+                       'skipzero',
+                       '(optional) Skip languages that do not have any localisation at all'
+               );
+               $this->addOption(
+                       'legenddetail',
+                       '(optional) Page name for legend to be transcluded at the top of the details table',
+                       false, /*required*/
+                       true /*has arg*/
+               );
+               $this->addOption(
+                       'legendsummary',
+                       '(optional) Page name for legend to be transcluded at the top of the summary table',
+                       false, /*required*/
+                       true /*has arg*/
+               );
+               $this->addOption(
+                       'fuzzy',
+                       '(optional) Add column for fuzzy counts'
+               );
+               $this->addOption(
+                       'speakers',
+                       '(optional) Add column for number of speakers (est.). ' .
+                       'Only valid when combined with "most"'
+               );
+               $this->addOption(
+                       'nol10n',
+                       '(optional) Do not add localised language name if I18ntags is installed'
+               );
+               $this->addOption(
+                       'continent',
+                       '(optional) Add a continent column. Only available when output is ' .
+                       '"wiki" or not specified.'
+               );
+               // Plain switch: execute() tests this option with hasOption() only, so it
+               // must not be registered as taking a value. Registered with "has arg", a
+               // bare --summary is refused or consumes the following argument.
+               $this->addOption(
+                       'summary',
+                       '(optional) Add a summary with counts and scores per continent category ' .
+                       'and totals. Only available for a valid "most" value.'
+               );
+               $this->addOption(
+                       'wmfscore',
+                       'Only output WMF language code and weighted score for all ' .
+                       'language codes for weighing group "wikimedia" in CSV. This ' .
+                       'report must keep a stable layout as it is used/will be ' .
+                       'used in the Wikimedia statistics.'
+               );
+               $this->addOption(
+                       'most',
+                       '(optional) "mediawiki" or "wikimedia". Report on the 50 most ' .
+                       'spoken languages. Skipzero is ignored. If a valid scope is ' .
+                       'defined, the group list and fuzzy are ignored and the ' .
+                       'localisation levels are weighted and reported.',
+                       false, /*required*/
+                       true /*has arg*/
+               );
+       }
+
+       public function execute() {
+               $output = $this->getOption( 'output', 'default' );
+
+               // Select an output engine
+               switch ( $output ) {
+                       case 'wiki':
+                               $out = new wikiStatsOutput();
+                               break;
+                       case 'text':
+                               $out = new textStatsOutput();
+                               break;
+                       case 'csv':
+                               $out = new csvStatsOutput();
+                               break;
+                       default:
+                               $out = new TranslateStatsOutput();
+               }
+
+               $skipLanguages = array();
+               if ( $this->hasOption( 'skiplanguages' ) ) {
+                       $skipLanguages = array_map(
+                               'trim',
+                               explode( ',', $this->getOption( 'skiplanguages' 
) )
+                       );
+               }
+
+               $reportScore = false;
+               // Check if score should be reported and prepare weights
+               $most = $this->getOption( 'most' );
+               $weights = array();
+               if ( $most && isset( $localisedWeights[$most] ) ) {
+                       $reportScore = true;
+
+                       foreach ( $localisedWeights[$most] as $weight ) {
+                               $weights[] = $weight;
+                       }
+               }
+
+               // check if l10n should be done
+               $l10n = false;
+               if ( ( $output === 'wiki' || $output === 'default' ) &&
+                       !$this->hasOption( 'nol10n' )
+               ) {
+                       $l10n = true;
+               }
+
+               $wmfscore = $this->hasOption( 'wmfscore ' );
+
+               // Get groups from input
+               $groups = array();
+               if ( $reportScore ) {
+                       $reqGroups = array_keys( $this->localisedWeights[$most] 
);
+               } elseif ( !$wmfscore ) {
+                       $reqGroups = array_map( 'trim', explode( ',', 
$this->getOption( 'groups' ) ) );
+               } else {
+                       $reqGroups = array_keys( 
$this->localisedWeights['wikimedia'] );
+               }
+
+               // List of all groups
+               $allGroups = MessageGroups::singleton()->getGroups();
+
+               // Get list of valid groups
+               foreach ( $reqGroups as $id ) {
+                       // Page translation group ids use spaces which are not 
nice on command line
+                       $id = str_replace( '_', ' ', $id );
+                       if ( isset( $allGroups[$id] ) ) {
+                               $groups[$id] = $allGroups[$id];
+                       } else {
+                               $this->output( "Unknown group: $id" );
+                       }
+               }
+
+               if ( $wmfscore ) {
+                       // Override/set parameters
+                       $out = new csvStatsOutput();
+                       $reportScore = true;
+
+                       $weights = array();
+                       foreach ( $this->localisedWeights['wikimedia'] as 
$weight ) {
+                               $weights[] = $weight;
+                       }
+                       $wmfscores = array();
+               }
+
+               if ( !count( $groups ) ) {
+                       showUsage();
+               }
+
+               // List of all languages.
+               $languages = Language::fetchLanguageNames( false );
+               // Default sorting order by language code, users can sort wiki 
output.
+               ksort( $languages );
+
+               if ( $this->hasOption( 'legenddetail' ) ) {
+                       $out->addFreeText( "{{" . $this->getOption( 
'legenddetail' ) . "}}\n" );
+               }
+
+               $totalWeight = 0;
+               if ( $reportScore ) {
+                       if ( $wmfscore ) {
+                               foreach ( $this->localisedWeights['wikimedia'] 
as $weight ) {
+                                       $totalWeight += $weight;
+                               }
+                       } else {
+                               foreach ( $this->localisedWeights[$most] as 
$weight ) {
+                                       $totalWeight += $weight;
+                               }
+                       }
+               }
+
+               $showContinent = $this->getOption( 'continent' );
+               if ( !$wmfscore ) {
+                       // Output headers
+                       $out->heading();
+
+                       $out->blockstart();
+
+                       if ( $most ) {
+                               $out->element( ( $l10n ? 
"{{int:translate-gs-pos}}" : 'Pos.' ), true );
+                       }
+
+                       $out->element( ( $l10n ? "{{int:translate-gs-code}}" : 
'Code' ), true );
+                       $out->element( ( $l10n ? 
"{{int:translate-page-language}}" : 'Language' ), true );
+                       if ( $showContinent ) {
+                               $out->element( ( $l10n ? 
"{{int:translate-gs-continent}}" : 'Continent' ), true );
+                       }
+
+                       if ( $most && $this->hasOption( 'speakers' ) ) {
+                               $out->element( ( $l10n ? 
"{{int:translate-gs-speakers}}" : 'Speakers' ), true );
+                       }
+
+                       if ( $reportScore ) {
+                               $out->element(
+                                       ( $l10n ? "{{int:translate-gs-score}}" 
: 'Score' ) . ' (' . $totalWeight . ')',
+                                       true
+                               );
+                       }
+
+                       /**
+                        * @var $g MessageGroup
+                        */
+                       foreach ( $groups as $g ) {
+                               // Add unprocessed description of group as 
heading
+                               if ( $reportScore ) {
+                                       $gid = $g->getId();
+                                       $heading = $g->getLabel() . " (" . 
$this->localisedWeights[$most][$gid] . ")";
+                               } else {
+                                       $heading = $g->getLabel();
+                               }
+                               $out->element( $heading, true );
+                               if ( !$reportScore && $this->hasOption( 'fuzzy' 
) ) {
+                                       $out->element( ( $l10n ? 
"{{int:translate-percentage-fuzzy}}" : 'Fuzzy' ), true );
+                               }
+                       }
+
+                       $out->blockend();
+               }
+
+               $rows = array();
+               foreach ( $languages as $code => $name ) {
+                       // Skip list
+                       if ( in_array( $code, $skipLanguages ) ) {
+                               continue;
+                       }
+                       $rows[$code] = array();
+               }
+
+               foreach ( $groups as $groupName => $g ) {
+                       $stats = MessageGroupStats::forGroup( $groupName );
+
+                       // Perform the statistic calculations on every language
+                       foreach ( $languages as $code => $name ) {
+                               // Skip list
+                               if ( !$most && in_array( $code, $skipLanguages 
) ) {
+                                       continue;
+                               }
+
+                               // Do not calculate if we do not need it for 
anything.
+                               if ( $wmfscore && isset( 
$wikimediaCodeMap[$code] ) && $wikimediaCodeMap[$code] == '' ) {
+                                       continue;
+                               }
+
+                               // If --most is set, skip all other
+                               if ( $most && !isset( 
$mostSpokenLanguages[$code] ) ) {
+                                       continue;
+                               }
+
+                               $total = 
$stats[$code][MessageGroupStats::TOTAL];
+                               $translated = 
$stats[$code][MessageGroupStats::TRANSLATED];
+                               $fuzzy = 
$stats[$code][MessageGroupStats::FUZZY];
+
+                               $rows[$code][] = array( false, $translated, 
$total );
+
+                               if ( $this->hasOption( 'fuzzy' ) ) {
+                                       $rows[$code][] = array( true, $fuzzy, 
$total );
+                               }
+                       }
+
+                       unset( $collection );
+               }
+
+               // init summary array
+               $summarise = false;
+               if ( $this->hasOption( 'summary' ) ) {
+                       $summarise = true;
+                       $summary = array();
+               }
+
+               foreach ( $languages as $code => $name ) {
+                       // Skip list
+                       if ( !$most && in_array( $code, $skipLanguages ) ) {
+                               continue;
+                       }
+
+                       // Skip unneeded
+                       if ( $wmfscore && isset( $wikimediaCodeMap[$code] ) && 
$wikimediaCodeMap[$code] == '' ) {
+                               continue;
+                       }
+
+                       // If --most is set, skip all other
+                       if ( $most && !isset( $mostSpokenLanguages[$code] ) ) {
+                               continue;
+                       }
+
+                       $columns = $rows[$code];
+
+                       $allZero = true;
+                       foreach ( $columns as $fields ) {
+                               if ( intval( $fields[1] ) !== 0 ) {
+                                       $allZero = false;
+                               }
+                       }
+
+                       // Skip dummy languages if requested
+                       if ( $allZero && $this->hasOption( 'skipzero' ) ) {
+                               continue;
+                       }
+
+                       // Output the the row
+                       if ( !$wmfscore ) {
+                               $out->blockstart();
+                       }
+
+                       // Fill language position field
+                       if ( $most ) {
+                               $out->element( 
$this->mostSpokenLanguages[$code][0] );
+                       }
+
+                       // Fill language name field
+                       if ( !$wmfscore ) {
+                               // Fill language code field
+                               $out->element( $code );
+
+                               if ( $l10n && function_exists( 'efI18nTagsInit' 
) ) {
+                                       $out->element( "{{#languagename:" . 
$code . "}}" );
+                               } else {
+                                       $out->element( $name );
+                               }
+                       }
+
+                       // Fill continent field
+                       if ( $showContinent ) {
+                               if ( $this->mostSpokenLanguages[$code][2] == 
'multiple' ) {
+                                       $continent = ( $l10n ? 
"{{int:translate-gs-multiple}}" : 'Multiple' );
+                               } else {
+                                       $continent = $l10n ?
+                                               "{{int:timezoneregion-" . 
$this->mostSpokenLanguages[$code][2] . "}}" :
+                                               ucfirst( 
$this->mostSpokenLanguages[$code][2] );
+                               }
+
+                               $out->element( $continent );
+                       }
+
+                       // Fill speakers field
+                       if ( $most && $this->hasOption( 'speakers' ) ) {
+                               $out->element( number_format( 
$this->mostSpokenLanguages[$code][1] ) );
+                       }
+
+                       // Fill the score field
+                       if ( $reportScore ) {
+                               // Keep count
+                               $i = 0;
+                               // Start with 0 points
+                               $score = 0;
+
+                               foreach ( $columns as $fields ) {
+                                       list( , $upper, $total ) = $fields;
+                                       // Weigh the score and add it to the 
current score
+                                       $score += ( $weights[$i] * $upper ) / 
$total;
+                                       $i++;
+                               }
+
+                               // Report a round numbers
+                               $score = number_format( $score, 0 );
+
+                               if ( $summarise ) {
+                                       $continent = 
$this->mostSpokenLanguages[$code][2];
+                                       if ( isset( $summary[$continent] ) ) {
+                                               $newcount = 
$summary[$continent][0] + 1;
+                                               $newscore = 
$summary[$continent][1] + (int)$score;
+                                       } else {
+                                               $newcount = 1;
+                                               $newscore = $score;
+                                       }
+
+                                       $summary[$continent] = array( 
$newcount, $newscore );
+                               }
+
+                               if ( $wmfscore ) {
+                                       // Multiple variants can be used for 
the same wiki.
+                                       // Store the scores in an array and 
output them later
+                                       // when they can be averaged.
+                                       if ( isset( $wikimediaCodeMap[$code] ) 
) {
+                                               $wmfcode = 
$wikimediaCodeMap[$code];
+                                       } else {
+                                               $codeparts = explode( '-', 
$code );
+                                               $wmfcode = $codeparts[0];
+                                       }
+
+                                       if ( isset( $wmfscores[$wmfcode] ) ) {
+                                               $count = 
$wmfscores[$wmfcode]['count'] + 1;
+                                               $tmpWmfScore = 
(int)$wmfscores[$wmfcode]['score'];
+                                               $tmpWmfCount = 
(int)$wmfscores[$wmfcode]['count'];
+                                               $score = ( ( $tmpWmfCount * 
$tmpWmfScore ) + (int)$score ) / $count;
+                                               $wmfscores[$wmfcode] = array( 
'score' => $score, 'count' => $count );
+                                       } else {
+                                               $wmfscores[$wmfcode] = array( 
'score' => $score, 'count' => 1 );
+                                       }
+                               } else {
+                                       $out->element( $score );
+                               }
+                       }
+
+                       // Fill fields for groups
+                       if ( !$wmfscore ) {
+                               foreach ( $columns as $fields ) {
+                                       list( $invert, $upper, $total ) = 
$fields;
+                                       $c = $out->formatPercent( $upper, 
$total, $invert );
+                                       $out->element( $c );
+                               }
+
+                               $out->blockend();
+                       }
+               }
+
+               $out->footer();
+
+               if ( $reportScore && $this->hasOption( 'summary' ) ) {
+                       if ( $reportScore && $this->hasOption( 'legendsummary' 
) ) {
+                               $out->addFreeText( "{{" . $this->getOption( 
'legendsummary' ) . "}}\n" );
+                       }
+
+                       $out->summaryheading();
+
+                       $out->blockstart();
+
+                       $out->element( $l10n ? "{{int:translate-gs-continent}}" 
: 'Continent', true );
+                       $out->element( $l10n ? "{{int:translate-gs-count}}" : 
'Count', true );
+                       $out->element( $l10n ? "{{int:translate-gs-avgscore}}" 
: 'Avg. score', true );
+
+                       $out->blockend();
+
+                       ksort( $summary );
+
+                       $totals = array( 0, 0 );
+
+                       foreach ( $summary as $key => $values ) {
+                               $out->blockstart();
+
+                               if ( $key == 'multiple' ) {
+                                       $out->element( $l10n ? 
"{{int:translate-gs-multiple}}" : 'Multiple' );
+                               } else {
+                                       $out->element( $l10n ? 
"{{int:timezoneregion-" . $key . "}}" : ucfirst( $key ) );
+                               }
+                               $out->element( $values[0] );
+                               $out->element( number_format( $values[1] / 
$values[0] ) );
+
+                               $out->blockend();
+
+                               $totals[0] += $values[0];
+                               $totals[1] += $values[1];
+                       }
+
+                       $out->blockstart();
+                       $out->element( $l10n ? "{{int:translate-gs-total}}" : 
'Total' );
+                       $out->element( $totals[0] );
+                       $out->element( number_format( $totals[1] / $totals[0] ) 
);
+                       $out->blockend();
+
+                       $out->footer();
+               }
+
+               // Custom output
+               if ( $wmfscore ) {
+                       ksort( $wmfscores );
+
+                       foreach ( $wmfscores as $code => $stats ) {
+                               echo $code . ';' . number_format( 
$stats['score'] ) . ";\n";
+                       }
+               }
+       }
+}
 
 /**
  * Provides heading, summaryheading and free text addition for stats output in
@@ -206,465 +697,5 @@
        }
 }
 
-if ( isset( $options['help'] ) ) {
-       showUsage();
-}
-
-// Show help and exit if '--most' does not have a valid value and no groups set
-if ( isset( $options['most'] ) &&
-       !isset( $localisedWeights[$options['most']] ) &&
-       !isset( $options['groups'] )
-) {
-       showUsage();
-}
-
-if ( !isset( $options['output'] ) ) {
-       $options['output'] = 'default';
-}
-
-# Select an output engine
-switch ( $options['output'] ) {
-       case 'wiki':
-               $out = new wikiStatsOutput();
-               break;
-       case 'text':
-               $out = new textStatsOutput();
-               break;
-       case 'csv':
-               $out = new csvStatsOutput();
-               break;
-       case 'default':
-               $out = new TranslateStatsOutput();
-               break;
-       default:
-               showUsage();
-}
-
-$skipLanguages = array();
-if ( isset( $options['skiplanguages'] ) ) {
-       $skipLanguages = array_map( 'trim', explode( ',', 
$options['skiplanguages'] ) );
-}
-
-$reportScore = false;
-// Check if score should be reported and prepare weights
-if ( isset( $options['most'] ) && isset( $localisedWeights[$options['most']] ) 
) {
-       $reportScore = true;
-       $weights = array();
-
-       foreach ( $localisedWeights[$options['most']] as $weight ) {
-               $weights[] = $weight;
-       }
-}
-
-// check if l10n should be done
-$l10n = false;
-if ( ( $options['output'] == 'wiki' || $options['output'] == 'default' ) &&
-       !isset( $options['nol10n'] )
-) {
-       $l10n = true;
-}
-
-$wmfscore = false;
-if ( isset( $options['wmfscore'] ) ) {
-       $wmfscore = true;
-}
-
-// Get groups from input
-$groups = array();
-if ( $reportScore ) {
-       $reqGroups = array_keys( $localisedWeights[$options['most']] );
-} elseif ( !$wmfscore ) {
-       $reqGroups = array_map( 'trim', explode( ',', $options['groups'] ) );
-} else {
-       $reqGroups = array_keys( $localisedWeights['wikimedia'] );
-}
-
-// List of all groups
-$allGroups = MessageGroups::singleton()->getGroups();
-
-// Get list of valid groups
-foreach ( $reqGroups as $id ) {
-       // Page translation group ids use spaces which are not nice on command 
line
-       $id = str_replace( '_', ' ', $id );
-       if ( isset( $allGroups[$id] ) ) {
-               $groups[$id] = $allGroups[$id];
-       } else {
-               STDERR( "Unknown group: $id" );
-       }
-}
-
-if ( $wmfscore ) {
-       // Override/set parameters
-       $out = new csvStatsOutput();
-       $reportScore = true;
-
-       $weights = array();
-       foreach ( $localisedWeights['wikimedia'] as $weight ) {
-               $weights[] = $weight;
-       }
-       $wmfscores = array();
-}
-
-if ( !count( $groups ) ) {
-       showUsage();
-}
-
-// List of all languages.
-$languages = Language::getLanguageNames( false );
-// Default sorting order by language code, users can sort wiki output.
-ksort( $languages );
-
-if ( isset( $options['legenddetail'] ) ) {
-       $out->addFreeText( "{{" . $options['legenddetail'] . "}}\n" );
-}
-
-$totalWeight = 0;
-if ( $reportScore ) {
-       if ( $wmfscore ) {
-               foreach ( $localisedWeights['wikimedia'] as $weight ) {
-                       $totalWeight += $weight;
-               }
-       } else {
-               foreach ( $localisedWeights[$options['most']] as $weight ) {
-                       $totalWeight += $weight;
-               }
-       }
-}
-
-if ( !$wmfscore ) {
-       // Output headers
-       $out->heading();
-
-       $out->blockstart();
-
-       if ( isset( $options['most'] ) ) {
-               $out->element( ( $l10n ? "{{int:translate-gs-pos}}" : 'Pos.' ), 
true );
-       }
-
-       $out->element( ( $l10n ? "{{int:translate-gs-code}}" : 'Code' ), true );
-       $out->element( ( $l10n ? "{{int:translate-page-language}}" : 'Language' 
), true );
-       if ( isset( $options['continent'] ) ) {
-               $out->element( ( $l10n ? "{{int:translate-gs-continent}}" : 
'Continent' ), true );
-       }
-
-       if ( isset( $options['most'] ) && isset( $options['speakers'] ) ) {
-               $out->element( ( $l10n ? "{{int:translate-gs-speakers}}" : 
'Speakers' ), true );
-       }
-
-       if ( $reportScore ) {
-               $out->element(
-                       ( $l10n ? "{{int:translate-gs-score}}" : 'Score' ) . ' 
(' . $totalWeight . ')',
-                       true
-               );
-       }
-
-       /**
-        * @var $g MessageGroup
-        */
-       foreach ( $groups as $g ) {
-               // Add unprocessed description of group as heading
-               if ( $reportScore ) {
-                       $gid = $g->getId();
-                       $heading = $g->getLabel() . " (" . 
$localisedWeights[$options['most']][$gid] . ")";
-               } else {
-                       $heading = $g->getLabel();
-               }
-               $out->element( $heading, true );
-               if ( !$reportScore && isset( $options['fuzzy'] ) ) {
-                       $out->element( ( $l10n ? 
"{{int:translate-percentage-fuzzy}}" : 'Fuzzy' ), true );
-               }
-       }
-
-       $out->blockend();
-}
-
-$rows = array();
-foreach ( $languages as $code => $name ) {
-       // Skip list
-       if ( in_array( $code, $skipLanguages ) ) {
-               continue;
-       }
-       $rows[$code] = array();
-}
-
-foreach ( $groups as $groupName => $g ) {
-       $stats = MessageGroupStats::forGroup( $groupName );
-
-       // Perform the statistic calculations on every language
-       foreach ( $languages as $code => $name ) {
-               // Skip list
-               if ( !isset( $options['most'] ) && in_array( $code, 
$skipLanguages ) ) {
-                       continue;
-               }
-
-               // Do not calculate if we do not need it for anything.
-               if ( $wmfscore && isset( $wikimediaCodeMap[$code] ) && 
$wikimediaCodeMap[$code] == '' ) {
-                       continue;
-               }
-
-               // If --most is set, skip all other
-               if ( isset( $options['most'] ) && !isset( 
$mostSpokenLanguages[$code] ) ) {
-                       continue;
-               }
-
-               $total = $stats[$code][MessageGroupStats::TOTAL];
-               $translated = $stats[$code][MessageGroupStats::TRANSLATED];
-               $fuzzy = $stats[$code][MessageGroupStats::FUZZY];
-
-               $rows[$code][] = array( false, $translated, $total );
-
-               if ( isset( $options['fuzzy'] ) ) {
-                       $rows[$code][] = array( true, $fuzzy, $total );
-               }
-       }
-
-       unset( $collection );
-}
-
-// init summary array
-$summarise = false;
-if ( isset( $options['summary'] ) ) {
-       $summarise = true;
-       $summary = array();
-}
-
-foreach ( $languages as $code => $name ) {
-       // Skip list
-       if ( !isset( $options['most'] ) && in_array( $code, $skipLanguages ) ) {
-               continue;
-       }
-
-       // Skip unneeded
-       if ( $wmfscore && isset( $wikimediaCodeMap[$code] ) && 
$wikimediaCodeMap[$code] == '' ) {
-               continue;
-       }
-
-       // If --most is set, skip all other
-       if ( isset( $options['most'] ) && !isset( $mostSpokenLanguages[$code] ) 
) {
-               continue;
-       }
-
-       $columns = $rows[$code];
-
-       $allZero = true;
-       foreach ( $columns as $fields ) {
-               if ( intval( $fields[1] ) !== 0 ) {
-                       $allZero = false;
-               }
-       }
-
-       // Skip dummy languages if requested
-       if ( $allZero && isset( $options['skipzero'] ) ) {
-               continue;
-       }
-
-       // Output the the row
-       if ( !$wmfscore ) {
-               $out->blockstart();
-       }
-
-       // Fill language position field
-       if ( isset( $options['most'] ) ) {
-               $out->element( $mostSpokenLanguages[$code][0] );
-       }
-
-       // Fill language name field
-       if ( !$wmfscore ) {
-               // Fill language code field
-               $out->element( $code );
-
-               if ( $l10n && function_exists( 'efI18nTagsInit' ) ) {
-                       $out->element( "{{#languagename:" . $code . "}}" );
-               } else {
-                       $out->element( $name );
-               }
-       }
-
-       // Fill continent field
-       if ( isset( $options['continent'] ) ) {
-               if ( $mostSpokenLanguages[$code][2] == 'multiple' ) {
-                       $continent = ( $l10n ? "{{int:translate-gs-multiple}}" 
: 'Multiple' );
-               } else {
-                       $continent = $l10n ?
-                               "{{int:timezoneregion-" . 
$mostSpokenLanguages[$code][2] . "}}" :
-                               ucfirst( $mostSpokenLanguages[$code][2] );
-               }
-
-               $out->element( $continent );
-       }
-
-       // Fill speakers field
-       if ( isset( $options['most'] ) && isset( $options['speakers'] ) ) {
-               $out->element( number_format( $mostSpokenLanguages[$code][1] ) 
);
-       }
-
-       // Fill the score field
-       if ( $reportScore ) {
-               // Keep count
-               $i = 0;
-               // Start with 0 points
-               $score = 0;
-
-               foreach ( $columns as $fields ) {
-                       list( $invert, $upper, $total ) = $fields;
-                       // Weigh the score and add it to the current score
-                       $score += ( $weights[$i] * $upper ) / $total;
-                       $i++;
-               }
-
-               // Report a round numbers
-               $score = number_format( $score, 0 );
-
-               if ( $summarise ) {
-                       $continent = $mostSpokenLanguages[$code][2];
-                       if ( isset( $summary[$continent] ) ) {
-                               $newcount = $summary[$continent][0] + 1;
-                               $newscore = $summary[$continent][1] + $score;
-                       } else {
-                               $newcount = 1;
-                               $newscore = $score;
-                       }
-
-                       $summary[$continent] = array( $newcount, $newscore );
-               }
-
-               if ( $wmfscore ) {
-                       // Multiple variants can be used for the same wiki.
-                       // Store the scores in an array and output them later
-                       // when they can be averaged.
-                       if ( isset( $wikimediaCodeMap[$code] ) ) {
-                               $wmfcode = $wikimediaCodeMap[$code];
-                       } else {
-                               $codeparts = explode( '-', $code );
-                               $wmfcode = $codeparts[0];
-                       }
-
-                       if ( isset( $wmfscores[$wmfcode] ) ) {
-                               $count = $wmfscores[$wmfcode]['count'] + 1;
-                               $tmpWmfScore = (int) 
$wmfscores[$wmfcode]['score'];
-                               $tmpWmfCount = (int) 
$wmfscores[$wmfcode]['count'];
-                               $score = ( ( $tmpWmfCount * $tmpWmfScore ) + 
$score ) / $count;
-                               $wmfscores[$wmfcode] = array( 'score' => 
$score, 'count' => $count );
-                       } else {
-                               $wmfscores[$wmfcode] = array( 'score' => 
$score, 'count' => 1 );
-                       }
-               } else {
-                       $out->element( $score );
-               }
-       }
-
-       // Fill fields for groups
-       if ( !$wmfscore ) {
-               foreach ( $columns as $fields ) {
-                       list( $invert, $upper, $total ) = $fields;
-                       $c = $out->formatPercent( $upper, $total, $invert );
-                       $out->element( $c );
-               }
-
-               $out->blockend();
-       }
-}
-
-$out->footer();
-
-if ( $reportScore && isset( $options['summary'] ) ) {
-       if ( $reportScore && isset( $options['legendsummary'] ) ) {
-               $out->addFreeText( "{{" . $options['legendsummary'] . "}}\n" );
-       }
-
-       $out->summaryheading();
-
-       $out->blockstart();
-
-       $out->element( $l10n ? "{{int:translate-gs-continent}}" : 'Continent', 
true );
-       $out->element( $l10n ? "{{int:translate-gs-count}}" : 'Count', true );
-       $out->element( $l10n ? "{{int:translate-gs-avgscore}}" : 'Avg. score', 
true );
-
-       $out->blockend();
-
-       ksort( $summary );
-
-       $totals = array( 0, 0 );
-
-       foreach ( $summary as $key => $values ) {
-               $out->blockstart();
-
-               if ( $key == 'multiple' ) {
-                       $out->element( $l10n ? "{{int:translate-gs-multiple}}" 
: 'Multiple' );
-               } else {
-                       $out->element( $l10n ? "{{int:timezoneregion-" . $key . 
"}}" : ucfirst( $key ) );
-               }
-               $out->element( $values[0] );
-               $out->element( number_format( $values[1] / $values[0] ) );
-
-               $out->blockend();
-
-               $totals[0] += $values[0];
-               $totals[1] += $values[1];
-       }
-
-       $out->blockstart();
-       $out->element( $l10n ? "{{int:translate-gs-total}}" : 'Total' );
-       $out->element( $totals[0] );
-       $out->element( number_format( $totals[1] / $totals[0] ) );
-       $out->blockend();
-
-       $out->footer();
-}
-
-// Custom output
-if ( $wmfscore ) {
-       ksort( $wmfscores );
-
-       foreach ( $wmfscores as $code => $stats ) {
-               echo $code . ';' . number_format( $stats['score'] ) . ";\n";
-       }
-}
-
-/**
- * Print a usage message.
- */
-function showUsage() {
-       $msg = <<<PHP
-       --help : this help message
-       --groups LIST: comma separated list of groups
-       --skiplanguages LIST: comma separated list of skipped languages
-       --skipzero : skip languages that do not have any localisation at all
-       --fuzzy : add column for fuzzy counts
-       --output TYPE: select another output engine
-               * 'csv'      : Comma Separated Values.
-               * 'wiki'     : MediaWiki syntax.
-               * 'text'     : Text with tabs.
-       --most : [SCOPE]: report on the 50 most spoken languages. Skipzero is
-                       ignored. If a valid scope is defined, the group list
-                       and fuzzy are ignored and the localisation levels are
-                       weighted and reported.
-               * mediawiki:
-                       core-0-mostused (30%)
-                       core (30%)
-                       ext-0-wikimedia (20%)
-                       ext-0-all (20%)
-               * wikimedia:
-                       core-0-mostused (40%)
-                       core (30%)
-                       ext-0-wikimedia (30%)
-       --speakers : add column for number of speakers (est.). Only valid when
-                    combined with --most.
-       --nol10n : do not add localised language name if I18ntags is installed.
-       --continent : add a continent column. Only available when output is
-                     'wiki' or not specified.
-       --summary : add a summary with counts and scores per continent category
-                   and totals. Only available for a valid 'most' value.
-       --legenddetail : Page name for legend to be transcluded at the top of
-                        the details table
-       --legendsummary : Page name for legend to be transcluded at the top of
-                         the summary table
-       --wmfscore : Only output WMF language code and weighted score for all
-                    language codes for weighing group 'wikimedia' in CSV. This
-                    report must keep a stable layout as it is used/will be
-                    used in the Wikimedia statistics.
-
-PHP;
-       STDERR( $msg );
-       exit( 1 );
-}
+$maintClass = 'GroupStatictics';
+require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/89114
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I0298d797eecdab6d49f94a5b4a8859d69e008f9f
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/extensions/Translate
Gerrit-Branch: master
Gerrit-Owner: Siebrand <siebr...@wikimedia.org>
Gerrit-Reviewer: Nikerabbit <niklas.laxst...@gmail.com>
Gerrit-Reviewer: Siebrand <siebr...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to