Yurik has submitted this change and it was merged.

Change subject: Import from GitHub
......................................................................


Import from GitHub

Bug: T143048

Change-Id: I93756fc884019907ff9d1e8764111ce3f769c791
---
A .gitignore
A .gitreview
A LICENSE
A composer.json
A config.json
A src/Api.php
A src/NormalSite.php
A src/Site.php
A src/SiteMatrix.php
A src/SpecialSite.php
A src/TrackingCategories.php
A tracking-category-count.php
12 files changed, 310 insertions(+), 0 deletions(-)

Approvals:
  Yurik: Verified; Looks good to me, approved



diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d1502b0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+vendor/
+composer.lock
diff --git a/.gitreview b/.gitreview
new file mode 100644
index 0000000..acf2022
--- /dev/null
+++ b/.gitreview
@@ -0,0 +1,6 @@
+[gerrit]
+host=gerrit.wikimedia.org
+port=29418
+project=analytics/discovery-stats.git
+defaultbranch=master
+defaultrebase=0
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..cf1ab25
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org>
diff --git a/composer.json b/composer.json
new file mode 100644
index 0000000..4695297
--- /dev/null
+++ b/composer.json
@@ -0,0 +1,18 @@
+{
+       "description": "Discovery team's statistics thingie",
+       "license": "The Unlicense",
+       "homepage": "https://www.mediawiki.org/wiki/Wikimedia_Discovery",
+       "authors": [
+               {
+                       "name": "Max Semenik"
+               }
+       ],
+       "autoload": {
+               "classmap": [
+                       "src/"
+               ]
+       },
+       "require": {
+               "php": ">=5.5"
+       }
+}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..1f7abaf
--- /dev/null
+++ b/config.json
@@ -0,0 +1,8 @@
+{
+    "graphiteHost": "graphite.eqiad.wmnet",
+    "graphitePort": 8126,
+    "categories": {
+        "kartographer-tracking-category": "kartographer.pages.%WIKI%.hourly",
+        "graph-tracking-category": "graph.pages.%WIKI%.hourly"
+    }
+}
diff --git a/src/Api.php b/src/Api.php
new file mode 100644
index 0000000..18819a6
--- /dev/null
+++ b/src/Api.php
@@ -0,0 +1,20 @@
+<?php
+
+namespace DiscoveryStats;
+
+class Api {
+    public static function get( $url, $params ) {
+        $params['format'] = 'json';
+        $params['formatversion'] = 2;
+
+        $arr = [];
+        foreach ( $params as $key => $value ) {
+            $arr[] = $key . '=' . urlencode( $value );
+        }
+        $paramsStr = implode( '&', $arr );
+
+        return json_decode( file_get_contents( "{$url}/w/api.php?{$paramsStr}" ) );
+    }
+}
+
+ini_set( 'user_agent', 'Discovery team statistics' );
diff --git a/src/NormalSite.php b/src/NormalSite.php
new file mode 100644
index 0000000..1f722ea
--- /dev/null
+++ b/src/NormalSite.php
@@ -0,0 +1,20 @@
+<?php
+
+namespace DiscoveryStats;
+
+class NormalSite extends Site {
+    private $langCode;
+
+    public function __construct( $data, $langCode ) {
+        $this->langCode = $langCode;
+        parent::__construct( $data );
+    }
+
+    public function getFamily() {
+        return $this->data->code;
+    }
+
+    public function getCode() {
+        return $this->langCode;
+    }
+}
diff --git a/src/Site.php b/src/Site.php
new file mode 100644
index 0000000..4960514
--- /dev/null
+++ b/src/Site.php
@@ -0,0 +1,35 @@
+<?php
+
+namespace DiscoveryStats;
+
+abstract class Site {
+    protected $data;
+
+    public function __construct( $data ) {
+        $this->data = $data;
+    }
+
+    public function getUrl() {
+        return $this->data->url;
+    }
+
+    public function getName() {
+        return $this->data->sitename;
+    }
+
+    public function getDbName() {
+        return $this->data->dbname;
+    }
+
+    public function isPrivate() {
+        return isset( $this->data->private ) && $this->data->private !== false;
+    }
+
+    public function isFishbowl() {
+        return isset( $this->data->fishbowl ) && $this->data->fishbowl !== false;
+    }
+
+    public abstract function getFamily();
+
+    public abstract function getCode();
+}
diff --git a/src/SiteMatrix.php b/src/SiteMatrix.php
new file mode 100644
index 0000000..eb0474f
--- /dev/null
+++ b/src/SiteMatrix.php
@@ -0,0 +1,32 @@
+<?php
+
+namespace DiscoveryStats;
+
+class SiteMatrix {
+    private $sites = [];
+
+    public function getSites() {
+        if ( $this->sites ) {
+            return $this->sites;
+        }
+
+        $matrix = Api::get( 'https://meta.wikimedia.org',
+            [ 'action' => 'sitematrix' ]
+        );
+        $matrix = (array)$matrix->sitematrix;
+
+        foreach ( $matrix['specials'] as $site ) {
+            $this->sites[$site->dbname] = new SpecialSite( $site );
+        }
+        unset( $matrix['specials'] );
+        unset( $matrix['count'] );
+
+        foreach ( $matrix as $language ) {
+            foreach ( $language->site as $site ) {
+                $this->sites[$site->dbname] = new NormalSite( $site, $language->code );
+            }
+        }
+
+        return $this->sites;
+    }
+}
diff --git a/src/SpecialSite.php b/src/SpecialSite.php
new file mode 100644
index 0000000..2618d9d
--- /dev/null
+++ b/src/SpecialSite.php
@@ -0,0 +1,13 @@
+<?php
+
+namespace DiscoveryStats;
+
+class SpecialSite extends Site {
+    public function getFamily() {
+        return 'special';
+    }
+
+    public function getCode() {
+        return $this->data->code;
+    }
+}
diff --git a/src/TrackingCategories.php b/src/TrackingCategories.php
new file mode 100644
index 0000000..f9c3bf8
--- /dev/null
+++ b/src/TrackingCategories.php
@@ -0,0 +1,73 @@
+<?php
+
+namespace DiscoveryStats;
+
+class TrackingCategories {
+    private $site;
+
+    public function __construct( Site $site ) {
+        $this->site = $site;
+    }
+
+    public function getCounts( array $categories ) {
+        $mapping = $this->getCategoryNames( $categories );
+        $inverseMapping = array_flip( $mapping );
+
+        $titles = implode( '|', array_map(
+            function( $name ) {
+                return "Category:$name";
+            },
+            array_values( $mapping )
+        ) );
+        $result = Api::get( $this->site->getUrl(), [
+            'action' => 'query',
+            'prop' => 'categoryinfo',
+            'titles' => $titles,
+        ] );
+
+        $counts = [];
+        foreach ( $result->query->pages as $page ) {
+            if ( !isset( $page->categoryinfo ) ) {
+                continue;
+            }
+            list( , $cat ) = explode( ':', $page->title, 2 );
+            $counts[ $inverseMapping[$cat] ] = $page->categoryinfo->size;
+        }
+
+        return $counts;
+    }
+
+    private function getCategoryNames( array $categories ) {
+        // Get local tracking category name. Parse it because it might contain
+        // wikitext e.g. {{#ifeq:{{NAMESPACE}}||Articles with maps|Pages with maps}}.
+        // In case such difference is present, care about mainspace only.
+        $wikitext = implode( "\n\n", array_map(
+            function( $category ) {
+                return "$category={{int:$category}}";
+            },
+            $categories
+        ) );
+        $siteinfo = Api::get( $this->site->getUrl(), [
+            'action' => 'parse',
+            'title' => 'foo',
+            'contentmodel' => 'wikitext',
+            'text' => $wikitext,
+        ] );
+
+        $decoded = trim( htmlspecialchars_decode( strip_tags( $siteinfo->parse->text ) ) );
+        $mapping = [];
+        $lines = explode( "\n", $decoded );
+        foreach ( $lines as $line ) {
+            list( $key, $category ) = explode( '=', trim( $line ), 2 );
+            if ( !$category || !in_array( $key, $categories ) ) {
+                throw new \Exception( "{$this->site->getDbName()} returned an undexpected response: $decoded" );
+            }
+            if ( $category[0] == '<' ) {
+                continue; // Extension not installed
+            }
+            $mapping[$key] = $category;
+        }
+
+        return $mapping;
+    }
+}
diff --git a/tracking-category-count.php b/tracking-category-count.php
new file mode 100644
index 0000000..3bf8a86
--- /dev/null
+++ b/tracking-category-count.php
@@ -0,0 +1,59 @@
+<?php
+
+namespace DiscoveryStats;
+
+use Liuggio\StatsdClient\StatsdClient;
+use Liuggio\StatsdClient\Sender\SocketSender;
+use Liuggio\StatsdClient\Service\StatsdService;
+
+require_once( 'vendor/autoload.php' );
+
+$wikiBlacklist = [
+    'ukwikimedia', // redirected
+];
+
+$debug = in_array( '--debug', $argv );
+
+$config = json_decode( file_get_contents( 'config.json' ) );
+$config->categories = (array)$config->categories;
+$categoryKeys = array_keys( $config->categories );
+
+function recordToGraphite( $wiki, $metric, $count ) {
+    global $config;
+
+    if ( !$config->graphiteHost || !$config->graphitePort ) {
+        return;
+    }
+
+    $key = str_replace( '%WIKI%', $wiki, $config->categories[$metric] );
+
+    exec( "echo \"$metric $count `date +%s`\" | nc -q0 {$config->graphiteHost} {$config->graphitePort}" );
+}
+
+$matrix = new SiteMatrix;
+
+$totalCounts = array_fill_keys( $categoryKeys, 0 );
+foreach ( $matrix->getSites() as $site ) {
+    if ( $site->isPrivate() || in_array( $site->getDbName(), $wikiBlacklist ) ) {
+        continue;
+    }
+    $siteKey = $site->getFamily() . '.' . $site->getCode();
+    $tracking = new TrackingCategories( $site );
+
+    $counts = $tracking->getCounts( $categoryKeys );
+    foreach ( $counts as $metric => $count ) {
+        $totalCounts[$metric] += $count;
+        recordToGraphite( $siteKey, $metric, $count );
+    }
+    if ( $debug ) {
+        echo "{$site->getDbName()} "; var_dump($counts);
+    }
+}
+
+foreach ( $totalCounts as $metric => $count ) {
+    recordToGraphite( 'total', $metric, $count );
+}
+
+if ( $debug ) {
+    var_dump($totalCounts);
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/306699
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I93756fc884019907ff9d1e8764111ce3f769c791
Gerrit-PatchSet: 4
Gerrit-Project: analytics/discovery-stats
Gerrit-Branch: master
Gerrit-Owner: MaxSem <maxsem.w...@gmail.com>
Gerrit-Reviewer: Gehel <gleder...@wikimedia.org>
Gerrit-Reviewer: Yurik <yu...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to