Gilles has submitted this change and it was merged.

Change subject: Track opt-out ratio
......................................................................


Track opt-out ratio

There is not preference history data, so we store daily
counts in an intermediate table and query that table to build
the tsv files.

Change-Id: I697f5db6124d6ddccc2b97680b20fb5d4797488e
Mingle: https://wikimedia.mingle.thoughtworks.com/projects/multimedia/cards/735
---
A build-optout-tsvs
M generate.py
A optout/global.sql
A optout/init.sql
A optout/template.sql
5 files changed, 99 insertions(+), 0 deletions(-)

Approvals:
  Gilles: Verified; Looks good to me, approved



diff --git a/build-optout-tsvs b/build-optout-tsvs
new file mode 100755
index 0000000..36be7e0
--- /dev/null
+++ b/build-optout-tsvs
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+if [[ -z "$REMOTEUSERHOME" ]]
+then
+       echo "You must define the REMOTEUSERHOME environment variable before 
calling this script"
+       exit 1
+fi
+
+MYSQL_CMD="mysql --defaults-file=~/.my.cnf.relevant log"
+SQL_DIR="$REMOTEUSERHOME/tsvs_sql"
+TSV_DIR="$REMOTEUSERHOME/tsvs_new"
+PUBLIC_DIR="/a/public-datasets/all/multimedia"
+CHECKOUT_DIR="$REMOTEUSERHOME/multimedia"
+
+$CHECKOUT_DIR/deploy
+
+for sqlpath in `ls $SQL_DIR/optout/*.sql`; do
+       wikiname=`basename $sqlpath | sed "s/.sql//"`
+       echo "Updating optout counts for $wikiname wiki..."
+       tsvpath=$TSV_DIR/mvo_$wikiname.tsv
+       $MYSQL_CMD < $sqlpath > $tsvpath
+       mv -f $tsvpath $PUBLIC_DIR/media-viewer-optout-$wikiname.tsv
+done
diff --git a/generate.py b/generate.py
index a2d866b..e2f6e9c 100755
--- a/generate.py
+++ b/generate.py
@@ -63,6 +63,7 @@
         os.path.dirname(os.path.realpath(__file__)) +
         '/' + folder + '/template.sql').read(10000)
     sql = sql.replace('%wiki%', replacement)
+    sql = sql.replace('%wikidb%', wiki)
 
     if metric is not None:
         sql = sql.replace('%metricname%', metric['name'])
@@ -87,3 +88,6 @@
         generate('geoperf', wiki, geo_perf_metric)
 
     generate('actions', wiki)
+
+    if wiki != 'global':
+        generate('optout', wiki)
diff --git a/optout/global.sql b/optout/global.sql
new file mode 100644
index 0000000..cb1a835
--- /dev/null
+++ b/optout/global.sql
@@ -0,0 +1,12 @@
+-- return the numbers for the recent days
+SELECT
+    DATE_FORMAT(day, '%Y-%m-%d') as datestring,
+    all_touched,
+    all_active,
+    optout_touched,
+    optout_active,
+    optout_touched / (all_touched + optout_touched) * 100 
optout_touched_percent,
+    optout_active / (all_active + optout_active) * 100 optout_active_percent
+FROM
+    staging.mediaviewer_optout
+;
diff --git a/optout/init.sql b/optout/init.sql
new file mode 100644
index 0000000..467cd74
--- /dev/null
+++ b/optout/init.sql
@@ -0,0 +1,13 @@
+-- schema for the table holding intermediate optout results
+-- the table is stored on analytics-store, in the staging DB
+CREATE TABLE mediaviewer_optout (
+    day DATE,
+    wikidb VARCHAR(16) COMMENT 'wiki dbname (e.g. "commonswiki")',
+    all_touched INT COMMENT 'number of active users per user_touched 
(user_touched > now() - 30 days)',
+    all_active INT COMMENT 'number of active users per standard definition (at 
least 5 edits in last 30 days)',
+    optout_total INT COMMENT 'total number of opted-out users',
+    optout_touched INT COMMENT 'opted-out users who are active per 
user_touched',
+    optout_active INT COMMENT 'opted-out users who are active per standard 
definition'
+);
+CREATE UNIQUE INDEX mediaviewer_optout_day_wikidb ON mediaviewer_optout (day, 
wikidb);
+CREATE INDEX mediaviewer_optout_wikidb ON mediaviewer_optout (wikidb);
diff --git a/optout/template.sql b/optout/template.sql
new file mode 100644
index 0000000..9a462ee
--- /dev/null
+++ b/optout/template.sql
@@ -0,0 +1,47 @@
+USE %wikidb%;
+
+-- put today's numbers into a temporary table
+REPLACE INTO
+    staging.mediaviewer_optout (day, wikidb, all_touched, all_active, 
optout_total, optout_touched, optout_active)
+SELECT
+    CURDATE() day,
+    '%wikidb%' wikidb,
+    SUM(1) all_touched,
+    SUM(edits_in_last_30_days >= 5) all_active,
+    (SELECT COUNT(*) FROM user_properties WHERE up_property = 
'multimediaviewer-enable') optout_total,
+    SUM(up_value IS NOT NULL) optout_touched,
+    SUM(edits_in_last_30_days >= 5 AND up_value IS NOT NULL) optout_active
+FROM
+    user
+    LEFT JOIN user_properties ON user_id = up_user AND up_property = 
'multimediaviewer-enable'
+    LEFT JOIN user_groups ON ug_user = user_id AND ug_group = 'bot'
+    LEFT JOIN (
+        SELECT
+            user_id,
+            SUM(contribs) edits_in_last_30_days
+        FROM
+            user_daily_contribs
+        WHERE
+            day >= NOW() - INTERVAL 30 DAY
+        GROUP BY
+            user_id
+    ) edits_in_30 ON edits_in_30.user_id = user.user_id
+WHERE
+    ug_user IS NULL  -- not a bot
+    AND user_touched > NOW() - INTERVAL 30 DAY
+;
+
+-- return the numbers for the recent days
+SELECT
+    DATE_FORMAT(day, '%Y-%m-%d') as datestring,
+    all_touched,
+    all_active,
+    optout_touched,
+    optout_active,
+    optout_touched / (all_touched + optout_touched) * 100 
optout_touched_percent,
+    optout_active / (all_active + optout_active) * 100 optout_active_percent
+FROM
+    staging.mediaviewer_optout
+WHERE
+    wikidb = '%wikidb%'
+;

-- 
To view, visit https://gerrit.wikimedia.org/r/143501
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I697f5db6124d6ddccc2b97680b20fb5d4797488e
Gerrit-PatchSet: 6
Gerrit-Project: analytics/multimedia
Gerrit-Branch: master
Gerrit-Owner: GergÅ‘ Tisza <[email protected]>
Gerrit-Reviewer: GergÅ‘ Tisza <[email protected]>
Gerrit-Reviewer: Gilles <[email protected]>
Gerrit-Reviewer: Springle <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to