Gilles has submitted this change and it was merged. Change subject: Track opt-out ratio ......................................................................
Track opt-out ratio There is not preference history data, so we store daily counts in an intermediate table and query that table to build the tsv files. Change-Id: I697f5db6124d6ddccc2b97680b20fb5d4797488e Mingle: https://wikimedia.mingle.thoughtworks.com/projects/multimedia/cards/735 --- A build-optout-tsvs M generate.py A optout/global.sql A optout/init.sql A optout/template.sql 5 files changed, 99 insertions(+), 0 deletions(-) Approvals: Gilles: Verified; Looks good to me, approved diff --git a/build-optout-tsvs b/build-optout-tsvs new file mode 100755 index 0000000..36be7e0 --- /dev/null +++ b/build-optout-tsvs @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +if [[ -z "$REMOTEUSERHOME" ]] +then + echo "You must define the REMOTEUSERHOME environment variable before calling this script" + exit 1 +fi + +MYSQL_CMD="mysql --defaults-file=~/.my.cnf.relevant log" +SQL_DIR="$REMOTEUSERHOME/tsvs_sql" +TSV_DIR="$REMOTEUSERHOME/tsvs_new" +PUBLIC_DIR="/a/public-datasets/all/multimedia" +CHECKOUT_DIR="$REMOTEUSERHOME/multimedia" + +$CHECKOUT_DIR/deploy + +for sqlpath in `ls $SQL_DIR/optout/*.sql`; do + wikiname=`basename $sqlpath | sed "s/.sql//"` + echo "Updating optout counts for $wikiname wiki..." + tsvpath=$TSV_DIR/mvo_$wikiname.tsv + $MYSQL_CMD < $sqlpath > $tsvpath + mv -f $tsvpath $PUBLIC_DIR/media-viewer-optout-$wikiname.tsv +done diff --git a/generate.py b/generate.py index a2d866b..e2f6e9c 100755 --- a/generate.py +++ b/generate.py @@ -63,6 +63,7 @@ os.path.dirname(os.path.realpath(__file__)) + '/' + folder + '/template.sql').read(10000) sql = sql.replace('%wiki%', replacement) + sql = sql.replace('%wikidb%', wiki) if metric is not None: sql = sql.replace('%metricname%', metric['name']) @@ -87,3 +88,6 @@ generate('geoperf', wiki, geo_perf_metric) generate('actions', wiki) + + if wiki != 'global': + generate('optout', wiki) diff --git a/optout/global.sql b/optout/global.sql new file mode 100644 index 0000000..cb1a835 --- /dev/null +++ b/optout/global.sql @@ -0,0 +1,12 @@ +-- return the numbers for the recent days +SELECT + DATE_FORMAT(day, '%Y-%m-%d') as datestring, + all_touched, + all_active, + optout_touched, + optout_active, + optout_touched / (all_touched + optout_touched) * 100 optout_touched_percent, + optout_active / (all_active + optout_active) * 100 optout_active_percent +FROM + staging.mediaviewer_optout +; diff --git a/optout/init.sql b/optout/init.sql new file mode 100644 index 0000000..467cd74 --- /dev/null +++ b/optout/init.sql @@ -0,0 +1,13 @@ +-- schema for the table holding intermediate optout results +-- the table is stored on analytics-store, in the staging DB +CREATE TABLE mediaviewer_optout ( + day DATE, + wikidb VARCHAR(16) COMMENT 'wiki dbname (e.g. "commonswiki")', + all_touched INT COMMENT 'number of active users per user_touched (user_touched > now() - 30 days)', + all_active INT COMMENT 'number of active users per standard definition (at least 5 edits in last 30 days)', + optout_total INT COMMENT 'total number of opted-out users', + optout_touched INT COMMENT 'opted-out users who are active per user_touched', + optout_active INT COMMENT 'opted-out users who are active per standard definition' +); +CREATE UNIQUE INDEX mediaviewer_optout_day_wikidb ON mediaviewer_optout (day, wikidb); +CREATE INDEX mediaviewer_optout_wikidb ON mediaviewer_optout (wikidb); diff --git a/optout/template.sql b/optout/template.sql new file mode 100644 index 0000000..9a462ee --- /dev/null +++ b/optout/template.sql @@ -0,0 +1,47 @@ +USE %wikidb%; + +-- put today's numbers into a temporary table +REPLACE INTO + staging.mediaviewer_optout (day, wikidb, all_touched, all_active, optout_total, optout_touched, optout_active) +SELECT + CURDATE() day, + '%wikidb%' wikidb, + SUM(1) all_touched, + SUM(edits_in_last_30_days >= 5) all_active, + (SELECT COUNT(*) FROM user_properties WHERE up_property = 'multimediaviewer-enable') optout_total, + SUM(up_value IS NOT NULL) optout_touched, + SUM(edits_in_last_30_days >= 5 AND up_value IS NOT NULL) optout_active +FROM + user + LEFT JOIN user_properties ON user_id = up_user AND up_property = 'multimediaviewer-enable' + LEFT JOIN user_groups ON ug_user = user_id AND ug_group = 'bot' + LEFT JOIN ( + SELECT + user_id, + SUM(contribs) edits_in_last_30_days + FROM + user_daily_contribs + WHERE + day >= NOW() - INTERVAL 30 DAY + GROUP BY + user_id + ) edits_in_30 ON edits_in_30.user_id = user.user_id +WHERE + ug_user IS NULL -- not a bot + AND user_touched > NOW() - INTERVAL 30 DAY +; + +-- return the numbers for the recent days +SELECT + DATE_FORMAT(day, '%Y-%m-%d') as datestring, + all_touched, + all_active, + optout_touched, + optout_active, + optout_touched / (all_touched + optout_touched) * 100 optout_touched_percent, + optout_active / (all_active + optout_active) * 100 optout_active_percent +FROM + staging.mediaviewer_optout +WHERE + wikidb = '%wikidb%' +; -- To view, visit https://gerrit.wikimedia.org/r/143501 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I697f5db6124d6ddccc2b97680b20fb5d4797488e Gerrit-PatchSet: 6 Gerrit-Project: analytics/multimedia Gerrit-Branch: master Gerrit-Owner: Gergő Tisza <[email protected]> Gerrit-Reviewer: Gergő Tisza <[email protected]> Gerrit-Reviewer: Gilles <[email protected]> Gerrit-Reviewer: Springle <[email protected]> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
