Matthias Mullie has uploaded a new change for review. https://gerrit.wikimedia.org/r/71999
Change subject: Generate data for Echo preferences ...................................................................... Generate data for Echo preferences Bug: 46669 Bug: 46672 Bug: 46673 Change-Id: I132522ed4624c9d4598448159ebe146f8c252171 --- A .gitreview A README.md A cron.sh A echo_clicks.sh A echo_preferences_all.sh A echo_preferences_diff.sh A echo_views.sh A newbie_user_ids.sh A wiki_env.sh 9 files changed, 415 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/limn-ee-data refs/changes/99/71999/1 diff --git a/.gitreview b/.gitreview new file mode 100644 index 0000000..1f2f0b1 --- /dev/null +++ b/.gitreview @@ -0,0 +1,5 @@ +[gerrit] +host=gerrit.wikimedia.org +port=29418 +project=analytics/limn-ee-data.git +defaultbranch=master diff --git a/README.md b/README.md new file mode 100644 index 0000000..03f46d6 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +# ee-metrics + +Scripts generating data CSVs for certain graphs for WMF ee-dashboard features, on http://ee-dashboard.wmflabs.org/dashboards/enwiki-features/ + +# Usage + +cron.sh will be run on a daily basis. In there are the calls to the scripts that will generate the data. + +The real scripts can be called like this (from cron.sh): + + <script>.sh -h<dbhost> -w<wiki> + +<wiki> is the name of the wiki +<dbhost> is db slave the wiki's data lives (see https://office.wikimedia.org/wiki/Data_access) + +Scripts will write CSVs to /a/limn-public-data/ee/datasets/<wiki> + +The database queries in the scripts have no credentials and will be performed as the user executing the script. diff --git a/cron.sh b/cron.sh new file mode 100755 index 0000000..d9eadb4 --- /dev/null +++ b/cron.sh @@ -0,0 +1,10 @@ +DIR=`dirname $0` + +# this will be called via cron, on a daily basis, and will execute these scripts + +enwiki='-hdb1047.eqiad.wmnet -wenwiki' + +sh $DIR/echo_clicks.sh $enwiki +sh $DIR/echo_views.sh $enwiki +sh $DIR/echo_preferences_all.sh $enwiki +sh $DIR/echo_preferences_diff.sh $enwiki diff --git a/echo_clicks.sh b/echo_clicks.sh new file mode 100755 index 0000000..10c3f4f --- /dev/null +++ b/echo_clicks.sh @@ -0,0 +1,69 @@ +#!/bin/bash -e + +# http://www.mediawiki.org/wiki/Echo/Metrics#Clicks_Dashboards +# Based on EventLogging data + + +DIR=`dirname $0` + +# get $db & $wiki from args +. $DIR/wiki_env.sh -s`basename $0` $@ + +# config +dt=/a/limn-public-data/ee/datasets/$wiki +mkdir -p $dt + + +# clicks by category +category=" +SELECT + DATE(timestamp) AS 'date', + SUM(event_notificationType = 'edit-user-talk') AS 'edit-user-talk', + SUM(event_notificationType = 'edit-thank') AS 'edit-thank', + SUM(event_notificationType = 'mention') AS 'mention', + SUM(event_notificationType = 'page-linked') AS 'page-linked', + SUM(event_notificationType = 'pagetriage-mark-as-reviewed') AS 'pagetriage-mark-as-reviewed', + SUM(event_notificationType = 'reverted') AS 'reverted', + SUM(event_notificationType = 'welcome') AS 'welcome', + SUM(event_notificationType = 'gettingstarted-start-editing') AS 'gettingstarted-start-editing', + SUM(event_notificationType = 'user-rights') AS 'user-rights' +FROM EchoInteraction_5539940 +WHERE wiki = '$wiki' AND event_action = 'notification-link-click' +GROUP BY DATE(timestamp);" + +# clicks by source +src=" +SELECT + DATE(timestamp) AS 'date', + SUM(event_context = 'flyout') AS 'flyout', + SUM(event_context = 'archive') AS 'archive' +FROM EchoInteraction_5539940 +WHERE wiki = '$wiki' AND event_action = 'notification-link-click' +GROUP BY DATE(timestamp);" + +# clicks by user status (newbie|existing) +newbieids=`$DIR/newbie_user_ids.sh -h$db -w$wiki` +usertype=" +SELECT + DATE(timestamp) AS 'date', + SUM(event_userId IN ($newbieids)) AS 'newbie', + SUM(event_userId NOT IN ($newbieids)) AS 'existing' +FROM EchoInteraction_5539940 +WHERE wiki = '$wiki' AND event_action = 'notification-link-click' +GROUP BY DATE(timestamp);" + + +echo "Generating dumps" + +# CSV headers +echo "date,edit-user-talk,edit-thank,mention,page-linked,pagetriage-mark-as-reviewed,reverted,welcome,gettingstarted-start-editing,user-rights" > $dt/echo_clicks_by_category.csv +echo "date,flyout,archive" > $dt/echo_clicks_by_source.csv +echo "date,newbie,existing" > $dt/echo_clicks_by_user_type.csv + +# append daily data +echo $category | mysql -hdb1047.eqiad.wmnet -Dlog -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_clicks_by_category.csv +echo $src | mysql -hdb1047.eqiad.wmnet -Dlog -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_clicks_by_source.csv +echo $usertype | mysql -hdb1047.eqiad.wmnet -Dlog -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_clicks_by_user_type.csv + + +echo "Done" diff --git a/echo_preferences_all.sh b/echo_preferences_all.sh new file mode 100755 index 0000000..3b3bde6 --- /dev/null +++ b/echo_preferences_all.sh @@ -0,0 +1,93 @@ +#!/bin/bash -e + +# http://www.mediawiki.org/wiki/Echo/Metrics#Preferences_Dashboard +# Totals, based on user_properties & default values + + +DIR=`dirname $0` + +# get $db & $wiki from args +. $DIR/wiki_env.sh -s`basename $0` $@ + +# config +dt=/a/limn-public-data/ee/datasets/$wiki +mkdir -p $dt + + +# web notifications, selected value or default if none specified, only if notify-show-link != 0 +webnotifications=" +SELECT + CURDATE() AS 'date', + SUM(IF(p13.up_value = 0, 0, IFNULL(p1.up_value, 1))) AS 'echo-subscriptions-web-edit-user-talk', + SUM(IF(p13.up_value = 0, 0, IFNULL(p3.up_value, 1))) AS 'echo-subscriptions-web-edit-thank', + SUM(IF(p13.up_value = 0, 0, IFNULL(p5.up_value, 1))) AS 'echo-subscriptions-web-mention', + SUM(IF(p13.up_value = 0, 0, IFNULL(p7.up_value, 0))) AS 'echo-subscriptions-web-article-linked', + SUM(IF(p13.up_value = 0, 0, IFNULL(p9.up_value, 1))) AS 'echo-subscriptions-web-page-review', + SUM(IF(p13.up_value = 0, 0, IFNULL(p11.up_value, 1))) AS 'echo-subscriptions-web-reverted' +FROM user +LEFT JOIN user_properties AS p1 ON user_id = p1.up_user AND p1.up_property = 'echo-subscriptions-web-edit-user-talk' +LEFT JOIN user_properties AS p3 ON user_id = p3.up_user AND p3.up_property = 'echo-subscriptions-web-edit-thank' +LEFT JOIN user_properties AS p5 ON user_id = p5.up_user AND p5.up_property = 'echo-subscriptions-web-mention' +LEFT JOIN user_properties AS p7 ON user_id = p7.up_user AND p7.up_property = 'echo-subscriptions-web-article-linked' +LEFT JOIN user_properties AS p9 ON user_id = p9.up_user AND p9.up_property = 'echo-subscriptions-web-page-review' +LEFT JOIN user_properties AS p11 ON user_id = p11.up_user AND p11.up_property = 'echo-subscriptions-web-reverted' +LEFT JOIN user_properties AS p13 ON user_id = p13.up_user AND p13.up_property = 'echo-notify-show-link';" + +# email notifications, selected value or default if none specified, only if email-frequency != never +emailnotifications=" +SELECT + CURDATE() AS 'date', + SUM(IF(p14.up_value = -1, 0, IFNULL(p2.up_value, 0))) AS 'echo-subscriptions-email-edit-user-talk', + SUM(IF(p14.up_value = -1, 0, IFNULL(p4.up_value, 0))) AS 'echo-subscriptions-email-edit-thank', + SUM(IF(p14.up_value = -1, 0, IFNULL(p6.up_value, 0))) AS 'echo-subscriptions-email-mention', + SUM(IF(p14.up_value = -1, 0, IFNULL(p8.up_value, 0))) AS 'echo-subscriptions-email-article-linked', + SUM(IF(p14.up_value = -1, 0, IFNULL(p10.up_value, 0))) AS 'echo-subscriptions-email-page-review', + SUM(IF(p14.up_value = -1, 0, IFNULL(p12.up_value, 0))) AS 'echo-subscriptions-email-reverted' +FROM user +LEFT JOIN user_properties AS p2 ON user_id = p2.up_user AND p2.up_property = 'echo-subscriptions-email-edit-user-talk' +LEFT JOIN user_properties AS p4 ON user_id = p4.up_user AND p4.up_property = 'echo-subscriptions-email-edit-thank' +LEFT JOIN user_properties AS p6 ON user_id = p6.up_user AND p6.up_property = 'echo-subscriptions-email-mention' +LEFT JOIN user_properties AS p8 ON user_id = p8.up_user AND p8.up_property = 'echo-subscriptions-email-article-linked' +LEFT JOIN user_properties AS p10 ON user_id = p10.up_user AND p10.up_property = 'echo-subscriptions-email-page-review' +LEFT JOIN user_properties AS p12 ON user_id = p12.up_user AND p12.up_property = 'echo-subscriptions-email-reverted' +LEFT JOIN user_properties AS p14 ON user_id = p14.up_user AND p14.up_property = 'echo-email-frequency';" + +# email frequency preferences +emailfrequency=" +SELECT + CURDATE() AS 'date', + SUM(IF(up_value = -1, 1, 0)) AS 'never', + SUM(IF(up_value = 0 OR up_value IS NULL, 1, 0)) AS 'immediately', + SUM(IF(up_value = 1, 1, 0)) AS 'daily', + SUM(IF(up_value = 7, 1, 0)) AS 'weekly' +FROM user +LEFT JOIN user_properties ON user_id = up_user AND up_property = 'echo-email-frequency';" + +# alert enabled/disabled +badge=" +SELECT + CURDATE() AS 'date', + SUM(IFNULL(up_value, 1)) AS 'enabled', + SUM(IF(up_value = 0, 1, 0)) AS 'disabled' +FROM user +LEFT JOIN user_properties ON user_id = up_user AND up_property = 'echo-show-alert';" + + +echo "Generating dumps" + +# brand new files = add CSV headers +if [ ! -f $dt/echo_preferences_all_web.csv ] +then + echo "date,edit-user-talk,edit-thank,mention,article-linked,page-review,reverted" | tee $dt/echo_preferences_all_web.csv $dt/echo_preferences_all_email.csv > /dev/null + echo "date,never,immediately,daily,weekly" > $dt/echo_preferences_all_email_frequency.csv + echo "date,enabled,disabled" > $dt/echo_preferences_all_badge.csv +fi + +# append daily data +echo $webnotifications | mysql -h$db -D$wiki -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_preferences_all_web.csv +echo $emailnotifications | mysql -h$db -D$wiki -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_preferences_all_email.csv +echo $emailfrequency | mysql -h$db -D$wiki -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_preferences_all_email_frequency.csv +echo $badge | mysql -h$db -D$wiki -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_preferences_all_badge.csv + + +echo "Done" diff --git a/echo_preferences_diff.sh b/echo_preferences_diff.sh new file mode 100755 index 0000000..2bca465 --- /dev/null +++ b/echo_preferences_diff.sh @@ -0,0 +1,103 @@ +#!/bin/bash -e + +# http://www.mediawiki.org/wiki/Echo/Metrics#Preferences_Dashboard +# Daily differences, based on EventLogging data + + +DIR=`dirname $0` + +# get $db & $wiki from args +. $DIR/wiki_env.sh -s`basename $0` $@ + +# config +dt=/a/limn-public-data/ee/datasets/$wiki +mkdir -p $dt + + +# web notifications +webnotifications=" +SELECT + DATE(timestamp) AS 'date', + + # enable + SUM(event_property = 'echo-subscriptions-web-edit-user-talk' AND event_value = 1) AS 'enable-edit-user-talk', + SUM(event_property = 'echo-subscriptions-web-edit-thank' AND event_value = 1) AS 'enable-edit-thank', + SUM(event_property = 'echo-subscriptions-web-mention' AND event_value = 1) AS 'enable-mention', + SUM(event_property = 'echo-subscriptions-web-article-linked' AND event_value = 1) AS 'enable-article-linked', + SUM(event_property = 'echo-subscriptions-web-page-review' AND event_value = 1) AS 'enable-page-review', + SUM(event_property = 'echo-subscriptions-web-reverted' AND event_value = 1) AS 'enable-reverted', + + # disable + SUM(event_property = 'echo-subscriptions-web-edit-user-talk' AND event_value != 1) AS 'disable-edit-user-talk', + SUM(event_property = 'echo-subscriptions-web-edit-thank' AND event_value != 1) AS 'disable-edit-thank', + SUM(event_property = 'echo-subscriptions-web-mention' AND event_value != 1) AS 'disable-mention', + SUM(event_property = 'echo-subscriptions-web-article-linked' AND event_value != 1) AS 'disable-article-linked', + SUM(event_property = 'echo-subscriptions-web-page-review' AND event_value != 1) AS 'disable-page-review', + SUM(event_property = 'echo-subscriptions-web-reverted' AND event_value != 1) AS 'disable-reverted' +FROM EchoPrefUpdate_5488876 +WHERE wiki = '$wiki' +GROUP BY DATE(timestamp);" + + +# email notifications, selected value or default if none specified, only if email-frequency != never +emailnotifications=" +SELECT + DATE(timestamp) AS 'date', + + # enable + SUM(event_property = 'echo-subscriptions-email-edit-user-talk' AND event_value = 1) AS 'enable-edit-user-talk', + SUM(event_property = 'echo-subscriptions-email-edit-thank' AND event_value = 1) AS 'enable-edit-thank', + SUM(event_property = 'echo-subscriptions-email-mention' AND event_value = 1) AS 'enable-mention', + SUM(event_property = 'echo-subscriptions-email-article-linked' AND event_value = 1) AS 'enable-article-linked', + SUM(event_property = 'echo-subscriptions-email-page-review' AND event_value = 1) AS 'enable-page-review', + SUM(event_property = 'echo-subscriptions-email-reverted' AND event_value = 1) AS 'enable-reverted', + + # disable + SUM(event_property = 'echo-subscriptions-email-edit-user-talk' AND event_value != 1) AS 'disable-edit-user-talk', + SUM(event_property = 'echo-subscriptions-email-edit-thank' AND event_value != 1) AS 'disable-edit-thank', + SUM(event_property = 'echo-subscriptions-email-mention' AND event_value != 1) AS 'disable-mention', + SUM(event_property = 'echo-subscriptions-email-article-linked' AND event_value != 1) AS 'disable-article-linked', + SUM(event_property = 'echo-subscriptions-email-page-review' AND event_value != 1) AS 'disable-page-review', + SUM(event_property = 'echo-subscriptions-email-reverted' AND event_value != 1) AS 'disable-reverted' +FROM EchoPrefUpdate_5488876 +WHERE wiki = '$wiki' +GROUP BY DATE(timestamp);" + +# email frequency preferences +emailfrequency=" +SELECT + DATE(timestamp) AS 'date', + SUM(event_property = 'echo-email-frequency' AND event_value = -1) AS 'never', + SUM(event_property = 'echo-email-frequency' AND event_value = 0) AS 'immediately', + SUM(event_property = 'echo-email-frequency' AND event_value = 1) AS 'daily', + SUM(event_property = 'echo-email-frequency' AND event_value = 7) AS 'weekly' +FROM EchoPrefUpdate_5488876 +WHERE wiki = '$wiki' +GROUP BY DATE(timestamp);" + +# alert enabled/disabled +badge=" +SELECT + DATE(timestamp) AS 'date', + SUM(event_property = 'echo-show-alert' AND event_value = 1) AS 'enable', + SUM(event_property = 'echo-show-alert' AND event_value != 1) AS 'disable' +FROM EchoPrefUpdate_5488876 +WHERE wiki = '$wiki' +GROUP BY DATE(timestamp);" + + +echo "Generating dumps" + +# brand new files = add CSV headers +echo "date,enable-edit-user-talk,enable-edit-thank,enable-mention,enable-article-linked,enable-page-review,enable-reverted,disable-edit-user-talk,disable-edit-thank,disable-mention,disable-article-linked,disable-page-review,disable-reverted" | tee $dt/echo_preferences_diff_web.csv $dt/echo_preferences_diff_email.csv > /dev/null +echo "date,never,immediately,daily,weekly" > $dt/echo_preferences_diff_email_frequency.csv +echo "date,enabled,disabled" > $dt/echo_preferences_diff_badge.csv + +# append daily data +echo $webnotifications | mysql -hdb1047.eqiad.wmnet -Dlog -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_preferences_diff_web.csv +echo $emailnotifications | mysql -hdb1047.eqiad.wmnet -Dlog -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_preferences_diff_email.csv +echo $emailfrequency | mysql -hdb1047.eqiad.wmnet -Dlog -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_preferences_diff_email_frequency.csv +echo $badge | mysql -hdb1047.eqiad.wmnet -Dlog -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_preferences_diff_badge.csv + + +echo "Done" diff --git a/echo_views.sh b/echo_views.sh new file mode 100755 index 0000000..881b9e0 --- /dev/null +++ b/echo_views.sh @@ -0,0 +1,69 @@ +#!/bin/bash -e + +# http://www.mediawiki.org/wiki/Echo/Metrics#Views_Dashboard +# Based on EventLogging data + + +DIR=`dirname $0` + +# get $db & $wiki from args +. $DIR/wiki_env.sh -s`basename $0` $@ + +# config +dt=/a/limn-public-data/ee/datasets/$wiki +mkdir -p $dt + + +# views by category +category=" +SELECT + DATE(timestamp) AS 'date', + SUM(event_notificationType = 'edit-user-talk') AS 'edit-user-talk', + SUM(event_notificationType = 'edit-thank') AS 'edit-thank', + SUM(event_notificationType = 'mention') AS 'mention', + SUM(event_notificationType = 'page-linked') AS 'page-linked', + SUM(event_notificationType = 'pagetriage-mark-as-reviewed') AS 'pagetriage-mark-as-reviewed', + SUM(event_notificationType = 'reverted') AS 'reverted', + SUM(event_notificationType = 'welcome') AS 'welcome', + SUM(event_notificationType = 'gettingstarted-start-editing') AS 'gettingstarted-start-editing', + SUM(event_notificationType = 'user-rights') AS 'user-rights' +FROM EchoInteraction_5539940 +WHERE wiki = '$wiki' AND event_action = 'notification-impression' +GROUP BY DATE(timestamp);" + +# views by source +src=" +SELECT + DATE(timestamp) AS 'date', + SUM(event_context = 'flyout') AS 'flyout', + SUM(event_context = 'archive') AS 'archive' +FROM EchoInteraction_5539940 +WHERE wiki = '$wiki' AND event_action = 'notification-impression' +GROUP BY DATE(timestamp);" + +# views by user status (newbie|existing) +newbieids=`$DIR/newbie_user_ids.sh -h$db -w$wiki` +usertype=" +SELECT + DATE(timestamp) AS 'date', + SUM(event_userId IN ($newbieids)) AS 'newbie', + SUM(event_userId NOT IN ($newbieids)) AS 'existing' +FROM EchoInteraction_5539940 +WHERE wiki = '$wiki' AND event_action = 'notification-impression' +GROUP BY DATE(timestamp);" + + +echo "Generating dumps" + +# CSV headers +echo "date,edit-user-talk,edit-thank,mention,page-linked,pagetriage-mark-as-reviewed,reverted,welcome,gettingstarted-start-editing,user-rights" > $dt/echo_views_by_category.csv +echo "date,flyout,archive" > $dt/echo_views_by_source.csv +echo "date,newbie,existing" > $dt/echo_views_by_user_type.csv + +# append daily data +echo $category | mysql -hdb1047.eqiad.wmnet -Dlog -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_views_by_category.csv +echo $src | mysql -hdb1047.eqiad.wmnet -Dlog -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_views_by_source.csv +echo $usertype | mysql -hdb1047.eqiad.wmnet -Dlog -B | sed '1d;$d;s/\t/,/g' >> $dt/echo_views_by_user_type.csv + + +echo "Done" diff --git a/newbie_user_ids.sh b/newbie_user_ids.sh new file mode 100755 index 0000000..9fe9b16 --- /dev/null +++ b/newbie_user_ids.sh @@ -0,0 +1,17 @@ +#!/bin/bash -e + +# Script will return a comma-separated list or newbie user ids. +# A user is considered newbie if he's within the last 99% joined & +# is not member of a group. + +# get $db & $wiki from args +. `dirname $0`/wiki_env.sh -s`basename $0` $@ + +newbieids="SELECT user_id +FROM user +LEFT JOIN user_groups ON ug_user = user_id +WHERE + user_id > (0.99 * ( SELECT MAX(user_id) FROM user)) AND + ug_group IS NULL;" + +echo $newbieids | mysql -h$db -D$wiki -B -N | tr '\n' ',' | sed 's/,$//' diff --git a/wiki_env.sh b/wiki_env.sh new file mode 100755 index 0000000..cab8e20 --- /dev/null +++ b/wiki_env.sh @@ -0,0 +1,31 @@ +#!/bin/bash -e + +# wiki details, see https://office.wikimedia.org/wiki/Data_access +# This will read -h & -w parameters and assign them to $db and $wiki + +while getopts "s:h:w:" OPTION; do + case $OPTION in + s) + script=$OPTARG + ;; + h) + db=$OPTARG # e.g. db1047.eqiad.wmnet + ;; + w) + wiki=$OPTARG # e.g. enwiki + ;; + *) + break + ;; + esac +done + +if [[ -z $script ]]; then + echo "Original script should call wiki_env.sh like: . `dirname $0`/wiki_env.sh -s`basename $0` $@" + exit 1 +fi + +if [[ -z $db ]] || [[ -z $wiki ]]; then + echo "Usage: $script -h<dbhost> -w<wiki>" + exit 1 +fi -- To view, visit https://gerrit.wikimedia.org/r/71999 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I132522ed4624c9d4598448159ebe146f8c252171 Gerrit-PatchSet: 1 Gerrit-Project: analytics/limn-ee-data Gerrit-Branch: master Gerrit-Owner: Matthias Mullie <mmul...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits