Ottomata has uploaded a new change for review. https://gerrit.wikimedia.org/r/171553
Change subject: Slightly refactor misc::statistics::limn::mobile_data_sync ...................................................................... Slightly refactor misc::statistics::limn::mobile_data_sync This will allow for multiple teams to more easily run generate.py and sync data over to stat1001 using different limn dataset sources. TODO: put generate.py into a generic limn generator repository, rather than requiring that limn-mobile-data repo is cloned for all limn::data::generate jobs. Change-Id: Iec0f862ca3e4ee63570f1c495260807071b1e2a4 --- M manifests/misc/statistics.pp M manifests/site.pp 2 files changed, 113 insertions(+), 33 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/53/171553/1 diff --git a/manifests/misc/statistics.pp b/manifests/misc/statistics.pp index acb8e0d..3751ad1 100644 --- a/manifests/misc/statistics.pp +++ b/manifests/misc/statistics.pp @@ -701,45 +701,41 @@ } } - -# Class: misc::statistics::limn::mobile_data_sync +# == Class misc::statistics::limn::data +# Sets up base directories and repositories +# for using the misc::statistics::limn::data::generate() define. # -# Sets up daily cron jobs to run a script which -# generates csv datafiles from mobile apps statistics -# then rsyncs those files to stat1001 so they can be served publicly -class misc::statistics::limn::mobile_data_sync { +class misc::statistics::limn::data { include misc::statistics::base include misc::statistics::stats_researchdb_password + # Either '/a' or '/srv', depending on the server. :/ $working_path = $misc::statistics::base::working_path + # Directory where the repository of the generate.py will be cloned. $source_dir = "${working_path}/limn-mobile-data" + + # generate.py command to run in a cron. $command = "${source_dir}/generate.py" - $config = "${source_dir}/mobile/" + + # my.cnf credentials file. This is the file rendered by + # misc::statistics::stats_researchdb_password. 
$mysql_credentials = '/etc/mysql/conf.d/stats-research-client.cnf' - $rsync_from = "${working_path}/limn-public-data" - $output = "${rsync_from}/mobile/datafiles" - $log = '/var/log/limn-mobile-data.log' - $gerrit_repo = 'https://gerrit.wikimedia.org/r/p/analytics/limn-mobile-data.git' + + # cron job logs will be kept here + $log_dir = '/var/log/limn-data' + + # generate.py's repository + $git_remote = 'https://gerrit.wikimedia.org/r/p/analytics/limn-mobile-data.git' + + # public data directory. Data will be synced from here to a public web host. + $public_dir = "${working_path}/limn-public-data" + + # Rsync generated data to stat1001 at http://datasets.wikimedia.org/limn-public-data/ + $rsync_to = "stat1001.wikimedia.org::www/limn-public-data/" + + # user to own files and run cron job as (stats). $user = $misc::statistics::user::username - - $db_user = $passwords::mysql::research::user - $db_pass = $passwords::mysql::research::pass - - git::clone { 'analytics/limn-mobile-data': - ensure => 'latest', - directory => $source_dir, - origin => $gerrit_repo, - owner => $user, - require => [User[$user]], - } - - file { $log: - ensure => 'present', - owner => $user, - group => $user, - mode => '0660', - } # This path is used in the limn-mobile-data config. # Symlink this until they change it. @@ -749,20 +745,104 @@ target => $mysql_credentials, } - file { [$source_dir, $rsync_from, $output]: + # TODO: This repository contains the generate.py script. + # Other limn data repositories only have config and data + # directories. generate.py should be abstracted out into + # a general purpose limn data generator. + # For now, all limn data classes rely on this repository + # and generate.py script to be present. 
+ if !defined(Git::Clone['analytics/limn-mobile-data']) { + git::clone { 'analytics/limn-mobile-data': + ensure => 'latest', + directory => $source_dir, + origin => $git_remote, + owner => $user, + require => [User[$user]], + } + } + + # Make sure these are writeable by $user. + file { [$log_dir, $source_dir, $public_data_dir]: ensure => 'directory', owner => $user, group => wikidev, mode => '0775', } +} - cron { 'rsync_mobile_apps_stats': - command => "python ${command} ${config} >> ${log} 2>&1 && /usr/bin/rsync -rt ${rsync_from}/* stat1001.wikimedia.org::www/limn-public-data/", + + +# == Define: misc::statistics::limn::data::generate +# +# Sets up daily cron jobs to run a script which +# generates csv datafiles and rsyncs those files +# to stat1001 so they can be served publicly. +# +# This requires that a repository with generate.py config +# exists at https://gerrit.wikimedia.org/r/p/analytics/limn-${title}-data.git. +# +# == Usage +# misc::statistics::limn::data::generate { 'mobile': } +# misc::statistics::limn::data::generate { 'flow': } +# ... +# +define misc::statistics::limn::data::generate() { + require misc::statistics::limn::data + + $user = $misc::statistics::limn::data::user + $command = $misc::statistics::limn::data::command + + # A repo at analytics/limn-${title}-data.git had better exist! + $git_remote = "https://gerrit.wikimedia.org/r/p/analytics/limn-${title}-data.git" + + # Directory at which to clone $git_remote + $source_dir = "${misc::statistics::base::limn::data::working_path}/limn-${title}-data" + + # config directory for this limn data generate job + $config_dir = "${$source_dir}/${title}/" + + # log file for the generate cron job + $log = "${misc::statistics::limn::data::log_dir}/limn-${title}-data.log" + + # Rsync from $public_dir/${title} + $rsync_from = "${misc::statistics::limn::data::public_dir}/${title}" + $rsync_to = $misc::statistics::limn::data::rsync_to + + # I'm not totally sure what this is... 
+ $output = "${rsync_from}/mobile/datafiles" + + if !defined(Git::Clone["analytics/limn-${title}-data"]) { + git::clone { "analytics/limn-${title}-data": + ensure => 'latest', + directory => $source_dir, + origin => $git_remote, + owner => $user, + require => [User[$user]], + } + } + + file { [$source_dir, $rsync_from, $output]: + ensure => 'directory', + owner => $misc::statistics::limn::data::user, + group => wikidev, + mode => '0775', + } + + cron { "rsync_${title}_apps_stats": + command => "python ${command} ${config} >> ${log} 2>&1 && /usr/bin/rsync -rt ${rsync_from} ${rsync_to}/", user => $user, minute => 0, } } +# == Class misc::statistics::limn::data::jobs +# Uses the misc::statistics::limn::data::generate define +# to set up cron jobs to generate and sync particular data. +# +class misc::statistics::limn::data::jobs { + misc::statistics::limn::data::generate { 'mobile': } +} + # == Class misc::statistics::geowiki::params # Parameters for geowiki that get used outside this file class misc::statistics::geowiki::params { diff --git a/manifests/site.pp b/manifests/site.pp index f3d67f3..8255a98 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -2590,7 +2590,7 @@ include role::statistics::cruncher include misc::statistics::cron_blog_pageviews - include misc::statistics::limn::mobile_data_sync + include misc::statistics::limn::data::jobs include misc::statistics::researchdb_password class { 'admin': -- To view, visit https://gerrit.wikimedia.org/r/171553 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iec0f862ca3e4ee63570f1c495260807071b1e2a4 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Ottomata <o...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits