Ottomata has uploaded a new change for review. https://gerrit.wikimedia.org/r/133225
Change subject: Removing class role::analytics::kraken::jobs::hive::partitions::external
......................................................................

Removing class role::analytics::kraken::jobs::hive::partitions::external

We will soon be using Oozie instead of this custom python script to
automatically create hive partitions on top of webrequest data.

Change-Id: I6acac58c0892431e7abacad3cb1d8f33066ede32
See: https://gerrit.wikimedia.org/r/#/c/131208/
---
M manifests/role/analytics/kraken.pp
M manifests/site.pp
2 files changed, 2 insertions(+), 38 deletions(-)

git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/25/133225/1
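For context on what is being retired: per the class documentation in the diff below, hive-partitioner scans imported datasets under $external_data_hdfs_dir and registers each new directory as an external Hive partition. The following is a minimal sketch of that technique in Python, assuming data lands in year=/month=/day=/hour= leaf directories; the layout, table handling, and all names here are illustrative, not the script's actual code, and the Oozie-based replacement is the subject of change 131208 linked above.

#!/usr/bin/env python
# Illustrative sketch only: scan an HDFS directory tree assumed to be laid
# out as <datadir>/<table>/year=YYYY/month=MM/day=DD/hour=HH and register
# each leaf directory as a partition of an external Hive table.
import re
import subprocess

DATABASE = 'wmf'                 # matches $database in the class below
DATADIR  = '/wmf/data/external'  # stands in for $external_data_hdfs_dir
AUXPATH  = '/usr/lib/hcatalog/share/hcatalog/hcatalog-core-0.5.0-cdh4.3.1.jar'
LEAF     = re.compile(r'(year=\d+/month=\d+/day=\d+/hour=\d+)$')

def leaf_partition_dirs(datadir):
    """Yield (table, partition_path) for every hour-level HDFS directory."""
    listing = subprocess.check_output(['hdfs', 'dfs', '-ls', '-R', datadir])
    for line in listing.decode().splitlines():
        match = LEAF.search(line)
        if match:
            path  = line.split()[-1]
            table = path[len(datadir):].lstrip('/').split('/')[0]
            yield table, match.group(1)

def add_partition(table, partition_path):
    """Issue an idempotent ADD PARTITION statement through the hive CLI."""
    # Assumes integer partition columns, so that
    # 'year=2014/month=05' becomes the spec 'year=2014, month=05'.
    spec = partition_path.replace('/', ', ')
    ddl  = 'ALTER TABLE %s.%s ADD IF NOT EXISTS PARTITION (%s);' % (
        DATABASE, table, spec)
    # --auxpath pulls in the hcatalog jar for the JsonSerDe,
    # mirroring $hive_options in the cron job below.
    subprocess.check_call(['hive', '--auxpath', AUXPATH, '-e', ddl])

if __name__ == '__main__':
    for table, partition in leaf_partition_dirs(DATADIR):
        add_partition(table, partition)

Because ADD IF NOT EXISTS is idempotent, a cron job can run a partitioner like this every hour without tracking which partitions it has already created.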
diff --git a/manifests/role/analytics/kraken.pp b/manifests/role/analytics/kraken.pp
index 54abd83..79b6da1 100644
--- a/manifests/role/analytics/kraken.pp
+++ b/manifests/role/analytics/kraken.pp
@@ -1,4 +1,6 @@
 # kraken.pp - role classes dealing with Kraken data analysis.
+#
+# NOTE! 'kraken' will be renamed soon.
 
 # == Class role::analytics::kraken
 # Kraken refers to the Analytics codebase used to generate
@@ -99,42 +101,6 @@
         command => "${script} --start ${start_date} ${datadir} >> ${log_file} 2>&1",
         user    => 'hdfs',
         minute  => 5,
-        require => Exec["${script}-exists"],
-    }
-}
-
-# == Class role::analytics::kraken::hive::partitions::external
-# Installs cron job that creates external Hive partitions for imported
-# datasets in $external_data_hdfs_dir.
-class role::analytics::kraken::jobs::hive::partitions::external {
-    include role::analytics::kraken
-
-    $script   = "${role::analytics::kraken::path}/kraken-etl/hive-partitioner"
-    $datadir  = $role::analytics::kraken::external_data_hdfs_dir
-    $database = 'wmf'
-
-    # Note: I'm not worried about logrotate yet.
-    # This generates just a few lines per hour.
-    $log_file = "${role::analytics::kraken::log_dir}/hive-partitioner.log"
-
-    # make sure the script has been deployed.
-    exec { "${script}-exists":
-        command => "/usr/bin/test -x ${script}",
-        # This exec doesn't actually create $script, but
-        # we don't need to run test it puppet can already
-        # tell that the file exists.
-        creates => $script,
-    }
-
-
-    # Use hcatalog jar for JsonSerDe
-    $hive_options = '--auxpath /usr/lib/hcatalog/share/hcatalog/hcatalog-core-0.5.0-cdh4.3.1.jar'
-    # cron job to automatically create hive partitions for any
-    # newly imported data.
-    cron { 'kraken-create-external-hive-partitions':
-        command => "${script} --database ${database} --hive-options='${hive_options}' ${datadir} >> ${log_file} 2>&1",
-        user    => 'hdfs',
-        minute  => 21,
         require => Exec["${script}-exists"],
     }
 }
diff --git a/manifests/site.pp b/manifests/site.pp
index 1f22f86..3bdc9a6 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -230,8 +230,6 @@
     include role::analytics::kraken::jobs::import::pagecounts
     # Imports logs from Kafka into Hadoop (via Camus)
     include role::analytics::kraken::jobs::import::kafka
-    # Creates hive partitions on all data in HDFS /wmf/data/external
-    include role::analytics::kraken::jobs::hive::partitions::external
 }
 
 # analytics1027 hosts the frontend

--
To view, visit https://gerrit.wikimedia.org/r/133225

Gerrit-MessageType: newchange
Gerrit-Change-Id: I6acac58c0892431e7abacad3cb1d8f33066ede32
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <o...@wikimedia.org>