Elukey has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/402783 )
Change subject: profile::hadoop::backup::namenode: improve labs support ...................................................................... profile::hadoop::backup::namenode: improve labs support Bug: T166248 Change-Id: I9cfb7feae45e8a2460e0551800da0f68780b75b6 --- M hieradata/role/common/analytics_cluster/hadoop/standby.yaml M modules/profile/manifests/hadoop/backup/namenode.pp 2 files changed, 22 insertions(+), 11 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/83/402783/1 diff --git a/hieradata/role/common/analytics_cluster/hadoop/standby.yaml b/hieradata/role/common/analytics_cluster/hadoop/standby.yaml index 27029c0..5875973 100644 --- a/hieradata/role/common/analytics_cluster/hadoop/standby.yaml +++ b/hieradata/role/common/analytics_cluster/hadoop/standby.yaml @@ -32,4 +32,6 @@ profile::analytics::database::meta::backup_dest::oozie_host: 'analytics1003.eqiad.wmnet' profile::hadoop::firewall::master::analytics_srange: '$ANALYTICS_NETWORKS' -profile::hadoop::firewall::master::analytics_druid_srange: '(($ANALYTICS_NETWORKS $DRUID_PUBLIC_HOSTS))' \ No newline at end of file +profile::hadoop::firewall::master::analytics_druid_srange: '(($ANALYTICS_NETWORKS $DRUID_PUBLIC_HOSTS))' + +profile::hadoop::backup::namenode::monitoring_enabled: true \ No newline at end of file diff --git a/modules/profile/manifests/hadoop/backup/namenode.pp b/modules/profile/manifests/hadoop/backup/namenode.pp index bdeb567..1193ea4 100644 --- a/modules/profile/manifests/hadoop/backup/namenode.pp +++ b/modules/profile/manifests/hadoop/backup/namenode.pp @@ -4,14 +4,21 @@ # and ensures that bacula backs up Hadoop NameNode fsimages, # in the case we need to recover if both Hadoop NameNodes. # -class profile::hadoop::backup::namenode { +class profile::hadoop::backup::namenode( + $monitoring_enabled = hiera('profile::hadoop::backup::namenode::monitoring_enabled', false), +) { require ::profile::hadoop::common + + $backup_dir_group = $::realm ? { + 'production' => 'analytics-admins', + 'labs' => "project-${::labsproject}", + } if !defined(File['/srv/backup']) { file { '/srv/backup': ensure => 'directory', owner => 'root', - group => 'analytics-admins', + group => $backup_dir_group, mode => '0755', } } @@ -23,7 +30,7 @@ ]: ensure => 'directory', owner => 'hdfs', - group => 'analytics-admins', + group => $backup_dir_group, mode => '0750', require => File['/srv/backup'] } @@ -51,13 +58,15 @@ } } - # Alert if backup gets stale. - $warning_threshold_hours = 26 - $critical_threshold_hours = 48 - nrpe::monitor_service { 'hadoop-namenode-backup-age': - description => 'Age of most recent Hadoop NameNode backup files', - nrpe_command => "/usr/bin/sudo /usr/local/lib/nagios/plugins/check_newest_file_age -V -C -d ${destination} -w ${$warning_threshold_hours} -c ${critical_threshold_hours}", - contact_group => 'analytics', + if $monitoring_enabled { + # Alert if backup gets stale. + $warning_threshold_hours = 26 + $critical_threshold_hours = 48 + nrpe::monitor_service { 'hadoop-namenode-backup-age': + description => 'Age of most recent Hadoop NameNode backup files', + nrpe_command => "/usr/bin/sudo /usr/local/lib/nagios/plugins/check_newest_file_age -V -C -d ${destination} -w ${$warning_threshold_hours} -c ${critical_threshold_hours}", + contact_group => 'analytics', + } } # Bacula will also back up this directory. -- To view, visit https://gerrit.wikimedia.org/r/402783 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I9cfb7feae45e8a2460e0551800da0f68780b75b6 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Elukey <ltosc...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits