Faidon Liambotis has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/357992 )
Change subject: raid: remove the option to check write cache policies
......................................................................
raid: remove the option to check write cache policies
RAID controllers already have a desired and an effective state (called a
"Default" and a "Current" policy in megacli). Divergence between those
two should be treated as an alert, but the setting of the Default policy
itself shouldn't be checked and alerted on by Icinga; it's a config
change and it should be treated as such (more on that later).
This reverts commits:
- 776b1d511f30eba2c80a8a3956c5207c13d62c2c
- e9fe0118a85ba9365bece15f9d8a43286944bef4
Change-Id: I1998c703abb989710ac61c008198add7fec4afd9
---
M hieradata/regex.yaml
M hieradata/role/common/analytics_cluster/hadoop/worker.yaml
M modules/base/manifests/monitoring/host.pp
M modules/icinga/files/raid_handler.py
M modules/profile/manifests/base.pp
M modules/raid/files/check-raid.py
M modules/raid/manifests/init.pp
7 files changed, 7 insertions(+), 32 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/92/357992/1
diff --git a/hieradata/regex.yaml b/hieradata/regex.yaml
index 4f9f056..a11752d 100644
--- a/hieradata/regex.yaml
+++ b/hieradata/regex.yaml
@@ -328,11 +328,3 @@
admin::groups:
- labtest-roots
openstack::version: 'liberty'
-
-# require WriteBack policy to all hosts (independent of the role) that look
-# like databases. Currently this only works for megacli systems, but it will
-# be applied to HP RAIDs when available. Systems where this is not currently
-# compatible are ignored and do not send any alert.
-db_like_raid_policy:
- __regex: !ruby/regexp
/^(db|dbstore|es|pc|labsdb)[12]\d\d\d\.(eqiad|codfw)\.wmnet$/
- profile::base::check_raid_policy: 'WriteBack'
diff --git a/hieradata/role/common/analytics_cluster/hadoop/worker.yaml
b/hieradata/role/common/analytics_cluster/hadoop/worker.yaml
index 83b8dc7..0add55e 100644
--- a/hieradata/role/common/analytics_cluster/hadoop/worker.yaml
+++ b/hieradata/role/common/analytics_cluster/hadoop/worker.yaml
@@ -9,4 +9,3 @@
# Analytics worker disks are large. We will install a custom
# NRPE check for them, so the base module's should ignore them.
profile::base::check_disk_options: '-w 6% -c 3% -l -e -A -i
"/var/lib/hadoop/data"'
-profile::base::check_raid_policy: 'WriteBack'
\ No newline at end of file
diff --git a/modules/base/manifests/monitoring/host.pp
b/modules/base/manifests/monitoring/host.pp
index dfa7940..95225ad 100644
--- a/modules/base/manifests/monitoring/host.pp
+++ b/modules/base/manifests/monitoring/host.pp
@@ -28,14 +28,11 @@
# that are purposefully at 99%. Better ideas are welcome.
$nrpe_check_disk_options = '-w 6% -c 3% -l -e -A -i "/srv/sd[a-b][1-3]"
--exclude-type=tracefs',
$nrpe_check_disk_critical = false,
- $raid_write_cache_policy = undef,
) {
include ::base::puppet::params # In order to be able to use some variables
# RAID checks
- class { 'raid':
- write_cache_policy => $raid_write_cache_policy,
- }
+ include ::raid
::monitoring::host { $::hostname: }
diff --git a/modules/icinga/files/raid_handler.py
b/modules/icinga/files/raid_handler.py
index 6155b4c..503a60e 100644
--- a/modules/icinga/files/raid_handler.py
+++ b/modules/icinga/files/raid_handler.py
@@ -20,7 +20,7 @@
COMPRESSED_RAID_TYPES = ('megacli', 'hpssacli')
SKIP_STRINGS = ('timeout', 'timed out', 'connection refused', 'out of bounds',
- 'must have write cache policy', 'Could not complete SSL
handshake')
+ 'Could not complete SSL handshake')
LOG_PATH = '/var/log/icinga/raid_handler.log'
COMMAND_FILE = '/var/lib/nagios/rw/nagios.cmd'
diff --git a/modules/profile/manifests/base.pp
b/modules/profile/manifests/base.pp
index 280f629..18d251a 100644
--- a/modules/profile/manifests/base.pp
+++ b/modules/profile/manifests/base.pp
@@ -11,7 +11,6 @@
$group_contact = hiera('contactgroups', 'admins'),
$check_disk_options = hiera('profile::base::check_disk_options', '-w 6% -c
3% -l -e -A -i "/srv/sd[a-b][1-3]" --exclude-type=tracefs'),
$check_disk_critical = hiera('profile::base::check_disk_critical', false),
- $check_raid_policy = hiera('profile::base::check_raid_policy', undef),
) {
require ::profile::base::certificates
class { '::apt':
@@ -96,7 +95,6 @@
contact_group => $group_contact,
nrpe_check_disk_options => $check_disk_options,
nrpe_check_disk_critical => $check_disk_critical,
- raid_write_cache_policy => $check_raid_policy,
}
}
diff --git a/modules/raid/files/check-raid.py b/modules/raid/files/check-raid.py
index 12ce716..94561e9 100644
--- a/modules/raid/files/check-raid.py
+++ b/modules/raid/files/check-raid.py
@@ -208,9 +208,8 @@
return 2
if wrongPolicyLD > 0:
- print(('CRITICAL: %d LD(s) must have write cache policy %s, '
- 'currently using: %s') % (wrongPolicyLD, policy,
- ", ".join(currentWrongPolicies)))
+ print 'CRITICAL: %d LD(s) not in %s policy (%s)' % (
+ wrongPolicyLD, policy, ", ".join(currentWrongPolicies))
return 2
if policy is None:
diff --git a/modules/raid/manifests/init.pp b/modules/raid/manifests/init.pp
index bfc7d99..e74e2a9 100644
--- a/modules/raid/manifests/init.pp
+++ b/modules/raid/manifests/init.pp
@@ -3,24 +3,14 @@
# Class to set up monitoring for software and hardware RAID
#
# === Parameters
-# * write_cache_policy: if set, it will check that the write cache
-# policy of all logical drives matches the one
-# given, normally 'WriteBack' or 'WriteThrough'.
-# Currently only works for Megacli systems, it is
-# ignored in all other cases.
+#
# === Examples
#
# include raid
-class raid (
- $write_cache_policy = undef,
-){
+class raid {
- if $write_cache_policy {
- $check_raid = "/usr/bin/sudo /usr/local/lib/nagios/plugins/check_raid
--policy ${write_cache_policy}"
- } else {
- $check_raid = '/usr/bin/sudo /usr/local/lib/nagios/plugins/check_raid'
- }
+ $check_raid = '/usr/bin/sudo /usr/local/lib/nagios/plugins/check_raid'
# for 'forking' checks (i.e. all but mdadm, which essentially just reads
# kernel memory from /proc/mdstat) check every $check_interval
--
To view, visit https://gerrit.wikimedia.org/r/357992
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I1998c703abb989710ac61c008198add7fec4afd9
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Faidon Liambotis <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits