Filippo Giunchedi has submitted this change and it was merged.

Change subject: icinga: update checks after statsite migration
......................................................................


icinga: update checks after statsite migration

Change-Id: Ief9572bef4872fc8999ef5ec4a3a3e67ac500d84
---
M manifests/role/cache.pp
M manifests/role/labsnfs.pp
M manifests/role/nova.pp
M manifests/swift.pp
M modules/mediawiki/manifests/monitoring/webserver.pp
M modules/swift_new/manifests/monitoring/graphite.pp
6 files changed, 12 insertions(+), 12 deletions(-)

Approvals:
  Filippo Giunchedi: Verified; Looks good to me, approved



diff --git a/manifests/role/cache.pp b/manifests/role/cache.pp
index 6c2a413..2342f0a 100644
--- a/manifests/role/cache.pp
+++ b/manifests/role/cache.pp
@@ -532,7 +532,7 @@
         # (logster only reports once a minute)
         monitoring::graphite_threshold { 'varnishkafka-kafka_drerr':
             description     => 'Varnishkafka Delivery Errors per minute',
-            metric          => "derivative(${graphite_metric_prefix}.varnishkafka.kafka_drerr.value)",
+            metric          => "derivative(${graphite_metric_prefix}.varnishkafka.kafka_drerr)",
             # warn if more than 0 errors per minute in the last 10 minutes
             warning         => 0,
             # critical if more than 20000 errors per minute in the last 10 minutes
diff --git a/manifests/role/labsnfs.pp b/manifests/role/labsnfs.pp
index 416396e..c3253c1 100644
--- a/manifests/role/labsnfs.pp
+++ b/manifests/role/labsnfs.pp
@@ -55,7 +55,7 @@
 
     monitoring::graphite_threshold { 'network_out_saturated':
         description => 'Outgoing network saturation',
-        metric      => "servers.${::hostname}.network.${monitor_iface}.tx_byte.value",
+        metric      => "servers.${::hostname}.network.${monitor_iface}.tx_byte",
         from        => '30min',
         warning     => '75000000',  # roughly 600Mbps / 1Gbps
         critical    => '100000000', # roughly 800Mbps / 1Gbps
@@ -64,7 +64,7 @@
 
     monitoring::graphite_threshold { 'network_in_saturated':
         description => 'Incoming network saturation',
-        metric      => "servers.${::hostname}.network.${monitor_iface}.rx_byte.value",
+        metric      => "servers.${::hostname}.network.${monitor_iface}.rx_byte",
         from        => '30min',
         warning     => '75000000',  # roughly 600Mbps / 1Gbps
         critical    => '100000000', # roughly 800Mbps / 1Gbps
@@ -73,7 +73,7 @@
 
     monitoring::graphite_threshold { 'high_iowait_stalling':
         description => 'Persistent high iowait',
-        metric      => "servers.${::hostname}.cpu.total.iowait.value",
+        metric      => "servers.${::hostname}.cpu.total.iowait",
         from        => '10min',
         warning     => '25', # Based off looking at history of metric
         critical    => '35',
@@ -83,7 +83,7 @@
     # Monitor for high load consistently, is a 'catchall'
     monitoring::graphite_threshold { 'high_load':
         description => 'High load for whatever reason',
-        metric      => "servers.${::hostname}.cpu.total.iowait.value",
+        metric      => "servers.${::hostname}.cpu.total.iowait",
         from        => '10min',
         warning     => '16',
         critical    => '24',
diff --git a/manifests/role/nova.pp b/manifests/role/nova.pp
index 37ff0b4..8e0adf4 100644
--- a/manifests/role/nova.pp
+++ b/manifests/role/nova.pp
@@ -362,7 +362,7 @@
     # but graphite_threshold doesn't support that.
     monitoring::graphite_threshold { 'conntrack_saturated':
         description => 'Connection tracking saturation',
-        metric      => "servers.${::hostname}.ConntrackCollector.network.netfilter.conntrack_count.value",
+        metric      => "servers.${::hostname}.ConntrackCollector.network.netfilter.conntrack_count",
         from        => '5min',
         warning     => '241664', # (~90%)
         critical    => '258048', # (~98%)
diff --git a/manifests/swift.pp b/manifests/swift.pp
index ca83755..a951fa0 100644
--- a/manifests/swift.pp
+++ b/manifests/swift.pp
@@ -195,7 +195,7 @@
 class swift::monitoring::graphite {
     monitoring::graphite_threshold { 'swift_eqiad-prod_dispersion_object':
         description     => 'swift eqiad-prod object availability',
-        metric          => 'swift.eqiad-prod.dispersion.object.pct_found.value',
+        metric          => 'swift.eqiad-prod.dispersion.object.pct_found',
         from            => '1hours',
         warning         => 95,
         critical        => 90,
@@ -205,7 +205,7 @@
 
     monitoring::graphite_threshold { 'swift_eqiad-prod_dispersion_container':
         description     => 'swift eqiad-prod container availability',
-        metric          => 'swift.eqiad-prod.dispersion.container.pct_found.value',
+        metric          => 'swift.eqiad-prod.dispersion.container.pct_found',
         from            => '30min',
         warning         => 92,
         critical        => 88,
diff --git a/modules/mediawiki/manifests/monitoring/webserver.pp b/modules/mediawiki/manifests/monitoring/webserver.pp
index 79dfeb5..21ddfbc 100644
--- a/modules/mediawiki/manifests/monitoring/webserver.pp
+++ b/modules/mediawiki/manifests/monitoring/webserver.pp
@@ -17,7 +17,7 @@
 
         monitoring::graphite_threshold { 'hhvm_queue_size':
             description     => 'HHVM queue size',
-            metric          => "servers.${::hostname}.hhvmHealthCollector.queued.value",
+            metric          => "servers.${::hostname}.hhvmHealthCollector.queued",
             warning         => 10,
             critical        => 80,
             percentage      => 30,
@@ -26,7 +26,7 @@
 
         monitoring::graphite_threshold { 'hhvm_load':
             description     => 'HHVM busy threads',
-            metric          => "servers.${::hostname}.hhvmHealthCollector.load.value",
+            metric          => "servers.${::hostname}.hhvmHealthCollector.load",
             warning         => $::mediawiki::hhvm::max_threads*0.6,
             critical        => $::mediawiki::hhvm::max_threads * 0.9,
             percentage      => 30,
diff --git a/modules/swift_new/manifests/monitoring/graphite.pp b/modules/swift_new/manifests/monitoring/graphite.pp
index 2b7460c..b48111c 100644
--- a/modules/swift_new/manifests/monitoring/graphite.pp
+++ b/modules/swift_new/manifests/monitoring/graphite.pp
@@ -3,7 +3,7 @@
 ) {
     monitoring::graphite_threshold { "swift_${swift_cluster}_dispersion_object":
         description     => "swift ${swift_cluster} object availability",
-        metric          => "swift.${swift_cluster}.dispersion.object.pct_found.value",
+        metric          => "swift.${swift_cluster}.dispersion.object.pct_found",
         from            => '1hours',
         warning         => 95,
         critical        => 90,
@@ -13,7 +13,7 @@
 
     monitoring::graphite_threshold { "swift_${swift_cluster_dispersion_container}":
         description     => "swift ${swift_cluster} container availability",
-        metric          => "swift.${swift_cluster}.dispersion.container.pct_found.value",
+        metric          => "swift.${swift_cluster}.dispersion.container.pct_found",
         from            => '30min',
         warning         => 92,
         critical        => 88,

-- 
To view, visit https://gerrit.wikimedia.org/r/203039
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ief9572bef4872fc8999ef5ec4a3a3e67ac500d84
Gerrit-PatchSet: 2
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Filippo Giunchedi <fgiunch...@wikimedia.org>
Gerrit-Reviewer: Filippo Giunchedi <fgiunch...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to