Filippo Giunchedi has uploaded a new change for review.
https://gerrit.wikimedia.org/r/216863
Change subject: restbase: add error rates and storage latencies alerts
......................................................................
restbase: add error rates and storage latencies alerts
similar to 5d04bd78d, but only add the alerts to the graphite host; this avoids
having identical alerts that would fire at the same time for each and every
restbase host
Bug: T78514
Change-Id: I580dd8ac60d0ab1ebf0a5abd52f304f36f509754
---
M manifests/role/restbase.pp
M manifests/site.pp
2 files changed, 23 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/63/216863/1
diff --git a/manifests/role/restbase.pp b/manifests/role/restbase.pp
index 5bb8984..079eb83 100644
--- a/manifests/role/restbase.pp
+++ b/manifests/role/restbase.pp
@@ -19,3 +19,25 @@
}
}
+
+class role::restbase::alerts {
+ monitoring::graphite_threshold { 'restbase_request_5xx_rate':
+ description => 'RESTBase requests returning 5xx, in req/s',
+ metric =>
'restbase.v1_page_html_-title-_-revision--_tid-.GET.5xx.sample_rate',
+ from => '10min',
+ warning => '1', # 1 5xx/s
+ critical => '3', # 3 5xx/s
+ percentage => '20',
+ contact_group => 'team-services',
+ }
+
+ monitoring::graphite_threshold { 'restbase_html_storage_hit_latency':
+ description => 'RESTBase HTML storage load mean latency',
+ metric =>
'movingMedian(restbase.sys_key-rev-value_-bucket-_-key--_revision--_tid-.GET.2xx.mean,
15)',
+ from => '10min',
+ warning => '25', # 25ms
+ critical => '50', # 50ms
+ percentage => '50',
+ contact_group => 'team-services',
+ }
+}
diff --git a/manifests/site.pp b/manifests/site.pp
index 0542349..8b39015 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -1159,6 +1159,7 @@
include role::gdash
include role::performance
include role::graphite::production::alerts
+ include role::restbase::alerts
}
# graphite test machine, currently with SSD caching + spinning disks
--
To view, visit https://gerrit.wikimedia.org/r/216863
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I580dd8ac60d0ab1ebf0a5abd52f304f36f509754
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Filippo Giunchedi <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits