Filippo Giunchedi has uploaded a new change for review. https://gerrit.wikimedia.org/r/197352
Change subject: graphite: add error alerts ...................................................................... graphite: add error alerts provide alerts for error situations, e.g. queue dropping datapoints, too many metric creations, file write errors Bug: T92965 Change-Id: I0d3816e922bb749f0750d49299c56d0d2e34034c --- M manifests/role/graphite.pp A modules/graphite/manifests/monitoring/graphite.pp 2 files changed, 38 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/52/197352/1 diff --git a/manifests/role/graphite.pp b/manifests/role/graphite.pp index 4fce5df..315539f 100644 --- a/manifests/role/graphite.pp +++ b/manifests/role/graphite.pp @@ -228,6 +228,7 @@ include ::eventlogging::monitoring::graphite include ::swift::monitoring::graphite include ::swift_new::monitoring::graphite + include ::graphite::monitoring::graphite # Monitor production 5xx rates monitoring::graphite_threshold { 'reqstats_5xx': diff --git a/modules/graphite/manifests/monitoring/graphite.pp b/modules/graphite/manifests/monitoring/graphite.pp new file mode 100644 index 0000000..d4ef199 --- /dev/null +++ b/modules/graphite/manifests/monitoring/graphite.pp @@ -0,0 +1,37 @@ +class graphite::monitoring::graphite { + monitoring::graphite_threshold { 'carbon-relay queue full': + description => 'carbon-relay queue full', + metric => 'sumSeries(carbon.relays.graphite1001-*.destinations.*.fullQueueDrops)', + from => '10minutes', + warning => 200, + critical => 1000, + nagios_critical => false + } + + monitoring::graphite_threshold { 'carbon-cache write error': + description => 'carbon-cache write error', + metric => 'secondYAxis(sumSeries(carbon.agents.graphite1001-*.errors))', + from => '10minutes', + warning => 1, + critical => 8, + nagios_critical => false + } + + monitoring::graphite_threshold { 'carbon-cache overflows': + description => 'carbon-cache queues overflow', + metric => 'secondYAxis(sumSeries(carbon.agents.graphite1001-*.cache.overflow))', + from => '10minutes', + warning => 1, + critical => 8, + nagios_critical => false + } + + monitoring::graphite_threshold { 'carbon-cache creates': + description => 'carbon-cache too many creates', + metric => 'sumSeries(carbon.agents.graphite1001-*.creates)', + from => '1hour', + warning => 200, + critical => 1000, + nagios_critical => false + } +} -- To view, visit https://gerrit.wikimedia.org/r/197352 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I0d3816e922bb749f0750d49299c56d0d2e34034c Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Filippo Giunchedi <fgiunch...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits