Volans has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/391235 )
Change subject: Icinga: allow to set display_name ...................................................................... Icinga: allow to set display_name * In order to add links to Grafana dashboards in the Icinga alerts based on Grafana and Prometheus thresholds, allow setting the Icinga display_name field as a union of the description and a dashboard_link. * Given the way naggen2 works, the new parameter should not be added to the generated Icinga configuration file unless it's set, which avoids making the generated configuration larger. * The added dashboard_link parameter will be made mandatory once all the callers pass it. Bug: T170353 Change-Id: I87c6daa29994f77b957ca04cd4af409121635f56 --- M modules/monitoring/manifests/check_prometheus.pp M modules/monitoring/manifests/graphite_anomaly.pp M modules/monitoring/manifests/graphite_threshold.pp M modules/monitoring/manifests/service.pp 4 files changed, 93 insertions(+), 62 deletions(-) Approvals: jenkins-bot: Verified Volans: Looks good to me, approved diff --git a/modules/monitoring/manifests/check_prometheus.pp b/modules/monitoring/manifests/check_prometheus.pp index 176f731..70ca4e6 100644 --- a/modules/monitoring/manifests/check_prometheus.pp +++ b/modules/monitoring/manifests/check_prometheus.pp @@ -68,7 +68,10 @@ # # [*contact_group*] # What contact groups to use for notifications - +# +# [*dashboard_link*] +# Link to the Grafana dashboard for this alarm +# define monitoring::check_prometheus( $description, $query, @@ -81,7 +84,8 @@ $group = undef, $ensure = present, $nagios_critical = false, - $contact_group = 'admins' + $contact_group = 'admins', + $dashboard_link = undef, ) { validate_re($method, '^(gt|ge|lt|le|eq|ne)$') @@ -92,6 +96,12 @@ default => 'check_prometheus', } + if $dashboard_link { + $display_name = "${description} - ${dashboard_link}" + } else { + $display_name = undef + } + monitoring::service { $title: ensure => $ensure, description => $description, @@ -100,5 +110,6 @@ group => $group,
critical => $nagios_critical, contact_group => $contact_group, + display_name => $display_name, } } diff --git a/modules/monitoring/manifests/graphite_anomaly.pp b/modules/monitoring/manifests/graphite_anomaly.pp index 54737d3..e5e2f25 100644 --- a/modules/monitoring/manifests/graphite_anomaly.pp +++ b/modules/monitoring/manifests/graphite_anomaly.pp @@ -23,17 +23,18 @@ # } # # == Parameters -# $description - Description of icinga alert -# $metric - graphite metric name -# $warning - alert warning datapoints -# $critical - alert critical datapoints -# $check_window - the number of datapoints on which the check -# is performed. Defaults to 100. -# $graphite_url - URL of the graphite server. -# $timeout - Timeout for the http query to -# graphite. Defaults to 10 seconds -# over - check only for values above the limit -# under - check only for values below the limit +# $description - Description of icinga alert +# $metric - graphite metric name +# $warning - alert warning datapoints +# $critical - alert critical datapoints +# $check_window - the number of datapoints on which the check +# is performed. Defaults to 100. +# $graphite_url - URL of the graphite server. +# $timeout - Timeout for the http query to +# graphite. 
Defaults to 10 seconds +# $over - check only for values above the limit +# $under - check only for values below the limit +# $dashboard_link - Link to the Grafana dashboard for this alarm # $host # $retries # $group @@ -49,21 +50,22 @@ $metric, $warning, $critical, - $check_window = 100, - $graphite_url = 'https://graphite.wikimedia.org', - $timeout = 10, - $over = false, - $under = false, - $host = $::hostname, - $retries = 3, - $group = undef, - $ensure = present, - $nagios_critical = false, - $passive = false, - $freshness = 36000, - $check_interval = 1, - $retry_interval = 1, - $contact_group = 'admins' + $check_window = 100, + $graphite_url = 'https://graphite.wikimedia.org', + $timeout = 10, + $over = false, + $under = false, + $host = $::hostname, + $retries = 3, + $group = undef, + $ensure = present, + $nagios_critical = false, + $passive = false, + $freshness = 36000, + $check_interval = 1, + $retry_interval = 1, + $contact_group = 'admins', + $dashboard_link = undef, ) { @@ -79,6 +81,12 @@ if $metric =~ /'/ { fail("single quotes will be stripped from graphite metric ${metric}, consider using double quotes") + } + + if $dashboard_link { + $display_name = "${description} - ${dashboard_link}" + } else { + $display_name = undef } # checkcommands.cfg's check_graphite_anomaly command has @@ -103,5 +111,6 @@ check_interval => $check_interval, retry_interval => $retry_interval, contact_group => $contact_group, + display_name => $display_name, } } diff --git a/modules/monitoring/manifests/graphite_threshold.pp b/modules/monitoring/manifests/graphite_threshold.pp index bf836a5..5a0ddb2 100644 --- a/modules/monitoring/manifests/graphite_threshold.pp +++ b/modules/monitoring/manifests/graphite_threshold.pp @@ -22,23 +22,24 @@ # percentage => 5, # } # == Parameters -# $description - Description of icinga alert -# $metric - graphite metric name -# $warning - alert warning threshold -# $critical - alert critical threshold -# $series - true if the metric refers to a 
series of graphite -# datapoints that should be checked individually -# $from - Date from which to fetch data. -# Examples: '1hours','10min' (default), '2w' -# $until - end sampling date (negative relative time from -# now. Default: '0min' -# $percentage - Number of datapoints exceeding the -# threshold. Defaults to 1%. -# $under - If true, the threshold is a lower limit. -# Defaults to false. -# $graphite_url - URL of the graphite server. -# $timeout - Timeout for the http query to -# graphite. Defaults to 10 seconds +# $description - Description of icinga alert +# $metric - graphite metric name +# $warning - alert warning threshold +# $critical - alert critical threshold +# $series - true if the metric refers to a series of graphite +# datapoints that should be checked individually +# $from - Date from which to fetch data. +# Examples: '1hours','10min' (default), '2w' +# $until - end sampling date (negative relative time from +# now. Default: '0min' +# $percentage - Number of datapoints exceeding the +# threshold. Defaults to 1%. +# $under - If true, the threshold is a lower limit. +# Defaults to false. +# $graphite_url - URL of the graphite server. +# $timeout - Timeout for the http query to +# graphite. 
Defaults to 10 seconds +# $dashboard_link - Link to the Grafana dashboard for this alarm # $host # $retries # $group @@ -54,23 +55,24 @@ $metric, $warning, $critical, - $series = false, - $from = '10min', - $until = '0min', - $percentage = 1, - $under = false, - $graphite_url = 'https://graphite.wikimedia.org', - $timeout = 10, - $host = $::hostname, - $retries = 3, - $group = undef, - $ensure = present, - $nagios_critical = false, - $passive = false, - $freshness = 36000, - $check_interval = 1, - $retry_interval = 1, - $contact_group = 'admins' + $series = false, + $from = '10min', + $until = '0min', + $percentage = 1, + $under = false, + $graphite_url = 'https://graphite.wikimedia.org', + $timeout = 10, + $host = $::hostname, + $retries = 3, + $group = undef, + $ensure = present, + $nagios_critical = false, + $passive = false, + $freshness = 36000, + $check_interval = 1, + $retry_interval = 1, + $contact_group = 'admins', + $dashboard_link = undef, ) { @@ -101,6 +103,12 @@ default => 'check_graphite_threshold' } + if $dashboard_link { + $display_name = "${description} - ${dashboard_link}" + } else { + $display_name = undef + } + monitoring::service { $title: ensure => $ensure, description => $description, @@ -113,5 +121,6 @@ check_interval => $check_interval, retry_interval => $retry_interval, contact_group => $contact_group, + display_name => $display_name, } } diff --git a/modules/monitoring/manifests/service.pp b/modules/monitoring/manifests/service.pp index 622f9ba..0cba968 100644 --- a/modules/monitoring/manifests/service.pp +++ b/modules/monitoring/manifests/service.pp @@ -14,6 +14,7 @@ $config_dir = '/etc/nagios', $event_handler = undef, $notifications_enabled = $::profile::base::notifications_enabled, + $display_name = undef, ) { # the list of characters is the default for illegal_object_name_chars @@ -103,6 +104,7 @@ check_freshness => $check_fresh, freshness_threshold => $is_fresh, event_handler => $event_handler, + display_name => $display_name, }, } # 
This is a hack. We detect if we are running on the scope of an icinga -- To view, visit https://gerrit.wikimedia.org/r/391235 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I87c6daa29994f77b957ca04cd4af409121635f56 Gerrit-PatchSet: 5 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Volans <rcocci...@wikimedia.org> Gerrit-Reviewer: Alexandros Kosiaris <akosia...@wikimedia.org> Gerrit-Reviewer: Faidon Liambotis <fai...@wikimedia.org> Gerrit-Reviewer: Filippo Giunchedi <fgiunch...@wikimedia.org> Gerrit-Reviewer: Volans <rcocci...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits