Elukey has submitted this change and it was merged. Change subject: Switch Varnishkafka monitoring from Ganglia to statsd ......................................................................
Switch Varnishkafka monitoring from Ganglia to statsd This change includes: 1) the varnishkafka submodule SHA upgrade ref: https://gerrit.wikimedia.org/r/#/c/324877 https://gerrit.wikimedia.org/r/#/c/324887 https://gerrit.wikimedia.org/r/#/c/324890 https://gerrit.wikimedia.org/r/#/c/324891 2) removal of the last Ganglia configuration for statsv; 3) removal of unused graphite monitors for webrequest.pp; 4) statsd configuration for statsv and eventlogging instances. Bug: T152093 Change-Id: I3c14fa90baba8063bf946dd7feb3733d090ee1bb --- M modules/role/manifests/cache/kafka/eventlogging.pp M modules/role/manifests/cache/kafka/statsv.pp M modules/role/manifests/cache/kafka/webrequest.pp M modules/varnishkafka 4 files changed, 36 insertions(+), 42 deletions(-) Approvals: Elukey: Looks good to me, approved Ottomata: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/modules/role/manifests/cache/kafka/eventlogging.pp b/modules/role/manifests/cache/kafka/eventlogging.pp index fc5515d..57802a5 100644 --- a/modules/role/manifests/cache/kafka/eventlogging.pp +++ b/modules/role/manifests/cache/kafka/eventlogging.pp @@ -24,4 +24,16 @@ topic_request_required_acks => '1', conf_template => $conf_template, } + + include ::standard + + $cache_type = hiera('cache::cluster') + $graphite_metric_prefix = "varnishkafka.${::hostname}.eventlogging.${cache_type}" + + # Sets up Logster to read from the Varnishkafka instance stats JSON file + # and report metrics to statsd. 
+ varnishkafka::monitor::statsd { 'eventlogging': + graphite_metric_prefix => $graphite_metric_prefix, + statsd_host_port => hiera('statsd'), + } } diff --git a/modules/role/manifests/cache/kafka/statsv.pp b/modules/role/manifests/cache/kafka/statsv.pp index a31b1a9..15115b4 100644 --- a/modules/role/manifests/cache/kafka/statsv.pp +++ b/modules/role/manifests/cache/kafka/statsv.pp @@ -36,7 +36,14 @@ } include ::standard - if $::standard::has_ganglia { - varnishkafka::monitor { 'statsv': } + + $cache_type = hiera('cache::cluster') + $graphite_metric_prefix = "varnishkafka.${::hostname}.statsv.${cache_type}" + + # Sets up Logster to read from the Varnishkafka instance stats JSON file + # and report metrics to statsd. + varnishkafka::monitor::statsd { 'statsv': + graphite_metric_prefix => $graphite_metric_prefix, + statsd_host_port => hiera('statsd'), } } diff --git a/modules/role/manifests/cache/kafka/webrequest.pp b/modules/role/manifests/cache/kafka/webrequest.pp index 01926d9..447cfa9 100644 --- a/modules/role/manifests/cache/kafka/webrequest.pp +++ b/modules/role/manifests/cache/kafka/webrequest.pp @@ -117,55 +117,30 @@ require => Class['::varnishkafka'], } - # Extract cache type name from topic for use in statsd prefix. - # There is probably a better way to do this. - $cache_type = regsubst($topic, '^webrequest_(.+)$', '\1') + $cache_type = hiera('cache::cluster') $graphite_metric_prefix = "varnishkafka.${::hostname}.webrequest.${cache_type}" - # Test using logster to send varnishkafka stats to statsd -> graphite. - # This may be moved into the varnishkafka module. - logster::job { 'varnishkafka-webrequest': - minute => '*/1', - parser => 'JsonLogster', - logfile => '/var/cache/varnishkafka/webrequest.stats.json', - logster_options => "-o statsd --statsd-host=statsd.eqiad.wmnet:8125 --metric-prefix=${graphite_metric_prefix}", + # Sets up Logster to read from the Varnishkafka instance stats JSON file + # and report metrics to statsd. 
+ varnishkafka::monitor::statsd { 'webrequest': + graphite_metric_prefix => $graphite_metric_prefix, + statsd_host_port => hiera('statsd'), } - - - # TEMPORARY test --until on all vk drerr alerts - $until = '0min' # Generate an alert if too many delivery report errors per minute # (logster only reports once a minute) monitoring::graphite_threshold { 'varnishkafka-kafka_drerr': - ensure => 'present', - description => 'Varnishkafka Delivery Errors per minute', - metric => "derivative(transformNull(${graphite_metric_prefix}.varnishkafka.kafka_drerr, 0))", + ensure => 'present', + description => 'Varnishkafka Delivery Errors per minute', + metric => "derivative(transformNull(${graphite_metric_prefix}.varnishkafka.kafka_drerr, 0))", # More than 0 errors is warning threshold. - warning => 0, + warning => 0, # More than 20000 errors is critical threshold. - critical => 20000, + critical => 20000, # But only alert if a large percentage of the examined datapoints # are over the threshold. - percentage => 80, - from => '10min', - until => $until, - nagios_critical => false, - require => Logster::Job['varnishkafka-webrequest'], - } - - # Use graphite_anomaly to alert about anomolous deliver errors. - monitoring::graphite_anomaly { 'varnishkafka-anomaly-kafka_drerr': - # Disabling this. It doesn't work like I wanted it to. 
- ensure => 'absent', - description => 'Varnishkafka Delivery Errors per minute anomaly', - metric => "nonNegativeDerivative(transformNull(${graphite_metric_prefix}.varnishkafka.kafka_drerr, 0))", - over => true, - # warn if more than 10 anomylous datapoints (last 10 minutes) - warning => 5, - # critical if more than 45 anomylous datapoints (last 45 minutes) - critical => 45, - nagios_critical => false, - require => Logster::Job['varnishkafka-webrequest'], + percentage => 80, + from => '10min', + require => Logster::Job['varnishkafka-webrequest'], } } diff --git a/modules/varnishkafka b/modules/varnishkafka index 63949e1..9e85da9 160000 --- a/modules/varnishkafka +++ b/modules/varnishkafka @@ -1 +1 @@ -Subproject commit 63949e1cb21e0a66fb974a135102b7795d1e2051 +Subproject commit 9e85da9077f4fe440fab43b6acc14487ed4a2d39 -- To view, visit https://gerrit.wikimedia.org/r/324883 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I3c14fa90baba8063bf946dd7feb3733d090ee1bb Gerrit-PatchSet: 10 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Elukey <ltosc...@wikimedia.org> Gerrit-Reviewer: BBlack <bbl...@wikimedia.org> Gerrit-Reviewer: Elukey <ltosc...@wikimedia.org> Gerrit-Reviewer: Ema <e...@wikimedia.org> Gerrit-Reviewer: Ottomata <o...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits