Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/72733


Change subject: Adding icinga alerts for per topic kafka producers
......................................................................

Adding icinga alerts for per topic kafka producers

Change-Id: Ida23a744266afe880783056b8b630b92a2587944
---
M manifests/misc/analytics.pp
M manifests/role/analytics.pp
M templates/icinga/checkcommands.cfg.erb
3 files changed, 45 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/33/72733/1

diff --git a/manifests/misc/analytics.pp b/manifests/misc/analytics.pp
index 46fc0c0..a110fd1 100644
--- a/manifests/misc/analytics.pp
+++ b/manifests/misc/analytics.pp
@@ -25,11 +25,33 @@
        }
 }
 
+# == Define misc::analytics::monitoring::kafka::producer
+# Sets up an Icinga alert for the Kafka producer named by the resource $title.
+#
+# == Parameters:
+# $warning  - warning threshold; the alert is for values dropping too low.
+# $critical - critical threshold; callers in role/analytics.pp set it below $warning.
+#
+# Usage:
+#   misc::analytics::monitoring::kafka::producer { 'webrequest-mobile':
+#      warning  => 5,
+#      critical => 1,
+#   }
+#
+define misc::analytics::monitoring::kafka::producer($warning, $critical) {
+       # Set up icinga monitoring of Kafka producer async produce events per 
second.
+       # If this drops too low, trigger an alert.
+       monitor_service { "kafka-producer-${title}.AsyncProducerEvents":
+               description           => 
"kafka_producer_${title}.AsyncProducerEvents",
+               check_command         => 
"check_kafka_producer_produce_events!${title}!${warning}!${critical}",
+               contact_group         => "analytics",
+       }
+}
+
 
 class misc::analytics::monitoring::kafka::server {
        # Set up icinga monitoring of Kafka broker server produce requests per 
second.
        # If this drops too low, trigger an alert
-       # for this udp2log instance.
        monitor_service { "kakfa-broker-ProduceRequestsPerSecond_min":
                description           => 
"kafka_network_SocketServerStats.ProduceRequestsPerSecond_min",
                check_command         => 
"check_kafka_broker_produce_requests_min!5!1",
@@ -38,7 +60,6 @@
 
        # Set up icinga monitoring of Kafka broker server produce requests per 
second.
        # If this drops too low, trigger an alert
-       # for this udp2log instance.
        monitor_service { "kakfa-broker-ProduceRequestsPerSecond_max":
                description           => 
"kafka_network_SocketServerStats.ProduceRequestsPerSecond_max",
                check_command         => 
"check_kafka_broker_produce_requests_max!15!20",
diff --git a/manifests/role/analytics.pp b/manifests/role/analytics.pp
index 3d62194..35475ad 100644
--- a/manifests/role/analytics.pp
+++ b/manifests/role/analytics.pp
@@ -143,6 +143,11 @@
                packet_loss_log    => $packet_loss_log,
                monitor_log_age    => false,
        }
+
+       misc::analytics::monitoring::kafka::producer { 'webrequest-mobile':
+               warning  => 2000000,
+               critical => 1000000,
+       }
 }
 
 # == role::analytics::udp2log::wikipedia_mobile
@@ -166,6 +171,11 @@
                log_directory      => $log_directory,
                packet_loss_log    => $packet_loss_log,
                monitor_log_age    => false,
+       }
+
+       misc::analytics::monitoring::kafka::producer { 
'webrequest-wikipedia-mobile':
+               warning  => 2000000,
+               critical => 1000000,
        }
 }
 
@@ -192,4 +202,9 @@
                packet_loss_log    => $packet_loss_log,
                monitor_log_age    => false,
        }
+
+       misc::analytics::monitoring::kafka::producer { 
'webrequest-wikipedia-mobile':
+               warning  => 120000,
+               critical => 6000,
+       }
 }
diff --git a/templates/icinga/checkcommands.cfg.erb 
b/templates/icinga/checkcommands.cfg.erb
index c569665..49d018a 100644
--- a/templates/icinga/checkcommands.cfg.erb
+++ b/templates/icinga/checkcommands.cfg.erb
@@ -626,6 +626,13 @@
        command_line    $USER1$/check_ganglios_generic_value -H $HOSTADDRESS$ 
-m kafka_network_SocketServerStats.ProduceRequestsPerSecond -w $ARG1$ -c $ARG2$ 
-o gt
 }
 
+# Check that udp2log Kafka producers are producing ($ARG1$ = producer name, $ARG2$ = warning, $ARG3$ = critical)
+define command{
+       command_name    check_kafka_producer_produce_events
+       command_line    $USER1$/check_ganglios_generic_value -H $HOSTADDRESS$ 
-m udp2log_kafka_producer_$ARG1$.AsyncProducerEvents -w $ARG2$ -c $ARG3$ -o lt
+}
+
+
 
 # Alerts for data loss in Kraken HDFS.
 define command{

-- 
To view, visit https://gerrit.wikimedia.org/r/72733
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ida23a744266afe880783056b8b630b92a2587944
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <o...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to