Ottomata has uploaded a new change for review. https://gerrit.wikimedia.org/r/72733
Change subject: Adding icinga alerts for per topic kafka producers ...................................................................... Adding icinga alerts for per topic kafka producers Change-Id: Ida23a744266afe880783056b8b630b92a2587944 --- M manifests/misc/analytics.pp M manifests/role/analytics.pp M templates/icinga/checkcommands.cfg.erb 3 files changed, 45 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/33/72733/1 diff --git a/manifests/misc/analytics.pp b/manifests/misc/analytics.pp index 46fc0c0..a110fd1 100644 --- a/manifests/misc/analytics.pp +++ b/manifests/misc/analytics.pp @@ -25,11 +25,33 @@ } } +# == Define misc::analytics::monitoring::kafka::producer +# Sets up Icinga alerts for a Kafka Producer identified by $topic. +# +# == Parameters: +# $warning +# $critical +# +# Usage: +# misc::analytics::monitoring::kafka::producer { 'webrequest-mobile': +# warning => 1, +# critical => 5, +# } +# +define misc::analytics::monitoring::kafka::producer($warning, $critical) { + # Set up icinga monitoring of Kafka producer async produce events per second. + # If this drops too low, trigger an alert. + monitor_service { "kafka-producer-${title}.AsyncProducerEvents": + description => "kafka_producer_${title}.AsyncProducerEvents", + check_command => "check_kafka_producer_produce_events!${title}!${warning}!${critical}", + contact_group => "analytics", + } +} + class misc::analytics::monitoring::kafka::server { # Set up icinga monitoring of Kafka broker server produce requests per second. # If this drops too low, trigger an alert - # for this udp2log instance. monitor_service { "kakfa-broker-ProduceRequestsPerSecond_min": description => "kafka_network_SocketServerStats.ProduceRequestsPerSecond_min", check_command => "check_kafka_broker_produce_requests_min!5!1", @@ -38,7 +60,6 @@ # Set up icinga monitoring of Kafka broker server produce requests per second. 
# If this drops too low, trigger an alert - # for this udp2log instance. monitor_service { "kakfa-broker-ProduceRequestsPerSecond_max": description => "kafka_network_SocketServerStats.ProduceRequestsPerSecond_max", check_command => "check_kafka_broker_produce_requests_max!15!20", diff --git a/manifests/role/analytics.pp b/manifests/role/analytics.pp index 3d62194..35475ad 100644 --- a/manifests/role/analytics.pp +++ b/manifests/role/analytics.pp @@ -143,6 +143,11 @@ packet_loss_log => $packet_loss_log, monitor_log_age => false, } + + misc::analytics::monitoring::kafka::producer { 'webrequest-mobile': + warning => 2000000, + critical => 1000000, + } } # == role::analytics::udp2log::wikipedia_mobile @@ -166,6 +171,11 @@ log_directory => $log_directory, packet_loss_log => $packet_loss_log, monitor_log_age => false, + } + + misc::analytics::monitoring::kafka::producer { 'webrequest-wikipedia-mobile': + warning => 2000000, + critical => 1000000, } } @@ -192,4 +202,9 @@ packet_loss_log => $packet_loss_log, monitor_log_age => false, } + + misc::analytics::monitoring::kafka::producer { 'webrequest-wikipedia-mobile': + warning => 120000, + critical => 6000, + } } diff --git a/templates/icinga/checkcommands.cfg.erb b/templates/icinga/checkcommands.cfg.erb index c569665..49d018a 100644 --- a/templates/icinga/checkcommands.cfg.erb +++ b/templates/icinga/checkcommands.cfg.erb @@ -626,6 +626,13 @@ command_line $USER1$/check_ganglios_generic_value -H $HOSTADDRESS$ -m kafka_network_SocketServerStats.ProduceRequestsPerSecond -w $ARG1$ -c $ARG2$ -o gt } +# Check that udp2log Kafka producers are producing +define command{ + command_name check_kafka_producer_produce_events + command_line $USER1$/check_ganglios_generic_value -H $HOSTADDRESS$ -m udp2log_kafka_producer_$ARG1.AsyncProducerEvents -w $ARG2$ -c $ARG3$ -o lt +} + + # Alerts for data loss in Kraken HDFS.
define command{ -- To view, visit https://gerrit.wikimedia.org/r/72733 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ida23a744266afe880783056b8b630b92a2587944 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Ottomata <o...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits