Ottomata has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/381489 )
Change subject: [WIP] Prometheus based Kafka broker alerts, take 1 ...................................................................... [WIP] Prometheus based Kafka broker alerts, take 1 This refactors the Prometheus JMX exporter just a bit, moving it to a separate profile::kafka::broker::monitoring class, along with icinga alerts. Bug: T175923 Change-Id: I839d5de4110da245f712e23285280c2fd546fe8f --- M hieradata/role/common/kafka/jumbo/broker.yaml M modules/profile/manifests/kafka/broker.pp A modules/profile/manifests/kafka/broker/monitoring.pp 3 files changed, 87 insertions(+), 39 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/89/381489/1 diff --git a/hieradata/role/common/kafka/jumbo/broker.yaml b/hieradata/role/common/kafka/jumbo/broker.yaml index 5fb6770..5b18607 100644 --- a/hieradata/role/common/kafka/jumbo/broker.yaml +++ b/hieradata/role/common/kafka/jumbo/broker.yaml @@ -2,8 +2,8 @@ profile::kafka::broker::kafka_cluster_name: jumbo -# Enable the Prometheus JMX Exporter -profile::kafka::broker::prometheus_monitoring_enabled: true +# Enable Monitoring (via Prometheus) and icinga alerts +profile::kafka::broker::monitoring_enabled: true profile::kafka::broker::log_dirs: [/srv/kafka/data] profile::kafka::broker::plaintext: true @@ -28,5 +28,5 @@ profile::kafka::broker::num_recovery_threads_per_data_dir: 12 profile::kafka::broker::num_io_threads: 12 -profile::kafka::broker::replica_maxlag_warning: "1000000" -profile::kafka::broker::replica_maxlag_critical: "5000000" +profile::kafka::broker::monitoring::replica_maxlag_warning: 1000000 +profile::kafka::broker::monitoring::replica_maxlag_critical: 5000000 diff --git a/modules/profile/manifests/kafka/broker.pp b/modules/profile/manifests/kafka/broker.pp index 0ce4f1a..e0b3e71 100644 --- a/modules/profile/manifests/kafka/broker.pp +++ b/modules/profile/manifests/kafka/broker.pp @@ -87,7 +87,7 @@ $replica_maxlag_critical = 
hiera('profile::kafka::broker::replica_maxlag_critical'), # This is set via top level hiera variable so it can be synchronized between roles and clients. $message_max_bytes = hiera('kafka_message_max_bytes'), - $prometheus_monitoring_enabled = hiera('profile::kafka::broker::prometheus_monitoring_enabled'), + $monitoring_enabled = hiera('profile::kafka::broker::monitoring_enabled'), $prometheus_nodes = hiera('prometheus_nodes'), ) { # TODO: WIP @@ -185,39 +185,6 @@ java_home => '/usr/lib/jvm/java-8-openjdk-amd64', } - if $prometheus_monitoring_enabled { - # Allow automatic generation of config on the - # Prometheus master - prometheus::jmx_exporter_instance { $::hostname: - address => $::ipaddress, - port => 7800, - } - - $prometheus_nodes_ferm = join($prometheus_nodes, ' ') - ferm::service { 'kafka-broker-jmx_exporter': - proto => 'tcp', - port => '7800', - srange => "@resolve((${prometheus_nodes_ferm}))", - } - - require_package('prometheus-jmx-exporter') - - $jmx_exporter_config_file = '/etc/kafka/broker_prometheus_jmx_exporter.yaml' - $java_opts = "-javaagent:/usr/share/java/prometheus/jmx_prometheus_javaagent.jar=${::ipaddress}:7800:${jmx_exporter_config_file}" - - # Create the Prometheus JMX Exporter configuration - file { $jmx_exporter_config_file: - ensure => present, - source => 'puppet:///modules/profile/kafka/broker_prometheus_jmx_exporter.yaml', - owner => 'kafka', - group => 'kafka', - mode => '0400', - require => Class['::confluent::kafka::broker'], - } - } else { - $java_opts = undef - } - class { '::confluent::kafka::broker': log_dirs => $log_dirs, brokers => $config['brokers']['hash'], @@ -233,7 +200,6 @@ # https://kafka.apache.org/documentation/#java # Note that MetaspaceSize is a Java 8 setting. 
jvm_performance_opts => '-server -XX:MetaspaceSize=96m -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:G1HeapRegionSize=16M -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80', - java_opts => $java_opts, listeners => $listeners, security_inter_broker_protocol => $security_inter_broker_protocol, @@ -249,6 +215,15 @@ message_max_bytes => $message_max_bytes, } + # If monitoring is enabled, then include the monitoring profile and set $java_opts + # for exposing the Prometheus JMX Exporter in the Kafka Broker process. + if $monitoring_enabled { + include ::profile::kafka::broker::monitoring + Class['::confluent::kafka::broker'] { + java_opts => $::profile::kafka::broker::monitoring::java_opts + } + } + $ferm_plaintext_ensure = $plaintext ? { false => 'absent', undef => 'absent', diff --git a/modules/profile/manifests/kafka/broker/monitoring.pp b/modules/profile/manifests/kafka/broker/monitoring.pp new file mode 100644 index 0000000..628972b --- /dev/null +++ b/modules/profile/manifests/kafka/broker/monitoring.pp @@ -0,0 +1,73 @@ +# Class: profile::kafka::broker::monitoring +# +# Sets up Prometheus based monitoring and icinga alerts. 
+# +class profile::kafka::broker::monitoring ( + $cluster = hiera('cluster'), + $prometheus_nodes = hiera('prometheus_nodes'), + $replica_maxlag_warning = hiera('profile::kafka::broker::monitoring::replica_maxlag_warning'), + $replica_maxlag_critical = hiera('profile::kafka::broker::monitoring::replica_maxlag_critical'), +) { + ### Expose Kafka Broker JMX metrics to Prometheus + require_package('prometheus-jmx-exporter') + + $prometheus_jmx_exporter_port = 7800 + $jmx_exporter_config_file = '/etc/kafka/broker_prometheus_jmx_exporter.yaml' + + # Use this in your JAVA_OPTS you pass to the Kafka broker process + $java_opts = "-javaagent:/usr/share/java/prometheus/jmx_prometheus_javaagent.jar=${::ipaddress}:${prometheus_jmx_exporter_port}:${jmx_exporter_config_file}" + + # Create the Prometheus JMX Exporter configuration + file { $jmx_exporter_config_file: + ensure => present, + source => 'puppet:///modules/profile/kafka/broker_prometheus_jmx_exporter.yaml', + owner => 'kafka', + group => 'kafka', + mode => '0400', + # Require this to make sure that kafka user and group are already created. + require => Class['::confluent::kafka::broker'], + } + + # Allow automatic generation of config on the Prometheus master + prometheus::jmx_exporter_instance { $::hostname: + address => $::ipaddress, + port => $prometheus_jmx_exporter_port, + } + + $prometheus_nodes_ferm = join($prometheus_nodes, ' ') + ferm::service { 'kafka-broker-jmx_exporter': + proto => 'tcp', + port => '7800', + srange => "@resolve((${prometheus_nodes_ferm}))", + } + + + ### Icinga alerts + # Generate icinga alert if Kafka Broker Server is not running. 
+ nrpe::monitor_service { 'kafka': + description => 'Kafka Broker Server', + nrpe_command => '/usr/lib/nagios/plugins/check_procs -c 1:1 -C java -a "Kafka /etc/kafka/server.properties"', + critical => true, + } + + # Prometheus labels for this Kafka Broker instance + $prometheus_labels = "cluster=kafka_${cluster},instance=${::hostname}:${prometheus_jmx_exporter_port},job=jmx_kafka" + + # Alert on the average number of under replicated partitions over the last 30 minutes. + monitoring::check_prometheus { 'kafka_broker_under_replicated_partitions': + description => 'Kafka Broker Under Replicated Partitions', + metric => "scalar(avg_over_time(kafka_server_replicamanager_underreplicatedpartitions{${prometheus_labels}}[30m]))", + warning => 5, + critical => 10, + prometheus_url => "http://prometheus.svc.${::site}.wmnet/ops", + } + + # Alert on the average max replica lag over the last 30 minutes. + monitoring::check_prometheus { 'kafka_broker_replica_max_lag': + description => 'Kafka Broker Replica Max Lag', + metric => "scalar(avg_over_time(kafka_server_replicafetchermanager_maxlag{${prometheus_labels}}[30m]))" + warning => $replica_maxlag_warning, + critical => $replica_maxlag_critical, + prometheus_url => "http://prometheus.svc.${::site}.wmnet/ops", + } +} \ No newline at end of file -- To view, visit https://gerrit.wikimedia.org/r/381489 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I839d5de4110da245f712e23285280c2fd546fe8f Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Ottomata <ao...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits