Ottomata has uploaded a new change for review.
https://gerrit.wikimedia.org/r/258220
Change subject: [WIP] Using more generic roles for kafka classes
......................................................................
[WIP] Using more generic roles for kafka classes
This will deprecate role::analytics::kafka::* in favor of
role::kafka::analytics::*
This commit includes the new role on the analytics Kafka brokers, but does not
change any other users of role::analytics::kafka::config. This will
be addressed in a separate commit after this is verified to work on brokers.
Bug: T120957
Change-Id: Ifec423daa5d9b2a3d3e6e4b0bd12dda5639b8594
---
M hieradata/common.yaml
M manifests/site.pp
A modules/role/manifests/kafka/analytics/broker.pp
A modules/role/manifests/kafka/analytics/config.pp
A modules/role/manifests/kafka/main/broker.pp
A modules/role/manifests/kafka/main/config.pp
A modules/role/templates/kafka/kafka-profile.sh.erb
7 files changed, 368 insertions(+), 1 deletion(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/20/258220/1
diff --git a/hieradata/common.yaml b/hieradata/common.yaml
index 33b8538..a0eefd7 100644
--- a/hieradata/common.yaml
+++ b/hieradata/common.yaml
@@ -320,3 +320,36 @@
glanceconfig:
db_host: 'm5-master.eqiad.wmnet'
bind_ip: "%{::ipaddress_eth0}"
+
+kafka_clusters:
+ # This is the analytics Kafka cluster, named just 'eqiad' for
+ # historical reasons.
+ eqiad:
+ brokers:
+ kafka1012.eqiad.wmnet:
+ id: 12 # Row A
+ kafka1013.eqiad.wmnet:
+ id: 13 # Row A
+ kafka1014.eqiad.wmnet:
+ id: 14 # Row C
+ kafka1018.eqiad.wmnet:
+ id: 18 # Row D
+ kafka1020.eqiad.wmnet:
+ id: 20 # Row D
+ kafka1022.eqiad.wmnet:
+ id: 22 # Row C
+
+ # TODO: set these once the hardware is in.
+ # main-eqiad:
+ # brokers:
+ # kafka1001.eqiad.wmnet:
+ # id: 1001
+ # kafka1002.eqiad.wmnet:
+ # id: 1002
+ # NOTE(review): codfw FQDNs below assumed (were '*.eqiad.wmnet') - confirm.
+ # main-codfw:
+ # brokers:
+ # kafka2001.codfw.wmnet:
+ # id: 2001
+ # kafka2002.codfw.wmnet:
+ # id: 2002
diff --git a/manifests/site.pp b/manifests/site.pp
index 0f0cafd..2a82c80 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -1169,7 +1169,8 @@
# addresses.
interface::add_ip6_mapped { 'main': }
- role analytics::kafka::server
+ role kafka::analytics::broker
+
include role::analytics
include standard
include base::firewall
diff --git a/modules/role/manifests/kafka/analytics/broker.pp b/modules/role/manifests/kafka/analytics/broker.pp
new file mode 100644
index 0000000..03ecbe6
--- /dev/null
+++ b/modules/role/manifests/kafka/analytics/broker.pp
@@ -0,0 +1,133 @@
+# == Class role::kafka::analytics::broker
+# Sets up a broker belonging to an Analytics Kafka cluster.
+# This role works for any site.
+#
+# See modules/role/manifests/kafka/README.md for more information.
+#
+class role::kafka::analytics::broker {
+    include ::role::kafka::analytics::config
+
+    require_package('openjdk-7-jdk')
+
+    system::role { 'role::kafka::analytics::broker':
+        description => "Kafka Broker Server in the ${::role::kafka::analytics::config::cluster_name} cluster",
+    }
+
+    # Make these local so kafka-profile.sh.erb can easily render them.
+    $zookeeper_url  = $::role::kafka::analytics::config::zookeeper_url
+    $brokers_string = $::role::kafka::analytics::config::brokers_string
+    # Export the ZOOKEEPER_URL and BROKER_LIST user environment variables.
+    # This makes it much more convenient to run kafka commands without having
+    # to specify the --zookeeper or --brokers flag every time.
+    file { '/etc/profile.d/kafka.sh':
+        owner   => 'root',
+        mode    => '0444',
+        content => template('role/kafka/kafka-profile.sh.erb'),
+    }
+
+    $log_dirs = $::realm ? {
+        'labs'       => ['/var/spool/kafka'],
+        # Production analytics Kafka brokers have more disks.
+        'production' => [
+            '/var/spool/kafka/a/data',
+            '/var/spool/kafka/b/data',
+            '/var/spool/kafka/c/data',
+            '/var/spool/kafka/d/data',
+            '/var/spool/kafka/e/data',
+            '/var/spool/kafka/f/data',
+            '/var/spool/kafka/g/data',
+            '/var/spool/kafka/h/data',
+            '/var/spool/kafka/i/data',
+            '/var/spool/kafka/j/data',
+            '/var/spool/kafka/k/data',
+            '/var/spool/kafka/l/data',
+        ],
+    }
+
+    $nofiles_ulimit = $::realm ? {
+        # Use default ulimit for labs kafka
+        'labs'       => 8192,
+        # Increase ulimit for production kafka.
+        'production' => 65536,
+    }
+
+    class { '::kafka::server':
+        log_dirs                        => $log_dirs,
+        brokers                         => $::role::kafka::analytics::config::brokers_config,
+        zookeeper_hosts                 => $::role::kafka::analytics::config::zookeeper_hosts,
+        zookeeper_chroot                => $::role::kafka::analytics::config::zookeeper_chroot,
+        nofiles_ulimit                  => $nofiles_ulimit,
+        jmx_port                        => $::role::kafka::analytics::config::jmx_port,
+
+        # Enable auto creation of topics.
+        auto_create_topics_enable       => true,
+
+        # (Temporarily?) disable auto leader rebalance.
+        # I am having issues with analytics1012, and I can't
+        # get Camus to consume properly for its preferred partitions
+        # if it is online and the leader. - otto
+        auto_leader_rebalance_enable    => false,
+
+        default_replication_factor      => min(3, size($::role::kafka::analytics::config::brokers_array)),
+        # Start with a low number of (auto created) partitions per
+        # topic. This can be increased manually for high volume
+        # topics if necessary.
+        num_partitions                  => 1,
+
+        # Bump this up to get a little more
+        # parallelism between replicas.
+        num_replica_fetchers            => 12,
+        # Setting this larger so that it is sure to be bigger
+        # than batch size from varnishkafka.
+        # See: https://issues.apache.org/jira/browse/KAFKA-766
+        # webrequest_bits is about 50k msgs/sec, and has 10 partitions.
+        # That's 5000 msgs/second/partition, so this should allow
+        # a partition to get behind by up to 10 seconds before
+        # removing it from the ISR. This will be longer for
+        # less voluminous topics.
+        replica_lag_max_messages        => 50000,
+        # Setting this to a value according to https://cwiki.apache.org/confluence/display/KAFKA/FAQ#FAQ-HowtoreducechurnsinISR?WhendoesabrokerleavetheISR?
+        # 1 / MinFetchRate * 1000. I assume this result to be in seconds, since the default for max_ms is 10000.
+        # MinFetchRate ~= 45. 1/45*1000 ~= 22. Setting this to 30 seconds to overcompensate.
+        # See also: http://ganglia.wikimedia.org/latest/graph_all_periods.php?title=&vl=&x=&n=&hreg%5B%5D=analytics102%5B12%5D.*&mreg%5B%5D=kafka.server.ReplicaFetcherManager.Replica-MinFetchRate.Value&gtype=line&glegend=show&aggregate=1
+        replica_lag_time_max_ms         => 30000,
+        # Allow for 16 seconds of latency when talking with Zookeeper.
+        # We have seen an issue where (mainly or only) analytics1021 will
+        # pause for almost 12 seconds for a yet unknown reason. Upping
+        # the session timeout here should give the broker enough time
+        # to get back in sync with Zookeeper before it is removed from the ISR.
+        # See T83561 (near the bottom)
+        # and: http://mail-archives.apache.org/mod_mbox/kafka-users/201407.mbox/%3CCAFbh0Q2f71qgs5JDNFxkm7SSdZyYMH=zpeoxotueqfkqexq...@mail.gmail.com%3E
+        zookeeper_connection_timeout_ms => 16000,
+        zookeeper_session_timeout_ms    => 16000,
+        # Use LinkedIn recommended settings with G1 garbage collector.
+        jvm_performance_opts            => '-server -XX:PermSize=48m -XX:MaxPermSize=48m -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35',
+    }
+
+    # firewall Kafka Broker
+    ferm::service { 'kafka-broker':
+        proto  => 'tcp',
+        # TODO: A custom port can be configured in
+        # $brokers_config. Extract the proper port to open from that
+        # config hash. Until then open the default broker port, not JMX.
+        port   => 9092,
+        srange => '$ALL_NETWORKS',
+    }
+
+    # Include Kafka Server Jmxtrans class
+    # to send Kafka Broker metrics to Ganglia and statsd.
+    class { '::kafka::server::jmxtrans':
+        ganglia  => hiera('ganglia_aggregators', undef),
+        statsd   => hiera('statsd', undef),
+        jmx_port => $::kafka::server::jmx_port,
+        require  => Class['::kafka::server'],
+    }
+
+    # Monitor kafka in production
+    if $::realm == 'production' {
+        class { '::kafka::server::monitoring':
+            jmx_port            => $::role::kafka::analytics::config::jmx_port,
+            nagios_servicegroup => "analytics_${::site}",
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/role/manifests/kafka/analytics/config.pp b/modules/role/manifests/kafka/analytics/config.pp
new file mode 100644
index 0000000..bef078b
--- /dev/null
+++ b/modules/role/manifests/kafka/analytics/config.pp
@@ -0,0 +1,51 @@
+# == Class role::kafka::analytics::config
+# Kafka config class for an Analytics Kafka cluster.
+# This class only contains variable definitions,
+# so it is safe to include anywhere in order to
+# reference them.
+#
+# See modules/role/manifests/kafka/README.md for more information.
+#
+class role::kafka::analytics::config {
+    # Choose cluster name from hiera, or default appropriately
+    # in labs or production.
+    $cluster_name = hiera('kafka_cluster_name', $::realm ? {
+        'labs'       => "analytics-${::labsproject}",
+        'production' => $::site ? {
+            'eqiad' => 'eqiad',
+            default => "analytics-${::site}"
+        }
+    })
+
+    # Get all kafka cluster configs from hiera, or default to a single node
+    # cluster for $cluster_name. Hash keys are quoted so they parse as strings.
+    $all_clusters = hiera('kafka_clusters', {
+        "${cluster_name}" => {
+            'brokers' => {
+                "${::fqdn}" => {
+                    'id' => '1'
+                },
+            }
+        }
+    })
+
+    # Config hash suitable for passing to kafka::server's brokers param
+    $brokers_config = $all_clusters[$cluster_name]['brokers']
+    # Array of broker hostnames in this Kafka cluster
+    $brokers_array = keys($brokers_config)
+    # Comma-separated string of broker hostname:ports (port defaults
+    # to 9092 when a broker sets none), useful in many client configs.
+    $brokers_string = inline_template('<%= @brokers_config.keys.sort.map { |b| "#{b}:#{@brokers_config[b].fetch("port", 9092)}" }.join(",") %>')
+
+    $jmx_port = 9999
+
+    # jmxtrans renders hostname metrics with underscores and
+    # suffixed with the jmx port. Build a graphite
+    # wildcard to match these.
+    # E.g. kafka1012.eqiad.wmnet -> kafka1012_eqiad_wmnet_9999
+    $brokers_graphite_wildcard = inline_template('{<%= @brokers_array.join("_#{@jmx_port},").tr(".","_") + "_#{@jmx_port}" %>}')
+
+    $zookeeper_hosts  = keys(hiera('zookeeper_hosts'))
+    $zookeeper_chroot = "/kafka/${cluster_name}"
+    $zookeeper_url    = inline_template("<%= @zookeeper_hosts.sort.join(',') %><%= @zookeeper_chroot %>")
+}
diff --git a/modules/role/manifests/kafka/main/broker.pp b/modules/role/manifests/kafka/main/broker.pp
new file mode 100644
index 0000000..9dfb12c
--- /dev/null
+++ b/modules/role/manifests/kafka/main/broker.pp
@@ -0,0 +1,99 @@
+# == Class role::kafka::main::broker
+# Sets up a broker belonging to a main Kafka cluster.
+# This role works for any site.
+#
+# See modules/role/manifests/kafka/README.md for more information.
+#
+class role::kafka::main::broker {
+    include ::role::kafka::main::config
+
+    require_package('openjdk-7-jdk')
+
+    system::role { 'role::kafka::main::broker':
+        description => "Kafka Broker Server in the ${::role::kafka::main::config::cluster_name} cluster",
+    }
+
+    # Make these local so kafka-profile.sh.erb can easily render them.
+    $zookeeper_url  = $::role::kafka::main::config::zookeeper_url
+    $brokers_string = $::role::kafka::main::config::brokers_string
+    # Export the ZOOKEEPER_URL and BROKER_LIST user environment variables.
+    # This makes it much more convenient to run kafka commands without having
+    # to specify the --zookeeper or --brokers flag every time.
+    file { '/etc/profile.d/kafka.sh':
+        owner   => 'root',
+        mode    => '0444',
+        content => template('role/kafka/kafka-profile.sh.erb'),
+    }
+
+    $log_dirs = $::realm ? {
+        'labs'       => ['/var/spool/kafka'],
+        # Production main Kafka brokers have more disks.
+        'production' => [
+            # TODO: fill this in with real partitions when we have hardware.
+            '/var/spool/kafka/a/data',
+            '/var/spool/kafka/b/data',
+            '/var/spool/kafka/c/data',
+            '/var/spool/kafka/d/data',
+        ],
+    }
+
+    $nofiles_ulimit = $::realm ? {
+        # Use default ulimit for labs kafka
+        'labs'       => 8192,
+        # Increase ulimit for production kafka.
+        'production' => 65536,
+    }
+
+    class { '::kafka::server':
+        log_dirs                     => $log_dirs,
+        brokers                      => $::role::kafka::main::config::brokers_config,
+        zookeeper_hosts              => $::role::kafka::main::config::zookeeper_hosts,
+        zookeeper_chroot             => $::role::kafka::main::config::zookeeper_chroot,
+        nofiles_ulimit               => $nofiles_ulimit,
+        jmx_port                     => $::role::kafka::main::config::jmx_port,
+
+        # Enable auto creation of topics.
+        auto_create_topics_enable    => true,
+
+        # (Temporarily?) disable auto leader rebalance.
+        # I am having issues with analytics1012, and I can't
+        # get Camus to consume properly for its preferred partitions
+        # if it is online and the leader. - otto
+        auto_leader_rebalance_enable => false,
+
+        default_replication_factor   => min(3, size($::role::kafka::main::config::brokers_array)),
+        # Start with a low number of (auto created) partitions per
+        # topic. This can be increased manually for high volume
+        # topics if necessary.
+        num_partitions               => 1,
+
+        # Use LinkedIn recommended settings with G1 garbage collector.
+        jvm_performance_opts         => '-server -XX:PermSize=48m -XX:MaxPermSize=48m -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35',
+    }
+
+    # firewall Kafka Broker
+    ferm::service { 'kafka-broker':
+        proto  => 'tcp',
+        # TODO: A custom port can be configured in
+        # $brokers_config. Extract the proper port to open from that
+        # config hash. Until then open the default broker port, not JMX.
+        port   => 9092,
+        srange => '$ALL_NETWORKS',
+    }
+
+    # Include Kafka Server Jmxtrans class
+    # to send Kafka Broker metrics to Ganglia and statsd.
+    class { '::kafka::server::jmxtrans':
+        ganglia  => hiera('ganglia_aggregators', undef),
+        statsd   => hiera('statsd', undef),
+        jmx_port => $::kafka::server::jmx_port,
+        require  => Class['::kafka::server'],
+    }
+
+    # Monitor kafka in production
+    if $::realm == 'production' {
+        class { '::kafka::server::monitoring':
+            jmx_port => $::role::kafka::main::config::jmx_port,
+        }
+    }
+}
diff --git a/modules/role/manifests/kafka/main/config.pp b/modules/role/manifests/kafka/main/config.pp
new file mode 100644
index 0000000..7b47ba6
--- /dev/null
+++ b/modules/role/manifests/kafka/main/config.pp
@@ -0,0 +1,46 @@
+# == Class role::kafka::main::config
+# Kafka config class for a main Kafka cluster.
+# This class only contains variable definitions,
+# so it is safe to include anywhere in order to
+# reference them.
+#
+class role::kafka::main::config {
+    # Choose cluster name from hiera, or default appropriately
+    # in labs or production.
+    $cluster_name = hiera('kafka_cluster_name', $::realm ? {
+        'labs'       => "main-${::labsproject}",
+        'production' => "main-${::site}"
+    })
+
+    # Get all kafka cluster configs from hiera, or default to a single node
+    # cluster for $cluster_name. Hash keys are quoted so they parse as strings.
+    $all_clusters = hiera('kafka_clusters', {
+        "${cluster_name}" => {
+            'brokers' => {
+                "${::fqdn}" => {
+                    'id' => '1'
+                },
+            }
+        }
+    })
+
+    # Config hash suitable for passing to kafka::server's brokers param
+    $brokers_config = $all_clusters[$cluster_name]['brokers']
+    # Array of broker hostnames in this Kafka cluster
+    $brokers_array = keys($brokers_config)
+    # Comma-separated string of broker hostname:ports (port defaults
+    # to 9092 when a broker sets none), useful in many client configs.
+    $brokers_string = inline_template('<%= @brokers_config.keys.sort.map { |b| "#{b}:#{@brokers_config[b].fetch("port", 9092)}" }.join(",") %>')
+
+    $jmx_port = 9999
+
+    # jmxtrans renders hostname metrics with underscores and
+    # suffixed with the jmx port. Build a graphite
+    # wildcard to match these.
+    # E.g. kafka1012.eqiad.wmnet -> kafka1012_eqiad_wmnet_9999
+    $brokers_graphite_wildcard = inline_template('{<%= @brokers_array.join("_#{@jmx_port},").tr(".","_") + "_#{@jmx_port}" %>}')
+
+    $zookeeper_hosts  = keys(hiera('zookeeper_hosts'))
+    $zookeeper_chroot = "/kafka/${cluster_name}"
+    $zookeeper_url    = inline_template("<%= @zookeeper_hosts.sort.join(',') %><%= @zookeeper_chroot %>")
+}
diff --git a/modules/role/templates/kafka/kafka-profile.sh.erb b/modules/role/templates/kafka/kafka-profile.sh.erb
new file mode 100644
index 0000000..0eaa625
--- /dev/null
+++ b/modules/role/templates/kafka/kafka-profile.sh.erb
@@ -0,0 +1,4 @@
+# NOTE: This file is managed by Puppet (template: role/kafka/kafka-profile.sh.erb).
+
+export ZOOKEEPER_URL=<%= @zookeeper_url %>
+export BROKER_LIST=<%= @brokers_string %>
--
To view, visit https://gerrit.wikimedia.org/r/258220
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifec423daa5d9b2a3d3e6e4b0bd12dda5639b8594
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits