Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/258220

Change subject: [WIP] Using more generic roles for kafka classes
......................................................................

[WIP] Using more generic roles for kafka classes

This will deprecate role::analytics::kafka::* in favor of 
role::kafka::analytics::*

This commit includes the new role on the analytics Kafka brokers, but does not
change any other users of role::analytics::kafka::config.  This will
be addressed in a separate commit after this is verified to work on brokers.

Bug: T120957
Change-Id: Ifec423daa5d9b2a3d3e6e4b0bd12dda5639b8594
---
M hieradata/common.yaml
M manifests/site.pp
A modules/role/manifests/kafka/analytics/broker.pp
A modules/role/manifests/kafka/analytics/config.pp
A modules/role/manifests/kafka/main/broker.pp
A modules/role/manifests/kafka/main/config.pp
A modules/role/templates/kafka/kafka-profile.sh.erb
7 files changed, 368 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/20/258220/1

diff --git a/hieradata/common.yaml b/hieradata/common.yaml
index 33b8538..a0eefd7 100644
--- a/hieradata/common.yaml
+++ b/hieradata/common.yaml
@@ -320,3 +320,36 @@
 glanceconfig:
   db_host: 'm5-master.eqiad.wmnet'
   bind_ip: "%{::ipaddress_eth0}"
+
+kafka_clusters:
+  # This is the analytics Kafka cluster, named just 'eqiad' for
+  # historical reasons.
+  eqiad:
+    brokers:
+      kafka1012.eqiad.wmnet:
+        id: 12  # Row A
+      kafka1013.eqiad.wmnet:
+        id: 13  # Row A
+      kafka1014.eqiad.wmnet:
+        id: 14  # Row C
+      kafka1018.eqiad.wmnet:
+        id: 18  # Row D
+      kafka1020.eqiad.wmnet:
+        id: 20  # Row D
+      kafka1022.eqiad.wmnet:
+        id: 22  # Row C
+
+  # TODO: set these once the hardware is in.
+  # main-eqiad:
+  #   brokers:
+  #     kafka1001.eqiad.wmnet:
+  #       id: 1001
+  #     kafka1002.eqiad.wmnet:
+  #       id: 1002
+  #
+  # main-codfw:
+  #   brokers:
+  #     kafka2001.codfw.wmnet:
+  #       id: 2001
+  #     kafka2002.codfw.wmnet:
+  #       id: 2002
diff --git a/manifests/site.pp b/manifests/site.pp
index 0f0cafd..2a82c80 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -1169,7 +1169,8 @@
     # addresses.
     interface::add_ip6_mapped { 'main': }
 
-    role analytics::kafka::server
+    role kafka::analytics::broker
+
     include role::analytics
     include standard
     include base::firewall
diff --git a/modules/role/manifests/kafka/analytics/broker.pp 
b/modules/role/manifests/kafka/analytics/broker.pp
new file mode 100644
index 0000000..03ecbe6
--- /dev/null
+++ b/modules/role/manifests/kafka/analytics/broker.pp
@@ -0,0 +1,133 @@
+# == Class role::kafka::analytics::broker
+# Sets up a broker belonging to an Analytics cluster.
+# This role works for any site.
+#
+# See modules/role/manifests/kafka/README.md for more information.
+#
+class role::kafka::analytics::broker {
+    include ::role::kafka::analytics::config
+
+    require_package('openjdk-7-jdk')
+
+    system::role { 'role::kafka::analytics::broker':
+        description => "Kafka Broker Server in the 
${::role::kafka::analytics::config::cluster_name} cluster",
+    }
+
+    # Make these local so kafka-profile.sh.erb can easily render them.
+    $zookeeper_url  = $::role::kafka::analytics::config::zookeeper_url
+    $brokers_string = $::role::kafka::analytics::config::brokers_string
+    # export ZOOKEEPER_URL and BROKER_LIST user environment variable.
+    # This makes it much more convenient to run kafka commands without having
+    # to specify the --zookeeper or --brokers flag every time.
+    file { '/etc/profile.d/kafka.sh':
+        owner   => 'root',
+        mode    => '0444',
+        content => template('role/kafka/kafka-profile.sh.erb'),
+    }
+
+    $log_dirs = $::realm ? {
+        'labs'       => ['/var/spool/kafka'],
+        # Production analytics Kafka brokers have more disks.
+        'production' => [
+            '/var/spool/kafka/a/data',
+            '/var/spool/kafka/b/data',
+            '/var/spool/kafka/c/data',
+            '/var/spool/kafka/d/data',
+            '/var/spool/kafka/e/data',
+            '/var/spool/kafka/f/data',
+            '/var/spool/kafka/g/data',
+            '/var/spool/kafka/h/data',
+            '/var/spool/kafka/i/data',
+            '/var/spool/kafka/j/data',
+            '/var/spool/kafka/k/data',
+            '/var/spool/kafka/l/data',
+        ],
+    }
+
+    $nofiles_ulimit = $::realm ? {
+        # Use default ulimit for labs kafka
+        'labs'       => 8192,
+        # Increase ulimit for production kafka.
+        'production' => 65536,
+    }
+
+    class { '::kafka::server':
+        log_dirs                        => $log_dirs,
+        brokers                         => 
$::role::kafka::analytics::config::brokers_config,
+        zookeeper_hosts                 => 
$::role::kafka::analytics::config::zookeeper_hosts,
+        zookeeper_chroot                => 
$::role::kafka::analytics::config::zookeeper_chroot,
+        nofiles_ulimit                  => $nofiles_ulimit,
+        jmx_port                        => 
$::role::kafka::analytics::config::jmx_port,
+
+        # Enable auto creation of topics.
+        auto_create_topics_enable       => true,
+
+        # (Temporarily?) disable auto leader rebalance.
+        # I am having issues with analytics1012, and I can't
+        # get Camus to consume properly for its preferred partitions
+        # if it is online and the leader.  - otto
+        auto_leader_rebalance_enable    => false,
+
+        default_replication_factor      => min(3, 
size($::role::kafka::analytics::config::brokers_array)),
+        # Start with a low number of (auto created) partitions per
+        # topic.  This can be increased manually for high volume
+        # topics if necessary.
+        num_partitions                  => 1,
+
+        # Bump this up to get a little more
+        # parallelism between replicas.
+        num_replica_fetchers            => 12,
+        # Setting this larger so that it is sure to be bigger
+        # than batch size from varnishkafka.
+        # See: https://issues.apache.org/jira/browse/KAFKA-766
+        # webrequest_bits is about 50k msgs/sec, and has 10 partitions.
+        # That's 5000 msgs/second/partition, so this should allow
+        # a partition to get behind by up to 10 seconds before
+        # removing it from the ISR.  This will be longer for
+        # less voluminous topics.
+        replica_lag_max_messages        => 50000,
+        # Setting this to a value according to 
https://cwiki.apache.org/confluence/display/KAFKA/FAQ#FAQ-HowtoreducechurnsinISR?WhendoesabrokerleavetheISR?
+        # 1 / MinFetchRate * 1000.  I assume this result to be in seconds, 
since the default for max_ms is 10000.
+        # MinFetchRate ~= 45. 1/45*1000 ~= 22.  Setting this to 30 seconds to 
overcompensate.
+        # See also: 
http://ganglia.wikimedia.org/latest/graph_all_periods.php?title=&vl=&x=&n=&hreg%5B%5D=analytics102%5B12%5D.*&mreg%5B%5D=kafka.server.ReplicaFetcherManager.Replica-MinFetchRate.Value&gtype=line&glegend=show&aggregate=1
+        replica_lag_time_max_ms         => 30000,
+        # Allow for 16 seconds of latency when talking with Zookeeper.
+        # We have seen an issue where (mainly or only) analytics1021 will
+        # pause for almost 12 seconds for a yet unknown reason.  Upping
+        # the session timeout here should give the broker enough time
+        # to get back in sync with Zookeeper before it is removed from the ISR.
+        # See T83561 (near the bottom)
+        # and: 
http://mail-archives.apache.org/mod_mbox/kafka-users/201407.mbox/%3CCAFbh0Q2f71qgs5JDNFxkm7SSdZyYMH=zpeoxotueqfkqexq...@mail.gmail.com%3E
+        zookeeper_connection_timeout_ms => 16000,
+        zookeeper_session_timeout_ms    => 16000,
+        # Use LinkedIn recommended settings with G1 garbage collector,
+        jvm_performance_opts            => '-server -XX:PermSize=48m 
-XX:MaxPermSize=48m -XX:+UseG1GC -XX:MaxGCPauseMillis=20 
-XX:InitiatingHeapOccupancyPercent=35',
+    }
+
+    # firewall Kafka Broker
+    ferm::service { 'kafka-broker':
+        proto  => 'tcp',
+        # 9092 is the default Kafka broker port.  TODO: A custom port
+        # can be configured in $brokers_config.  Extract the proper
+        # port to open from that config hash.
+        port   => 9092,
+        srange => '$ALL_NETWORKS',
+    }
+
+    # Include Kafka Server Jmxtrans class
+    # to send Kafka Broker metrics to Ganglia and statsd.
+    class { '::kafka::server::jmxtrans':
+        ganglia  => hiera('ganglia_aggregators', undef),
+        statsd   => hiera('statsd', undef),
+        jmx_port => $::kafka::server::jmx_port,
+        require  => Class['::kafka::server'],
+    }
+
+    # Monitor kafka in production
+    if $::realm == 'production' {
+        class { '::kafka::server::monitoring':
+            jmx_port            => $::role::kafka::analytics::config::jmx_port,
+            nagios_servicegroup => "analytics_${::site}",
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/role/manifests/kafka/analytics/config.pp 
b/modules/role/manifests/kafka/analytics/config.pp
new file mode 100644
index 0000000..bef078b
--- /dev/null
+++ b/modules/role/manifests/kafka/analytics/config.pp
@@ -0,0 +1,51 @@
+# == Class role::kafka::analytics::config
+# Kafka config class for an Analytics Kafka cluster.
+# This class only contains variable definitions,
+# so it is safe to include anywhere in order to
+# reference them.
+#
+# See modules/role/manifests/kafka/README.md for more information.
+#
+class role::kafka::analytics::config {
+    # Choose cluster name from hiera, or default appropriately
+    # in labs or production
+    $cluster_name = hiera('kafka_cluster_name', $::realm ? {
+        'labs'       => "analytics-${::labsproject}",
+        'production' => $::site ? {
+            'eqiad' => 'eqiad',
+            default => "analytics-${::site}",
+        },
+    })
+
+    # Get all kafka cluster configs from hiera,
+    # or default to only single node cluster for $cluster_name.
+    $all_clusters   = hiera('kafka_clusters', {
+        $cluster_name => {
+            'brokers' => {
+                $::fqdn => {
+                    'id' => '1'
+                },
+            }
+        }
+    })
+
+    # Config hash suitable for passing to kafka::server's broker param
+    $brokers_config = $all_clusters[$cluster_name]['brokers']
+    # Array of broker hostnames in this Kafka cluster
+    $brokers_array  = keys($brokers_config)
+    # Comma separated string of broker hostname:ports,
+    # useful in many client configs.
+    $brokers_string = inline_template('<%= @brokers_config.keys.sort.map { |b| 
"#{b}:#{@brokers_config[b].fetch("port", 9092)}" }.join(",") %>')
+
+    $jmx_port       = 9999
+
+    # jmxtrans renders hostname metrics with underscores and
+    # suffixed with the jmx port.  Build a graphite
+    # wildcard to match these.
+    # E.g. kafka1012.eqiad.wmnet -> kafka1012_eqiad_wmnet_9999
+    $brokers_graphite_wildcard = inline_template('{<%= 
@brokers_array.join("_#{@jmx_port},").tr(".","_") + "_#{@jmx_port}" %>}')
+
+    $zookeeper_hosts  = keys(hiera('zookeeper_hosts'))
+    $zookeeper_chroot = "/kafka/${cluster_name}"
+    $zookeeper_url    = inline_template("<%= @zookeeper_hosts.sort.join(',') 
%><%= @zookeeper_chroot %>")
+}
diff --git a/modules/role/manifests/kafka/main/broker.pp 
b/modules/role/manifests/kafka/main/broker.pp
new file mode 100644
index 0000000..9dfb12c
--- /dev/null
+++ b/modules/role/manifests/kafka/main/broker.pp
@@ -0,0 +1,99 @@
+# == Class role::kafka::main::broker
+# Sets up a broker belonging to a main Kafka cluster.
+# This role works for any site.
+#
+# See modules/role/manifests/kafka/README.md for more information.
+#
+class role::kafka::main::broker {
+    include ::role::kafka::main::config
+
+    require_package('openjdk-7-jdk')
+
+    system::role { 'role::kafka::main::broker':
+        description => "Kafka Broker Server in the 
${::role::kafka::main::config::cluster_name} cluster",
+    }
+
+    # Make these local so kafka-profile.sh.erb can easily render them.
+    $zookeeper_url  = $::role::kafka::main::config::zookeeper_url
+    $brokers_string = $::role::kafka::main::config::brokers_string
+    # export ZOOKEEPER_URL and BROKER_LIST user environment variable.
+    # This makes it much more convenient to run kafka commands without having
+    # to specify the --zookeeper or --brokers flag every time.
+    file { '/etc/profile.d/kafka.sh':
+        owner   => 'root',
+        mode    => '0444',
+        content => template('role/kafka/kafka-profile.sh.erb'),
+    }
+
+    $log_dirs = $::realm ? {
+        'labs'       => ['/var/spool/kafka'],
+        # Production main Kafka brokers have more disks.
+        'production' => [
+            # TODO: fill this in with real partitions when we have hardware.
+            '/var/spool/kafka/a/data',
+            '/var/spool/kafka/b/data',
+            '/var/spool/kafka/c/data',
+            '/var/spool/kafka/d/data',
+        ],
+    }
+
+    $nofiles_ulimit = $::realm ? {
+        # Use default ulimit for labs kafka
+        'labs'       => 8192,
+        # Increase ulimit for production kafka.
+        'production' => 65536,
+    }
+
+    class { '::kafka::server':
+        log_dirs                        => $log_dirs,
+        brokers                         => 
$::role::kafka::main::config::brokers_config,
+        zookeeper_hosts                 => 
$::role::kafka::main::config::zookeeper_hosts,
+        zookeeper_chroot                => 
$::role::kafka::main::config::zookeeper_chroot,
+        nofiles_ulimit                  => $nofiles_ulimit,
+        jmx_port                        => 
$::role::kafka::main::config::jmx_port,
+
+        # Enable auto creation of topics.
+        auto_create_topics_enable       => true,
+
+        # (Temporarily?) disable auto leader rebalance.
+        # I am having issues with analytics1012, and I can't
+        # get Camus to consume properly for its preferred partitions
+        # if it is online and the leader.  - otto
+        auto_leader_rebalance_enable    => false,
+
+        default_replication_factor      => min(3, 
size($::role::kafka::main::config::brokers_array)),
+        # Start with a low number of (auto created) partitions per
+        # topic.  This can be increased manually for high volume
+        # topics if necessary.
+        num_partitions                  => 1,
+
+        # Use LinkedIn recommended settings with G1 garbage collector,
+        jvm_performance_opts            => '-server -XX:PermSize=48m 
-XX:MaxPermSize=48m -XX:+UseG1GC -XX:MaxGCPauseMillis=20 
-XX:InitiatingHeapOccupancyPercent=35',
+    }
+
+    # firewall Kafka Broker
+    ferm::service { 'kafka-broker':
+        proto  => 'tcp',
+        # 9092 is the default Kafka broker port.  TODO: A custom port
+        # can be configured in $brokers_config.  Extract the proper
+        # port to open from that config hash.
+        port   => 9092,
+        srange => '$ALL_NETWORKS',
+    }
+
+    # Include Kafka Server Jmxtrans class
+    # to send Kafka Broker metrics to Ganglia and statsd.
+    class { '::kafka::server::jmxtrans':
+        ganglia  => hiera('ganglia_aggregators', undef),
+        statsd   => hiera('statsd', undef),
+        jmx_port => $::kafka::server::jmx_port,
+        require  => Class['::kafka::server'],
+    }
+
+    # Monitor kafka in production
+    if $::realm == 'production' {
+        class { '::kafka::server::monitoring':
+            jmx_port => $::role::kafka::main::config::jmx_port,
+        }
+    }
+}
diff --git a/modules/role/manifests/kafka/main/config.pp 
b/modules/role/manifests/kafka/main/config.pp
new file mode 100644
index 0000000..7b47ba6
--- /dev/null
+++ b/modules/role/manifests/kafka/main/config.pp
@@ -0,0 +1,46 @@
+# == Class role::kafka::main::config
+# Kafka config class for a main Kafka cluster.
+# This class only contains variable definitions,
+# so it is safe to include anywhere in order to
+# reference them.
+#
+class role::kafka::main::config {
+    # Choose cluster name from hiera, or default appropriately
+    # in labs or production
+    $cluster_name = hiera('kafka_cluster_name', $::realm ? {
+        'labs'       => "main-${::labsproject}",
+        'production' => "main-${::site}",
+    })
+
+    # Get all kafka cluster configs from hiera,
+    # or default to only single node cluster for $cluster_name.
+    $all_clusters   = hiera('kafka_clusters', {
+        $cluster_name => {
+            'brokers' => {
+                $::fqdn => {
+                    'id' => '1'
+                },
+            }
+        }
+    })
+
+    # Config hash suitable for passing to kafka::server's broker param
+    $brokers_config = $all_clusters[$cluster_name]['brokers']
+    # Array of broker hostnames in this Kafka cluster
+    $brokers_array  = keys($brokers_config)
+    # Comma separated string of broker hostname:ports,
+    # useful in many client configs.
+    $brokers_string = inline_template('<%= @brokers_config.keys.sort.map { |b| 
"#{b}:#{@brokers_config[b].fetch("port", 9092)}" }.join(",") %>')
+
+    $jmx_port       = 9999
+
+    # jmxtrans renders hostname metrics with underscores and
+    # suffixed with the jmx port.  Build a graphite
+    # wildcard to match these.
+    # E.g. kafka1012.eqiad.wmnet -> kafka1012_eqiad_wmnet_9999
+    $brokers_graphite_wildcard = inline_template('{<%= 
@brokers_array.join("_#{@jmx_port},").tr(".","_") + "_#{@jmx_port}" %>}')
+
+    $zookeeper_hosts  = keys(hiera('zookeeper_hosts'))
+    $zookeeper_chroot = "/kafka/${cluster_name}"
+    $zookeeper_url    = inline_template("<%= @zookeeper_hosts.sort.join(',') 
%><%= @zookeeper_chroot %>")
+}
diff --git a/modules/role/templates/kafka/kafka-profile.sh.erb 
b/modules/role/templates/kafka/kafka-profile.sh.erb
new file mode 100644
index 0000000..0eaa625
--- /dev/null
+++ b/modules/role/templates/kafka/kafka-profile.sh.erb
@@ -0,0 +1,4 @@
+# NOTE:  This file is managed by Puppet
+
+export ZOOKEEPER_URL="<%= @zookeeper_url %>"
+export BROKER_LIST="<%= @brokers_string %>"

-- 
To view, visit https://gerrit.wikimedia.org/r/258220
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifec423daa5d9b2a3d3e6e4b0bd12dda5639b8594
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to