Elukey has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/397765 )

Change subject: role::cache::misc: add a test Varnishkafka instance
......................................................................

role::cache::misc: add a test Varnishkafka instance

The new Varnishkafka instance will send webrequest
traffic to the Kafka Jumbo cluster via TLS.
This change requires work by Cergen before being
merged (https://wikitech.wikimedia.org/wiki/Cergen).

More info about Varnishkafka TLS/SSL config:
https://github.com/edenhill/librdkafka/wiki/Using-SSL-with-librdkafka

Change-Id: If275c782cbd6320f59e6a4f51bcf3a6d61292a48
---
A modules/profile/manifests/cache/kafka/webrequest/duplicate.pp
M modules/profile/manifests/cache/misc.pp
2 files changed, 138 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/65/397765/1

diff --git a/modules/profile/manifests/cache/kafka/webrequest/duplicate.pp 
b/modules/profile/manifests/cache/kafka/webrequest/duplicate.pp
new file mode 100644
index 0000000..f07c2e1
--- /dev/null
+++ b/modules/profile/manifests/cache/kafka/webrequest/duplicate.pp
@@ -0,0 +1,134 @@
+# === class profile::cache::kafka::webrequest::duplicate
+#
+# Sets up a varnishkafka instance producing varnish
+# webrequest logs to the analytics Jumbo brokers in eqiad.
+# This is a temporary profile to test the new Kafka cluster before switching
+# real production traffic to it.
+#
+# === Parameters
+#
+# [*cache_cluster*]
+#   the name of the cache cluster
+#
+# [*statsd_host*]
+#   the host to send statsd data to.
+#
+class profile::cache::kafka::webrequest::duplicate(
+    $cache_cluster     = hiera('cache::cluster'),
+    $statsd_host       = hiera('statsd'),
+    $ssl_key_password  = 
hiera('profile::cache::kafka::webrequest::duplicate::ssl_key_password', undef),
+) {
+    $config = kafka_config('jumbo-eqiad')
+    # NOTE: This is used by inheriting classes role::cache::kafka::*
+    $kafka_brokers = $config['brokers']['array']
+
+    $topic = "webrequest_${cache_cluster}"
+    # These used to be parameters, but I don't really see why given we never 
change
+    # them
+    $varnish_name           = 'frontend'
+    $varnish_svc_name       = 'varnish-frontend'
+    $kafka_protocol_version = '0.9.0.1'
+
+    # For any info about the following settings, please check
+    # profile::cache::kafka::webrequest.
+    $varnish_opts = {
+        'q' => 'ReqMethod ne "PURGE" and not Timestamp:Pipe and not 
ReqHeader:Upgrade ~ "[wW]ebsocket" and not HttpGarbage',
+        'T' => '1500',
+        'L' => '10000'
+    }
+    $conf_template = 'varnishkafka/varnishkafka_v4.conf.erb'
+
+    # Note: the newer version of Varnishkafka (compatible with Varnish 4)
+    # needs to specify if the timestamp formatter should output the time
+    # when the request started to be processed by Varnish (SLT_Timestamp Start)
+    # or the time of the response flush (SLT_Timestamp Resp).
+    # The "end:" prefix forces the latter and it is not part of the final 
output.
+    $timestamp_formatter = '%{end:%FT%T@dt}t'
+
+    # estimated peak reqs/sec we need to reasonably handle on a single cache.
+    # The current maximal "reasonable" case is in the text cluster, where if we
+    # have multiple DCs depooled in DNS and ~8 servers in the remaining DC to
+    # split traffic, we could peak at ~9000
+    $peak_rps_estimate = 9000
+
+    # TLS/SSL configuration
+    $ssl_ca_location = '/etc/ssl/certs/Puppet_Internal_CA.pem'
+    $ssl_location = '/etc/varnishkafka/ssl'
+
+    $ssl_key_location_secrets_path = 
'certificates/varnishkafka/varnishkafka.key.pem'
+    $ssl_key_location = "${ssl_location}/varnishkafka.key.pem"
+
+    $ssl_certificate_secrets_path = 
'certificates/varnishkafka/varnishkafka.key.public.pem'
+    $ssl_certificate_location = "${ssl_location}/varnishkafka.key.public.pem"
+
+    file { $ssl_location:
+        ensure => 'directory',
+        owner  => 'varnishkafka',
+        group  => 'varnishkafka',
+        mode   => '0555',
+    }
+    file { $ssl_key_location:
+        content => secret($ssl_key_location_secrets_path),
+        owner   => 'varnishkafka',
+        group   => 'varnishkafka',
+        mode    => '0440',
+        before  => Class['varnishkafka::instance'],
+    }
+
+    file { $ssl_certificate_location:
+        content => secret($ssl_certificate_secrets_path),
+        owner   => 'varnishkafka',
+        group   => 'varnishkafka',
+        mode    => '0444',
+        before  => Class['varnishkafka::instance'],
+    }
+
+    varnishkafka::instance { 'webrequest-jumbo-duplicate':
+        # FIXME - top-scope var without namespace, will break in puppet 2.8
+        # lint:ignore:variable_scope
+        brokers                      => $kafka_brokers,
+        # lint:endignore
+        topic                        => $topic,
+        format_type                  => 'json',
+        compression_codec            => 'snappy',
+        varnish_name                 => $varnish_name,
+        varnish_svc_name             => $varnish_svc_name,
+        varnish_opts                 => $varnish_opts,
+        # Note: fake_tag tricks varnishkafka into allowing a hardcoded string 
into a JSON field.
+        # Hardcoding the $fqdn into hostname rather than using %l to account 
for
+        # possible slip ups where varnish only writes the short hostname for 
%l.
+        format                       => "%{fake_tag0@hostname?${::fqdn}}x 
%{@sequence!num?0}n ${timestamp_formatter} 
%{Varnish:time_firstbyte@time_firstbyte!num?0.0}x %{X-Client-IP@ip}o 
%{X-Cache-Status@cache_status}o %{@http_status}s %{@response_size!num?0}b 
%{@http_method}m %{Host@uri_host}i %{@uri_path}U %{@uri_query}q 
%{Content-Type@content_type}o %{Referer@referer}i %{User-Agent@user_agent}i 
%{Accept-Language@accept_language}i %{X-Analytics@x_analytics}o %{Range@range}i 
%{X-Cache@x_cache}o",
+        message_send_max_retries     => 3,
+        # Buffer up to 80s at our expected maximum reasonable rate
+        queue_buffering_max_messages => 80 * $peak_rps_estimate,
+        # Our aim here is to not send batches more often than once per second,
+        # given our expected maximum reasonable rate
+        batch_num_messages           => $peak_rps_estimate,
+        # On caches with high traffic (text and upload), we have seen
+        # message drops from esams during high load time with a large
+        # request ack timeout (it was 30 seconds).
+        # The varnishkafka buffer gets too full and it drops messages.
+        # Perhaps this is a buffer bloat problem.
+        # Note that varnishkafka will retry a timed-out produce request.
+        topic_request_timeout_ms     => 2000,
+        # 1 means only the leader broker must ACK each produce request
+        topic_request_required_acks  => '1',
+        # Write out stats to varnishkafka.stats.json
+        # this often.  This is set at 15 so that
+        # stats will be fresh when polled from gmetad.
+        log_statistics_interval      => 15,
+        conf_template                => $conf_template,
+        force_protocol_version       => $kafka_protocol_version,
+        #TLS/SSL config
+        ssl_enabled                  => true,
+        ssl_ca_location              => $ssl_ca_location,
+        ssl_key_password             => $ssl_key_password,
+        ssl_key_location             => $ssl_key_location,
+        ssl_certificate_location     => $ssl_certificate_location,
+    }
+
+    # Make sure varnishes are configured and started for the first time
+    # before the instances as well, or they fail to start initially...
+    Service <| tag == 'varnish_instance' |> -> 
Varnishkafka::Instance['webrequest-jumbo-duplicate']
+
+}
diff --git a/modules/profile/manifests/cache/misc.pp 
b/modules/profile/manifests/cache/misc.pp
index d875516..13baa50 100644
--- a/modules/profile/manifests/cache/misc.pp
+++ b/modules/profile/manifests/cache/misc.pp
@@ -12,6 +12,10 @@
 ) {
     require ::profile::cache::base
 
+    # Temp. experiment to duplicate/mirror the webrequest data
+    # to the new Kafka Jumbo brokers.
+    include ::profile::cache::kafka::webrequest::duplicate
+
     $cache_route = $cache_route_table[$::site]
     class { 'tlsproxy::prometheus': }
     class { 'prometheus::node_vhtcpd': }

-- 
To view, visit https://gerrit.wikimedia.org/r/397765
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If275c782cbd6320f59e6a4f51bcf3a6d61292a48
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Elukey <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to