Ottomata has submitted this change and it was merged.

Change subject: Run webstatscollector (modified) with kafkatee on analytics1003
......................................................................


Run webstatscollector (modified) with kafkatee on analytics1003

This commit also starts kafkatee consuming from the full webrequest
stream (all 4 topics).  Here we go!

Change-Id: I7be5f44075cae9899ece80de8e16c71b0f2ddc56
---
M manifests/role/analytics/kafkatee.pp
M manifests/site.pp
2 files changed, 108 insertions(+), 0 deletions(-)

Approvals:
  Ottomata: Verified; Looks good to me, approved



diff --git a/manifests/role/analytics/kafkatee.pp b/manifests/role/analytics/kafkatee.pp
index 56b73bb..5b10312 100644
--- a/manifests/role/analytics/kafkatee.pp
+++ b/manifests/role/analytics/kafkatee.pp
@@ -88,6 +88,69 @@
     }
 }
 
+# == Class role::analytics::kafkatee::webrequest::webstatscollector
+# We want to run webstatscollector via kafkatee for testing.
+# Some of the production (role::logging::webstatscollector)
+# configs are not relevant here, so we copy the class
+# and edit it.
+#
+# webstatscollector needs all of the webrequest logs,
+# so this class makes sure all webrequest topic input
+# classes are included.
+class role::analytics::kafkatee::webrequest::webstatscollector {
+    include role::analytics::kafkatee::input::webrequest
+
+    # webstats-collector process writes dump files here.
+    $webstats_dumps_directory = '/srv/webstats/dumps'
+
+    package { 'webstatscollector': ensure => installed }
+    service { 'webstats-collector':
+        ensure     => 'running',
+        hasstatus  => 'false',
+        hasrestart => 'true',
+        require    => Package['webstatscollector'],
+    }
+
+    # Gzip pagecounts files hourly (at minute 2 of every hour).
+    cron { 'webstats-dumps-gzip':
+        command => "/bin/gzip ${webstats_dumps_directory}/pagecounts-????????-?????? 2> /dev/null",
+        minute  => 2,
+        user    => 'nobody',
+        require => Service['webstats-collector'],
+    }
+
+    # Once a day (01:28), delete webstats dumps that are older than 10 days.
+    cron { 'webstats-dumps-delete':
+        command => "/usr/bin/find ${webstats_dumps_directory} -maxdepth 1 -type f -mtime +10 -delete",
+        minute  => 28,
+        hour    => 1,
+        user    => 'nobody',
+        require => Service['webstats-collector'],
+    }
+
+    # kafkatee outputs into webstats filter and forwards to webstats collector via log2udp
+    ::kafkatee::output { 'webstatscollector':
+        destination => "/usr/local/bin/filter | /usr/bin/log2udp -h localhost -p 3815",
+        type        => 'pipe',
+        require     => Service['webstats-collector'],
+    }
+}
+
+
+
+# == Class role::analytics::kafkatee::input::webrequest
+# Includes each of the 4 webrequest topic input classes
+# (mobile, text, bits and upload).  Use this class to consume
+# all of them; if you want to consume only an individual topic,
+# include one of the topic-specific classes manually instead.
+class role::analytics::kafkatee::input::webrequest {
+    include role::analytics::kafkatee::input::webrequest::mobile
+    include role::analytics::kafkatee::input::webrequest::text
+    include role::analytics::kafkatee::input::webrequest::bits
+    include role::analytics::kafkatee::input::webrequest::upload
+}
+
+
 
 # == Class role::analytics::kafkatee::input::webrequest::mobile
 # Sets up a kafkatee input to consume from the webrequest_mobile topic
@@ -103,3 +166,47 @@
         offset      => 'stored',
     }
 }
+# == Class role::analytics::kafkatee::input::webrequest::text
+# Sets up a kafkatee input to consume from the webrequest_text topic
+# (partitions '0-11', 'json' encoding, offset 'stored').
+# This is its own class so that a kafkatee instance that wants to
+# consume from multiple topics may include each topic as a class.
+#
+class role::analytics::kafkatee::input::webrequest::text {
+    ::kafkatee::input { 'kafka-webrequest_text':
+        topic       => 'webrequest_text',
+        partitions  => '0-11',
+        options     => { 'encoding' => 'json' },
+        offset      => 'stored',
+    }
+}
+# == Class role::analytics::kafkatee::input::webrequest::bits
+# Sets up a kafkatee input to consume from the webrequest_bits topic
+# (partitions '0-11', 'json' encoding, offset 'stored').
+# This is its own class so that a kafkatee instance that wants to
+# consume from multiple topics may include each topic as a class.
+#
+class role::analytics::kafkatee::input::webrequest::bits {
+    ::kafkatee::input { 'kafka-webrequest_bits':
+        topic       => 'webrequest_bits',
+        partitions  => '0-11',
+        options     => { 'encoding' => 'json' },
+        offset      => 'stored',
+    }
+}
+# == Class role::analytics::kafkatee::input::webrequest::upload
+# Sets up a kafkatee input to consume from the webrequest_upload topic
+# (partitions '0-11', 'json' encoding, offset 'stored').
+# This is its own class so that a kafkatee instance that wants to
+# consume from multiple topics may include each topic as a class.
+#
+class role::analytics::kafkatee::input::webrequest::upload {
+    ::kafkatee::input { 'kafka-webrequest_upload':
+        topic       => 'webrequest_upload',
+        partitions  => '0-11',
+        options     => { 'encoding' => 'json' },
+        offset      => 'stored',
+    }
+}
+
+
diff --git a/manifests/site.pp b/manifests/site.pp
index a2b2cf9..d87505a 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -109,6 +109,7 @@
 
     include role::analytics
     include role::analytics::kafkatee::webrequest::mobile
+    include role::analytics::kafkatee::webrequest::webstatscollector
 
     # run misc udp2log here for sqstat
     include role::logging::udp2log::misc

-- 
To view, visit https://gerrit.wikimedia.org/r/155740
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I7be5f44075cae9899ece80de8e16c71b0f2ddc56
Gerrit-PatchSet: 2
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to