Ottomata has submitted this change and it was merged. Change subject: Moving as many easy filters to oxygen. gadolinium's socat process is dropping packets with udp2log running. ......................................................................
Moving as many easy filters to oxygen. gadolinium's socat process is dropping packets with udp2log running. Change-Id: Ib0c7b8bf3925b828739b80726efe5210c57671a2 --- M manifests/misc/statistics.pp M templates/udp2log/filters.gadolinium.erb M templates/udp2log/filters.oxygen.erb 3 files changed, 79 insertions(+), 46 deletions(-) Approvals: Ottomata: Verified; Looks good to me, approved jenkins-bot: Verified diff --git a/manifests/misc/statistics.pp b/manifests/misc/statistics.pp index cd86847..fe7a426 100644 --- a/manifests/misc/statistics.pp +++ b/manifests/misc/statistics.pp @@ -690,21 +690,21 @@ destination => "/a/squid/archive/arabic-banner", } - # sampled-1000 logs from gadolinium + # sampled-1000 logs from emery misc::statistics::rsync_job { "sampled_1000": - source => "gadolinium.wikimedia.org::udp2log/webrequest/archive/sampled-1000*.gz", + source => "emery.wikimedia.org::udp2log/webrequest/archive/sampled-1000*.gz", destination => "/a/squid/archive/sampled", } - # edit logs from gadolinium + # edit logs from oxygen misc::statistics::rsync_job { "edits": - source => "gadolinium.wikimedia.org::udp2log/webrequest/archive/edits*.gz", + source => "oxygen.wikimedia.org::udp2log/webrequest/archive/edits*.gz", destination => "/a/squid/archive/edits", } - # mobile logs from gadolinium + # mobile logs from oxygen misc::statistics::rsync_job { "mobile": - source => "gadolinium.wikimedia.org::udp2log/webrequest/archive/mobile*.gz", + source => "oxygen.wikimedia.org::udp2log/webrequest/archive/mobile*.gz", destination => "/a/squid/archive/mobile", } diff --git a/templates/udp2log/filters.gadolinium.erb b/templates/udp2log/filters.gadolinium.erb index 9b4eb9b..e954fb5 100644 --- a/templates/udp2log/filters.gadolinium.erb +++ b/templates/udp2log/filters.gadolinium.erb @@ -5,37 +5,6 @@ ### udp2log packet loss monitoring pipe 10 /usr/bin/packet-loss 10 '\t' >> <%= webrequest_log_directory %>/packet-loss.log -### 0.0001 of all udp2log messages -# This log file is also on emery for redundancy -file 1000 <%= webrequest_log_directory %>/sampled-1000.tsv.log - -### All edits -pipe 1 /usr/bin/udp-filter -F '\t' -p action=submit,action=edit >> <%= webrequest_log_directory %>/edits.tsv.log - - -<% -# pull in $role::cache::configuration::active_nodes -# to find mobile host names and build a regex on which to grep. -cache_configuration = scope.lookupvar('::role::cache::configuration::active_nodes') -mobile_hosts_regex = '(' + cache_configuration['production']['mobile'].values.flatten.join('|') + ')' --%> - -### Mobile traffic filter -# All mobile traffic goes through CC-cp1044 varnish hosts. -pipe 100 /bin/grep -P '<%= mobile_hosts_regex %>' >> <%= webrequest_log_directory %>/mobile-sampled-100.tsv.log - -### Fundraising -## Landing pages -pipe 1 /usr/bin/udp-filter -F '\t' -d wikimediafoundation.org,donate.wikimedia.org >> <%= fundraising_log_directory %>/logs/landingpages.tsv.log - -## Banner Impressions -pipe 100 /usr/bin/udp-filter -F '\t' -p Special:RecordImpression\?banner=,Special:RecordImpression\?result= >> <%= fundraising_log_directory %>/logs/bannerImpressions-sampled100.tsv.log - - - -### All 5xx error responses -- domas (now using udp-filter instead of 5xx-filter). -# pipe 1 <%= webrequest_filter_directory %>/5xx-filter | awk -W interactive '$9 !~ "upload.wikimedia.org|query.php"' >> <%= webrequest_log_directory %>/5xx.tsv.log -pipe 1 /usr/bin/udp-filter -F '\t' -r -s '^5' | awk -W interactive '$9 !~ "upload.wikimedia.org|query.php"' >> <%= webrequest_log_directory %>/5xx.tsv.log ### webstatscollector -- domas # NOTE! The files that filter and collector generates @@ -45,14 +14,48 @@ pipe 1 /usr/local/bin/filter | log2udp -h 127.0.0.1 -p 3815 +### Fundraising +## Landing pages +pipe 1 /usr/bin/udp-filter -F '\t' -d wikimediafoundation.org,donate.wikimedia.org >> <%= fundraising_log_directory %>/logs/landingpages.tsv.log -### Vrije Universiteit -# Contact: <%= scope.lookupvar('contacts::udp2log::vrije_universiteit_contact') %> -pipe 10 awk -f <%= webrequest_filter_directory %>/vu.awk | log2udp -h 130.37.198.252 -p 9999 +## Banner Impressions +pipe 100 /usr/bin/udp-filter -F '\t' -p Special:RecordImpression\?banner=,Special:RecordImpression\?result= >> <%= fundraising_log_directory %>/logs/bannerImpressions-sampled100.tsv.log -### University of Minnesota -# Contact: <%= scope.lookupvar('contacts::udp2log::university_minnesota_contact') %> -# Former Contact: <%= scope.lookupvar('contacts::udp2log::university_minnesota_contact_former') %> -# Former contact: <%= scope.lookupvar('contacts::udp2log::university_minnesota_contact_former2') %> -pipe 10 awk -f <%= webrequest_filter_directory %>/minnesota.awk | log2udp -h bento.cs.umn.edu -p 9999 + +# ----- The filters below have been moved to oxygen ----- + +<% +# pull in $role::cache::configuration::active_nodes +# to find mobile host names and build a regex on which to grep. +cache_configuration = scope.lookupvar('::role::cache::configuration::active_nodes') +mobile_hosts_regex = '(' + cache_configuration['production']['mobile'].values.flatten.join('|') + ')' +-%> +# +# ### Mobile traffic filter +# # All mobile traffic goes through CC-cp1044 varnish hosts. +# pipe 100 /bin/grep -P '<%= mobile_hosts_regex %>' >> <%= webrequest_log_directory %>/mobile-sampled-100.tsv.log +# +# +# ### 0.0001 of all udp2log messages +# # This log file is also on emery for redundancy +# file 1000 <%= webrequest_log_directory %>/sampled-1000.tsv.log +# +# +# ### All 5xx error responses -- domas (now using udp-filter instead of 5xx-filter). +# # pipe 1 <%= webrequest_filter_directory %>/5xx-filter | awk -W interactive '$9 !~ "upload.wikimedia.org|query.php"' >> <%= webrequest_log_directory %>/5xx.tsv.log +# pipe 1 /usr/bin/udp-filter -F '\t' -r -s '^5' | awk -W interactive '$9 !~ "upload.wikimedia.org|query.php"' >> <%= webrequest_log_directory %>/5xx.tsv.log +# +# +# ### All edits +# pipe 1 /usr/bin/udp-filter -F '\t' -p action=submit,action=edit >> <%= webrequest_log_directory %>/edits.tsv.log +# +# ### Vrije Universiteit +# # Contact: <%= scope.lookupvar('contacts::udp2log::vrije_universiteit_contact') %> +# pipe 10 awk -f <%= webrequest_filter_directory %>/vu.awk | log2udp -h 130.37.198.252 -p 9999 +# +# ### University of Minnesota +# # Contact: <%= scope.lookupvar('contacts::udp2log::university_minnesota_contact') %> +# # Former Contact: <%= scope.lookupvar('contacts::udp2log::university_minnesota_contact_former') %> +# # Former contact: <%= scope.lookupvar('contacts::udp2log::university_minnesota_contact_former2') %> +# pipe 10 awk -f <%= webrequest_filter_directory %>/minnesota.awk | log2udp -h bento.cs.umn.edu -p 9999 diff --git a/templates/udp2log/filters.oxygen.erb b/templates/udp2log/filters.oxygen.erb index 0f29281..1e48033 100644 --- a/templates/udp2log/filters.oxygen.erb +++ b/templates/udp2log/filters.oxygen.erb @@ -7,4 +7,34 @@ # Capture all logs with 'zero=' set. The X-Analytics header is set with this # by mobile varnish frontends upon getting a Wikipedia Zero request. -pipe 1 /bin/grep -P 'zero=\d{3}-\d{2}' >> /a/log/webrequest/zero.tsv.log +pipe 1 /bin/grep -P 'zero=\d{3}-\d{2}' >> <%= log_directory %>/zero.tsv.log + +### All edits +pipe 1 /usr/bin/udp-filter -F '\t' -p action=submit,action=edit >> <%= log_directory %>/edits.tsv.log + +<% +# pull in $role::cache::configuration::active_nodes +# to find mobile host names and build a regex on which to grep. +cache_configuration = scope.lookupvar('::role::cache::configuration::active_nodes') +mobile_hosts_regex = '(' + cache_configuration['production']['mobile'].values.flatten.join('|') + ')' +-%> + +### Mobile traffic filter +# All mobile traffic goes through CC-cp1044 varnish hosts. +pipe 100 /bin/grep -P '<%= mobile_hosts_regex %>' >> <%= log_directory %>/mobile-sampled-100.tsv.log + +### All 5xx error responses -- domas (now using udp-filter instead of 5xx-filter). +# pipe 1 <%= webrequest_filter_directory %>/5xx-filter | awk -W interactive '$9 !~ "upload.wikimedia.org|query.php"' >> <%= log_directory %>/5xx.tsv.log +pipe 1 /usr/bin/udp-filter -F '\t' -r -s '^5' | awk -W interactive '$9 !~ "upload.wikimedia.org|query.php"' >> <%= log_directory %>/5xx.tsv.log + + +### Vrije Universiteit +# Contact: <%= scope.lookupvar('contacts::udp2log::vrije_universiteit_contact') %> +pipe 10 awk -f <%= webrequest_filter_directory %>/vu.awk | log2udp -h 130.37.198.252 -p 9999 + +### University of Minnesota +# Contact: <%= scope.lookupvar('contacts::udp2log::university_minnesota_contact') %> +# Former Contact: <%= scope.lookupvar('contacts::udp2log::university_minnesota_contact_former') %> +# Former contact: <%= scope.lookupvar('contacts::udp2log::university_minnesota_contact_former2') %> +pipe 10 awk -f <%= webrequest_filter_directory %>/minnesota.awk | log2udp -h bento.cs.umn.edu -p 9999 + -- To view, visit https://gerrit.wikimedia.org/r/75342 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ib0c7b8bf3925b828739b80726efe5210c57671a2 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Ottomata <o...@wikimedia.org> Gerrit-Reviewer: Ottomata <o...@wikimedia.org> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits