Ori.livneh has submitted this change and it was merged.

Change subject: logstash: Parse apache syslog messages
......................................................................


logstash: Parse apache syslog messages

Filter and parse apache syslog messages to remove unhelpfully noisy
messages and extract better classification information for searching and
sorting. The pattern used was built incrementally using
<https://grokdebug.herokuapp.com/> and a selection of log events taken
from production logstash.

This change also fixes the "message repeated" grok pattern to correctly
handle whitespace inserted by "#012" expansion.

Change-Id: Ica4deeff0bcbbc01236813aa921045fe1f5df346
---
M files/logstash/filter-syslog.conf
1 file changed, 40 insertions(+), 9 deletions(-)

Approvals:
  Ori.livneh: Verified; Looks good to me, approved



diff --git a/files/logstash/filter-syslog.conf b/files/logstash/filter-syslog.conf
index 24c6093..c42b180 100644
--- a/files/logstash/filter-syslog.conf
+++ b/files/logstash/filter-syslog.conf
@@ -1,3 +1,5 @@
+# vim:set sw=2 ts=2 sts=2 et
+# Parse syslog input
 filter {
   if [type] == "syslog" {
     # General syslog message cleanup
@@ -19,24 +21,24 @@
       grok {
         match => [
           "message",
-          "^message repeated %{NUMBER:repeated} times: \[%{GREEDYDATA:message}\]$"
+          "^message repeated %{NUMBER:repeated} times: \[\s*%{GREEDYDATA:message}\]$"
         ]
         overwrite => [ "message" ]
         named_captures_only => true
       }
     }
 
-    # Strip leading newline from hhvm messages
-    if [type] == "hhvm" {
-      mutate {
-        gsub => [ "message", "^\n", "" ]
-      }
-    }
-
-    # Mark kernel messages forwared because of hhvm as hhvm messages
+    # Mark kernel messages forwarded because of hhvm as hhvm messages
     if [type] == "kernel" and [message] =~ /hhvm/ {
       mutate {
         replace => [ "type",  "hhvm" ]
+      }
+    }
+
+    # Strip leading newline from hhvm messages
+    if [type] == "hhvm" {
+      mutate {
+        strip => [ "message" ]
       }
     }
 
@@ -48,5 +50,34 @@
         what => "previous"
       }
     }
+
+    if [type] == "apache2" {
+      # Ignore known mod_proxy_fcgi bugs
+      if [message] =~ /AH01070: Error parsing script headers/ {
+        drop {}
+      }
+      if [message] =~ /AH01068: Got bogus version \d/ {
+        drop {}
+      }
+
+      # Parse typical apache error format:
+      # [channel:level] [pid N] MSG? [client HOST:PORT] MSG, referer: URL
+      grok {
+        match => [
+          "message",
+          "^\[(%{WORD:channel}:)?%{LOGLEVEL:level}\]\s+(\[pid %{POSINT}\]\s+)?(?<message_prefix>[^\[]+)?(\[client %{IP:clientip}(:%{POSINT:clientport})?\]\s+)?%{DATA:message}(,\s+referer:\s+%{NOTSPACE:referrer})?$"
+        ]
+        overwrite => [ "message", "level" ]
+        named_captures_only => true
+      }
+
+      if [message_prefix] {
+        mutate {
+          "replace" => [ "message", "%{message_prefix}%{message}" ]
+          "remove_field" => [ "message_prefix" ]
+        }
+      }
+
+    } #end [type] == "apache2"
   }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/179480
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ica4deeff0bcbbc01236813aa921045fe1f5df346
Gerrit-PatchSet: 8
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: BryanDavis <[email protected]>
Gerrit-Reviewer: BryanDavis <[email protected]>
Gerrit-Reviewer: Chasemp <[email protected]>
Gerrit-Reviewer: Dzahn <[email protected]>
Gerrit-Reviewer: Gage <[email protected]>
Gerrit-Reviewer: Ori.livneh <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to