Gage has submitted this change and it was merged.

Change subject: logstash: hadoop: extract and infer job, task, attempt IDs
......................................................................


logstash: hadoop: extract and infer job, task, attempt IDs

Change-Id: I5fd74143384e7346bde36d6584d07a5db511bdab
---
M files/logstash/filter-gelf.conf
1 file changed, 19 insertions(+), 0 deletions(-)

Approvals:
  Gage: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/files/logstash/filter-gelf.conf b/files/logstash/filter-gelf.conf
index 40499eb..7e15f87 100644
--- a/files/logstash/filter-gelf.conf
+++ b/files/logstash/filter-gelf.conf
@@ -5,6 +5,8 @@
         replace => [ "channel", "%{SourceSimpleClassName}" ]
       }
       grok {
+        # Oniguruma syntax for ‘named capture’: (?<field_name>the pattern here)
+        # this overwrites the Thread field with a shorter more generic version, saving details to separate fields
         match => [ "Thread", "(?<Thread>IPC Server handler) %{NUMBER:IPC_Server_handler_id} on %{NUMBER:IPC_Server_handler_port}" ]
         match => [ "Thread", "(?<Thread>DeletionService) #%{NUMBER:DeletionService_id}" ]
         match => [ "Thread", "(?<Thread>LocalizerRunner) for %{NOTSPACE:ContainerId}" ]
@@ -13,6 +15,23 @@
         match => [ "Thread", "(?<Thread>CacheReplicationMonitor)\(%{NUMBER:CacheReplicationMonitor_id}\)" ]
         overwrite => [ "Thread" ]
       }
+      grok {
+        # so that we can search by job id and find tasks and attempts:
+        # extract attempt ID to field (digits written as [0-9]; a bare [:digit:] is not a POSIX class)
+        # attempt_1409078537822_52431_m_000009_1
+        # attempt_1409078537822_55176_r_000000_0
+        match => [ "message", ".*attempt_(?<Attempt_id>[0-9]+_[0-9]+_[mr]_[0-9]+_[0-9]+).*" ]
+        # extract task ID to field
+        # task_1409078537822_52431_m_000044
+        match => [ "message",       ".*task_(?<Task_id>[0-9]+_[0-9]+_[mr]_[0-9]+).*" ]
+        # extract job ID to field
+        # job_1409078537822_52431
+        match => [ "message",         ".*job_(?<Job_id>[0-9]+_[0-9]+).*" ]
+        # infer task ID from attempt ID -- NOTE(review): grok's break_on_match defaults to true, so this may not run after a message match; confirm
+        match => [ "Attempt_id",           "(?<Task_id>[0-9]+_[0-9]+_[mr]_[0-9]+)_[0-9]+" ]
+        # infer job ID from task ID
+        match => [ "Task_id",               "(?<Job_id>[0-9]+_[0-9]+)_[mr]_[0-9]+" ]
+      }
       dns {
         reverse => [ "host" ]
         action  => "replace"

-- 
To view, visit https://gerrit.wikimedia.org/r/168935
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5fd74143384e7346bde36d6584d07a5db511bdab
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Gage <jger...@wikimedia.org>
Gerrit-Reviewer: Gage <jger...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to