Ori.livneh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/75560


Change subject: (WIP) Parse errors and write to MongoDB
......................................................................

(WIP) Parse errors and write to MongoDB

This change adds a simple tool for reading exceptions and fatals on a UDP port
(hard-coded to 8423), parsing them into JSON documents, and writing them to a
MongoDB instance. This is a quick hack; needs to be designed further, but could
be tested on labs.

Bug: 49757
Change-Id: Ic892d3f82487ce6df098f0bd2dd13bc292814fa8
---
A fluoride.py
1 file changed, 164 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/tools/fluoride 
refs/changes/60/75560/1

diff --git a/fluoride.py b/fluoride.py
new file mode 100644
index 0000000..cd6dea2
--- /dev/null
+++ b/fluoride.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+   fluoride
+   ~~~~~~~~
+
+   Listen on UDP port for combined MediaWiki exception/fatal logs; parse each
+   error to a document; write document to MongoDB instance.
+
+"""
+import datetime
+import hashlib
+import io
+import re
+import socket
+
+import pymongo
+
+
+# Write data to this MongoDB instance:
+# NOTE(review): the URI embeds a username and password; presumably these are
+# placeholders -- confirm before deploying.
+MONGO_URI = 'mongodb://user:pass@127.0.0.1:27017'
+
+# Listen for errors & exceptions on this UDP interface/port:
+LISTEN_ADDR = ('0.0.0.0', 8423)
+
+
+# Pattern for the first line of an exception log entry. The fragments are
+# joined with single spaces; the `url` fragment already ends in two spaces, so
+# the combined pattern requires three spaces between the URL and the text
+# "Exception from line".
+# Named groups: timestamp, host, db, hash, url, line, file, message.
+EXCEPTION_RE = ' '.join((
+    r'(?P<timestamp>[-\d]{10} [:\d]{8})',
+    r'(?P<host>\w+)',
+    r'(?P<db>\w+):',
+    r'\[(?P<hash>\w+)\]',
+    r'(?P<url>[^\s]+)  ',
+    r'Exception from line (?P<line>\d+)',
+    r'of (?P<file>[^:]+):',
+    r'(?P<message>.*)'
+))
+
+# Pattern for the first line of a fatal error log entry: a bracketed
+# timestamp followed by "Fatal error: <message> at <file> on line <line>",
+# where <file> must begin with "/".
+# Named groups: timestamp, message, file, line.
+FATAL_RE = ' '.join((
+    r'\[(?P<timestamp>[^\]]+)\]',
+    r'Fatal error: (?P<message>.*)',
+    r'at (?P<file>\/[\S]+)',
+    r'on line (?P<line>\d+)',
+))
+
+
+# Determine the path prefix that canonical_path() strips from logged files.
+# The deployment vars file is scanned for NAME=value pairs; if the file cannot
+# be read or does not define MW_COMMON_SOURCE, a built-in default is used.
+try:
+    with io.open('/usr/local/lib/mw-deployment-vars.sh', encoding='utf8') as f:
+        # Raw string: \w and \S are regex escapes, not string escapes.
+        mw_vars = dict(re.findall(r'(\w+)=(\S+)', f.read()))
+    MW_COMMON_SOURCE = mw_vars['MW_COMMON_SOURCE']
+except (IOError, KeyError):
+    MW_COMMON_SOURCE = '/usr/local/apache/common-local/'
+
+# strptime() formats for the timestamps captured by EXCEPTION_RE and FATAL_RE.
+EXCEPTION_TS_FMT = '%Y-%m-%d %H:%M:%S'
+FATAL_TS_FMT = '%d-%b-%Y %H:%M:%S'
+strptime = datetime.datetime.strptime
+
+
+def canonical_path(file):
+    """Return `file` with a leading MW_COMMON_SOURCE prefix removed.
+
+    Paths that do not start with that prefix are returned unchanged.
+    """
+    if not file.startswith(MW_COMMON_SOURCE):
+        return file
+    prefix_length = len(MW_COMMON_SOURCE)
+    return file[prefix_length:]
+
+
+def generate_signature(error):
+    """Return a short identifier for the source location of an error.
+
+    `error` is a mapping with 'file' and 'line' keys. The signature is the
+    first ten hex digits of the SHA-1 digest of "<file>:<line>", so two errors
+    with the same file and line share a signature.
+    """
+    location = '%(file)s:%(line)s' % error
+    # hashlib only hashes bytes; encode text explicitly so that paths with
+    # non-ASCII characters are hashed rather than raising.
+    if not isinstance(location, bytes):
+        location = location.encode('utf-8')
+    return hashlib.sha1(location).hexdigest()[:10]
+
+
+def parse_frame(frame):
+    """Parse a line representing a frame in the trace.
+
+    Everything up to and including the first space is dropped, and the rest
+    is split at the first ': ' into a location and a call. A location of the
+    form `<file>(<line>)` yields the canonicalized file and an integer line;
+    a location without an opening parenthesis yields file=None and line=None.
+
+    Returns a dict with the keys 'file', 'line' and 'call'.
+
+    Raises ValueError if the frame has no ': ' separator or if the text
+    between the parentheses is not an integer.
+    """
+    frame = frame[frame.find(' ') + 1:]
+    try:
+        location, call = frame.split(': ', 1)
+    except ValueError:
+        # Show the offending frame before propagating the error.
+        print(frame)
+        raise
+    # The line number is the last parenthesized group, so search from the
+    # right: the file path itself may contain parentheses.
+    paren = location.rfind('(')
+    if paren == -1:
+        return dict(file=None, line=None, call=call)
+    file = location[:paren]
+    line = int(location[paren + 1:-1])
+    return dict(file=canonical_path(file), line=line, call=call)
+
+
+def handle_exception(trace, db):
+    """Parse an exception trace and insert it into `db['exception']`.
+
+    `trace` is a list of lines: the first must match EXCEPTION_RE and the
+    remaining lines are parsed as stack frames. The caller's list is not
+    modified. Nothing is written, and None is returned, when the trace is
+    empty or cannot be parsed.
+    """
+    if not trace:
+        # Indexing an empty trace would raise IndexError, which is not a
+        # parse failure handled below and would escape to the caller.
+        return None
+    try:
+        match = re.match(EXCEPTION_RE, trace[0])
+        exception = match.groupdict()
+        exception['line'] = int(exception['line'])
+        exception['file'] = canonical_path(exception['file'])
+        exception['signature'] = generate_signature(exception)
+        exception['timestamp'] = strptime(exception['timestamp'],
+                                          EXCEPTION_TS_FMT)
+        exception['frames'] = [parse_frame(frame) for frame in trace[1:]]
+    except (AttributeError, ValueError):
+        # The regexp fails on database errors, which we don't care about
+        # anyway, since they get logged by the database too.
+        return None
+    else:
+        db['exception'].insert(exception)
+
+
+def handle_fatal(trace, db):
+    """Parse a fatal error trace and insert it into `db['fatal']`.
+
+    `trace` is a list of lines: the first must match FATAL_RE; the lines that
+    follow, up to the first one starting with '#', are read as "Key: value"
+    pairs and stored under the lower-cased key; every remaining line except
+    the last is parsed as a stack frame. The caller's list is not modified.
+    Nothing is written, and None is returned, when the trace is empty or
+    cannot be parsed.
+    """
+    if not trace:
+        return None
+    # Work on a copy so that consuming lines does not alter the caller's list.
+    lines = list(trace)
+    try:
+        match = re.match(FATAL_RE, lines.pop(0))
+        fatal = match.groupdict()
+        # startswith() is safe on blank lines, unlike indexing character 0.
+        while lines and not lines[0].startswith('#'):
+            match = re.match(r'(\w+): (.*)', lines.pop(0))
+            if match:
+                k, v = match.groups()
+                fatal[k.lower()] = v
+        fatal['frames'] = [parse_frame(frame) for frame in lines[:-1]]
+        fatal['line'] = int(fatal['line'])
+        fatal['file'] = canonical_path(fatal['file'])
+        fatal['signature'] = generate_signature(fatal)
+        fatal['timestamp'] = strptime(fatal['timestamp'], FATAL_TS_FMT)
+    except (AttributeError, ValueError):
+        return None
+    else:
+        db['fatal'].insert(fatal)
+
+
+def iter_errors(callback):
+    """Coroutine. Groups the lines sent to it into traces for `callback`.
+
+    A line ending in '{main}' terminates a trace. The lines collected since
+    the previous terminator are passed to `callback` as a list; the
+    terminating line itself is not included.
+    """
+    terminator = '{main}'
+    # Whatever arrives before the first terminator is likely the tail of an
+    # incomplete trace, so it is dropped.
+    while True:
+        if (yield).endswith(terminator):
+            break
+    pending = []
+    while True:
+        received = (yield)
+        if not received.endswith(terminator):
+            pending.append(received)
+            continue
+        callback(pending)
+        pending = []
+
+
+# Connect first so that the handlers below can be bound to the database.
+client = pymongo.MongoClient(MONGO_URI)
+db = client['mwerrors']
+
+
+# Map error types to coroutines. iter_errors() calls its callback with the
+# trace only, so each handler is wrapped to also receive the database.
+processors = {
+    'fatal': iter_errors(lambda trace: handle_fatal(trace, db)),
+    'exception': iter_errors(lambda trace: handle_exception(trace, db)),
+}
+
+
+# Initialize coroutines.
+for processor in processors.values():
+    processor.send(None)
+
+
+sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+sock.bind(LISTEN_ADDR)
+with io.open(sock.fileno(), mode='rt', encoding='utf8',
+             errors='replace') as f:
+    for line in f:
+        # Each line is expected to be "<seq_id> <type> <payload>".
+        try:
+            seq_id, error_type, line = line.split(' ', 2)
+            target = processors[error_type]
+        except (ValueError, KeyError):
+            # Skip malformed lines and unknown error types rather than
+            # letting one bad datagram stop the listener.
+            continue
+        # A generator's send() accepts exactly one value.
+        target.send(line.strip())

-- 
To view, visit https://gerrit.wikimedia.org/r/75560
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic892d3f82487ce6df098f0bd2dd13bc292814fa8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/tools/fluoride
Gerrit-Branch: master
Gerrit-Owner: Ori.livneh <o...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to