Nuria has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/333641 )

Change subject: Changes UA string to JSON map
......................................................................


Changes UA string to JSON map

Uses ua_parser to generate a JSON object with properties obtained from
the user agent string. The capsule schema remains unchanged.

Bug: T153207
Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb
---
M eventlogging/parse.py
M eventlogging/utils.py
M requirements.txt
M tests/test_parser.py
4 files changed, 45 insertions(+), 2 deletions(-)

Approvals:
  jenkins-bot: Verified
  Nuria: Looks good to me, approved



diff --git a/eventlogging/parse.py b/eventlogging/parse.py
index 98bf758..982cfa8 100644
--- a/eventlogging/parse.py
+++ b/eventlogging/parse.py
@@ -42,6 +42,7 @@
 
 from .compat import json, unquote_plus, uuid5
 from .event import Event
+from .utils import parse_ua
 
 __all__ = (
     'LogParser', 'ncsa_to_unix',
@@ -155,6 +156,9 @@
         event = {k: f(match.group(k)) for f, k in caster_key_pairs}
         event.update(event.pop('capsule'))
         event['uuid'] = capsule_uuid(event)
+        if ('userAgent' in event) and event['userAgent']:
+            parsed_ua = parse_ua(event['userAgent'])
+            event['userAgent'] = parsed_ua
         return Event(event)
 
     def __repr__(self):
diff --git a/eventlogging/utils.py b/eventlogging/utils.py
index a0cfa62..416d5f8 100644
--- a/eventlogging/utils.py
+++ b/eventlogging/utils.py
@@ -12,6 +12,7 @@
 import copy
 import datetime
 import dateutil.parser
+import json
 import logging
 import re
 import os
@@ -20,6 +21,7 @@
 import threading
 import traceback
 import uuid
+from ua_parser import user_agent_parser
 
 from .compat import (
     items, monotonic_clock, urisplit, urlencode, parse_qsl,
@@ -291,3 +293,29 @@
         # Set module logging level to INFO, DEBUG is too noisy.
         logging.getLogger("kafka").setLevel(logging.INFO)
         logging.getLogger("kazoo").setLevel(logging.INFO)
+
+
+def parse_ua(userAgent):
+    """
+    Returns a json string containing the parsed User Agent data
+    from a request's UA string. Uses the following format:
+    {
+        "device_family":"Other",
+        "browser_major":"11",
+        "os_family":"Windows",
+        "os_major":"-",
+        "browser_family":"IE",
+        "os_minor":"-"
+    }
+    """
+    parsed_ua = user_agent_parser.Parse(userAgent)
+    formatted_ua = {}
+    formatted_ua['device_family'] = parsed_ua['device']['family']
+    formatted_ua['browser_major'] = parsed_ua['user_agent']['major']
+    formatted_ua['os_family'] = parsed_ua['os']['family']
+    formatted_ua['os_major'] = parsed_ua['os']['major']
+    formatted_ua['browser_family'] = parsed_ua['user_agent']['family']
+    formatted_ua['os_minor'] = parsed_ua['os']['minor']
+    # escape json so it doesn't cause problems when validating
+    # to string (per capsule definition)
+    return json.dumps(formatted_ua)
diff --git a/requirements.txt b/requirements.txt
index 44a567b..fd3d7b3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@
 statsd>=3.0
 tornado>=4.0
 sprockets.mixins.statsd>=1.3.1
+ua_parser>=0.7.2
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 8d0c117..5d64b71 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -10,6 +10,7 @@
 
 import calendar
 import datetime
+import json
 import unittest
 
 import eventlogging
@@ -39,7 +40,16 @@
                '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7'
                'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa'
                'ms.wikimedia.org 132073 2013-01-19T23:16:38 - '
-               'Mozilla/5.0')
+               'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)'
+               ' Gecko/20100101 Firefox/10.0')
+        ua = json.dumps({
+                'os_minor': None,
+                'os_major': None,
+                'device_family': 'Other',
+                'os_family': 'Linux',
+                'browser_major': '10',
+                'browser_family': 'Firefox'
+            })
         parsed = {
             'uuid': '799341a01ba957c79b15dc4d2d950864',
             'recvFrom': 'cp3022.esams.wikimedia.org',
@@ -49,7 +59,7 @@
             'timestamp': 1358637398,
             'schema': 'Generic',
             'revision': 13,
-            'userAgent': 'Mozilla/5.0',
+            'userAgent': ua,
             'event': {
                 'articleTitle': 'Héctor Elizondo',
                 'articleId': 1

-- 
To view, visit https://gerrit.wikimedia.org/r/333641
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb
Gerrit-PatchSet: 11
Gerrit-Project: eventlogging
Gerrit-Branch: master
Gerrit-Owner: Fdans <fd...@wikimedia.org>
Gerrit-Reviewer: Fdans <fd...@wikimedia.org>
Gerrit-Reviewer: Nuria <nu...@wikimedia.org>
Gerrit-Reviewer: Ottomata <ao...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to