Nuria has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/333641 )
Change subject: Changes UA string to JSON map ...................................................................... Changes UA string to JSON map Uses ua_parser to generate a JSON object with properties obtained from the user agent string. The capsule schema remains unchanged. Bug: T153207 Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb --- M eventlogging/parse.py M eventlogging/utils.py M requirements.txt M tests/test_parser.py 4 files changed, 45 insertions(+), 2 deletions(-) Approvals: jenkins-bot: Verified Nuria: Looks good to me, approved diff --git a/eventlogging/parse.py b/eventlogging/parse.py index 98bf758..982cfa8 100644 --- a/eventlogging/parse.py +++ b/eventlogging/parse.py @@ -42,6 +42,7 @@ from .compat import json, unquote_plus, uuid5 from .event import Event +from .utils import parse_ua __all__ = ( 'LogParser', 'ncsa_to_unix', @@ -155,6 +156,9 @@ event = {k: f(match.group(k)) for f, k in caster_key_pairs} event.update(event.pop('capsule')) event['uuid'] = capsule_uuid(event) + if ('userAgent' in event) and event['userAgent']: + parsed_ua = parse_ua(event['userAgent']) + event['userAgent'] = parsed_ua return Event(event) def __repr__(self): diff --git a/eventlogging/utils.py b/eventlogging/utils.py index a0cfa62..416d5f8 100644 --- a/eventlogging/utils.py +++ b/eventlogging/utils.py @@ -12,6 +12,7 @@ import copy import datetime import dateutil.parser +import json import logging import re import os @@ -20,6 +21,7 @@ import threading import traceback import uuid +from ua_parser import user_agent_parser from .compat import ( items, monotonic_clock, urisplit, urlencode, parse_qsl, @@ -291,3 +293,29 @@ # Set module logging level to INFO, DEBUG is too noisy. logging.getLogger("kafka").setLevel(logging.INFO) logging.getLogger("kazoo").setLevel(logging.INFO) + + +def parse_ua(userAgent): + """ + Returns a json string containing the parsed User Agent data + from a request's UA string. Uses the following format: + { + "device_family":"Other", + "browser_major":"11", + "os_family":"Windows", + "os_major":"-", + "browser_family":"IE", + "os_minor":"-" + } + """ + parsed_ua = user_agent_parser.Parse(userAgent) + formatted_ua = {} + formatted_ua['device_family'] = parsed_ua['device']['family'] + formatted_ua['browser_major'] = parsed_ua['user_agent']['major'] + formatted_ua['os_family'] = parsed_ua['os']['family'] + formatted_ua['os_major'] = parsed_ua['os']['major'] + formatted_ua['browser_family'] = parsed_ua['user_agent']['family'] + formatted_ua['os_minor'] = parsed_ua['os']['minor'] + # escape json so it doesn't cause problems when validating + # to string (per capsule definition) + return json.dumps(formatted_ua) diff --git a/requirements.txt b/requirements.txt index 44a567b..fd3d7b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ statsd>=3.0 tornado>=4.0 sprockets.mixins.statsd>=1.3.1 +ua_parser>=0.7.2 diff --git a/tests/test_parser.py b/tests/test_parser.py index 8d0c117..5d64b71 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -10,6 +10,7 @@ import calendar import datetime +import json import unittest import eventlogging @@ -39,7 +40,16 @@ '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7' 'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa' 'ms.wikimedia.org 132073 2013-01-19T23:16:38 - ' - 'Mozilla/5.0') + 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)' + ' Gecko/20100101 Firefox/10.0') + ua = json.dumps({ + 'os_minor': None, + 'os_major': None, + 'device_family': 'Other', + 'os_family': 'Linux', + 'browser_major': '10', + 'browser_family': 'Firefox' + }) parsed = { 'uuid': '799341a01ba957c79b15dc4d2d950864', 'recvFrom': 'cp3022.esams.wikimedia.org', @@ -49,7 +59,7 @@ 'timestamp': 1358637398, 'schema': 'Generic', 'revision': 13, - 'userAgent': 'Mozilla/5.0', + 'userAgent': ua, 'event': { 'articleTitle': 'Héctor Elizondo', 'articleId': 1 -- To view, visit https://gerrit.wikimedia.org/r/333641 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb Gerrit-PatchSet: 11 Gerrit-Project: eventlogging Gerrit-Branch: master Gerrit-Owner: Fdans <fd...@wikimedia.org> Gerrit-Reviewer: Fdans <fd...@wikimedia.org> Gerrit-Reviewer: Nuria <nu...@wikimedia.org> Gerrit-Reviewer: Ottomata <ao...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits