[MediaWiki-commits] [Gerrit] eventlogging[master]: Changes UA string to JSON map
Nuria has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/335145 ) Change subject: Changes UA string to JSON map .. Changes UA string to JSON map Uses ua_parser to generate a JSON object with properties obtained from the user agent string. The capsule schema remains unchanged. Custom code parses the WMF app version Bug: T153207 Change-Id: I165214a8b12ff573115381ff1d2d0305e8310e93 --- M eventlogging/parse.py M eventlogging/utils.py M requirements.txt M tests/test_parser.py 4 files changed, 62 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/eventlogging refs/changes/45/335145/1 diff --git a/eventlogging/parse.py b/eventlogging/parse.py index 98bf758..982cfa8 100644 --- a/eventlogging/parse.py +++ b/eventlogging/parse.py @@ -42,6 +42,7 @@ from .compat import json, unquote_plus, uuid5 from .event import Event +from .utils import parse_ua __all__ = ( 'LogParser', 'ncsa_to_unix', @@ -155,6 +156,9 @@ event = {k: f(match.group(k)) for f, k in caster_key_pairs} event.update(event.pop('capsule')) event['uuid'] = capsule_uuid(event) +if ('userAgent' in event) and event['userAgent']: +parsed_ua = parse_ua(event['userAgent']) +event['userAgent'] = parsed_ua return Event(event) def __repr__(self): diff --git a/eventlogging/utils.py b/eventlogging/utils.py index a0cfa62..55ab8e5 100644 --- a/eventlogging/utils.py +++ b/eventlogging/utils.py @@ -12,6 +12,7 @@ import copy import datetime import dateutil.parser +import json import logging import re import os @@ -20,6 +21,7 @@ import threading import traceback import uuid +from ua_parser import user_agent_parser from .compat import ( items, monotonic_clock, urisplit, urlencode, parse_qsl, @@ -291,3 +293,46 @@ # Set module logging level to INFO, DEBUG is too noisy. logging.getLogger("kafka").setLevel(logging.INFO) logging.getLogger("kazoo").setLevel(logging.INFO) + + +def parse_ua(userAgent): +""" +Returns a json string containing the parsed User Agent data +from a request's UA string. 
Uses the following format: +{ +"device_family":"Other", +"browser_major":"11", +"os_family":"Windows", +"os_major":"-", +"browser_family":"IE", +"os_minor":"-", +"wmf_app_version":"-" +} + +App version in user agents is parsed as follows: +WikipediaApp/5.3.1.1011 (iOS 10.0.2; Phone) +"wmf_app_version":"5.3.1.1011" +WikipediaApp/2.4.160-r-2016-10-14 (Android 4.4.2; Phone) Google Play +"wmf_app_version":"2.4.160-r-2016-10-14" +""" +parsed_ua = user_agent_parser.Parse(userAgent) +formatted_ua = {} +formatted_ua['device_family'] = parsed_ua['device']['family'] +formatted_ua['browser_major'] = parsed_ua['user_agent']['major'] +formatted_ua['os_family'] = parsed_ua['os']['family'] +formatted_ua['os_major'] = parsed_ua['os']['major'] +formatted_ua['browser_family'] = parsed_ua['user_agent']['family'] +formatted_ua['os_minor'] = parsed_ua['os']['minor'] +# default wmf_app_version is '-' +formatted_ua['wmf_app_version'] = '-' +appUA = 'WikipediaApp/' +l = 'WikipediaApp/' + +if appUA in userAgent: +items = userAgent.split() +version = items[0][l:] +formatted_ua['wmf_app_version'] = version + +# escape json so it doesn't cause problems when validating +# to string (per capsule definition) +return json.dumps(formatted_ua) diff --git a/requirements.txt b/requirements.txt index 44a567b..fd3d7b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ statsd>=3.0 tornado>=4.0 sprockets.mixins.statsd>=1.3.1 +ua_parser>=0.7.2 diff --git a/tests/test_parser.py b/tests/test_parser.py index 8d0c117..5d64b71 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -10,6 +10,7 @@ import calendar import datetime +import json import unittest import eventlogging @@ -39,7 +40,16 @@ '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7' 'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa' 'ms.wikimedia.org 132073 2013-01-19T23:16:38 - ' - 'Mozilla/5.0') + 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)' + ' Gecko/20100101 Firefox/10.0') +ua = json.dumps({ 
+'os_minor': None, +'os_major': None, +'device_family': 'Other', +'os_family': 'Linux', +'browser_major': '10', +'browser_family': 'Firefox' +}) parsed = { 'uuid': '799341a01ba957c79b15dc4d2d950864', 'recvFrom': 'cp3022.esams.wikimedia.org', @@ -49,7 +59,7 @@ 'timestamp': 1358637398, 'schema': 'Generic', 'revision':
[MediaWiki-commits] [Gerrit] eventlogging[master]: Changes UA string to JSON map
Nuria has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/333641 ) Change subject: Changes UA string to JSON map .. Changes UA string to JSON map Uses ua_parser to generate a JSON object with properties obtained from the user agent string. The capsule schema remains unchanged. Bug: T153207 Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb --- M eventlogging/parse.py M eventlogging/utils.py M requirements.txt M tests/test_parser.py 4 files changed, 45 insertions(+), 2 deletions(-) Approvals: jenkins-bot: Verified Nuria: Looks good to me, approved diff --git a/eventlogging/parse.py b/eventlogging/parse.py index 98bf758..982cfa8 100644 --- a/eventlogging/parse.py +++ b/eventlogging/parse.py @@ -42,6 +42,7 @@ from .compat import json, unquote_plus, uuid5 from .event import Event +from .utils import parse_ua __all__ = ( 'LogParser', 'ncsa_to_unix', @@ -155,6 +156,9 @@ event = {k: f(match.group(k)) for f, k in caster_key_pairs} event.update(event.pop('capsule')) event['uuid'] = capsule_uuid(event) +if ('userAgent' in event) and event['userAgent']: +parsed_ua = parse_ua(event['userAgent']) +event['userAgent'] = parsed_ua return Event(event) def __repr__(self): diff --git a/eventlogging/utils.py b/eventlogging/utils.py index a0cfa62..416d5f8 100644 --- a/eventlogging/utils.py +++ b/eventlogging/utils.py @@ -12,6 +12,7 @@ import copy import datetime import dateutil.parser +import json import logging import re import os @@ -20,6 +21,7 @@ import threading import traceback import uuid +from ua_parser import user_agent_parser from .compat import ( items, monotonic_clock, urisplit, urlencode, parse_qsl, @@ -291,3 +293,29 @@ # Set module logging level to INFO, DEBUG is too noisy. logging.getLogger("kafka").setLevel(logging.INFO) logging.getLogger("kazoo").setLevel(logging.INFO) + + +def parse_ua(userAgent): +""" +Returns a json string containing the parsed User Agent data +from a request's UA string. 
Uses the following format: +{ +"device_family":"Other", +"browser_major":"11", +"os_family":"Windows", +"os_major":"-", +"browser_family":"IE", +"os_minor":"-" +} +""" +parsed_ua = user_agent_parser.Parse(userAgent) +formatted_ua = {} +formatted_ua['device_family'] = parsed_ua['device']['family'] +formatted_ua['browser_major'] = parsed_ua['user_agent']['major'] +formatted_ua['os_family'] = parsed_ua['os']['family'] +formatted_ua['os_major'] = parsed_ua['os']['major'] +formatted_ua['browser_family'] = parsed_ua['user_agent']['family'] +formatted_ua['os_minor'] = parsed_ua['os']['minor'] +# escape json so it doesn't cause problems when validating +# to string (per capsule definition) +return json.dumps(formatted_ua) diff --git a/requirements.txt b/requirements.txt index 44a567b..fd3d7b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ statsd>=3.0 tornado>=4.0 sprockets.mixins.statsd>=1.3.1 +ua_parser>=0.7.2 diff --git a/tests/test_parser.py b/tests/test_parser.py index 8d0c117..5d64b71 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -10,6 +10,7 @@ import calendar import datetime +import json import unittest import eventlogging @@ -39,7 +40,16 @@ '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7' 'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa' 'ms.wikimedia.org 132073 2013-01-19T23:16:38 - ' - 'Mozilla/5.0') + 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)' + ' Gecko/20100101 Firefox/10.0') +ua = json.dumps({ +'os_minor': None, +'os_major': None, +'device_family': 'Other', +'os_family': 'Linux', +'browser_major': '10', +'browser_family': 'Firefox' +}) parsed = { 'uuid': '799341a01ba957c79b15dc4d2d950864', 'recvFrom': 'cp3022.esams.wikimedia.org', @@ -49,7 +59,7 @@ 'timestamp': 1358637398, 'schema': 'Generic', 'revision': 13, -'userAgent': 'Mozilla/5.0', +'userAgent': ua, 'event': { 'articleTitle': 'Héctor Elizondo', 'articleId': 1 -- To view, visit https://gerrit.wikimedia.org/r/333641 To unsubscribe, visit 
https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb Gerrit-PatchSet: 11 Gerrit-Project: eventlogging Gerrit-Branch: master Gerrit-Owner: Fdans Gerrit-Reviewer: Fdans Gerrit-Reviewer: Nuria Gerrit-Reviewer: Ottomata Gerrit-Reviewer: jenkins-bot
[MediaWiki-commits] [Gerrit] eventlogging[master]: Changes UA string to JSON map
Fdans has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/333641 ) Change subject: Changes UA string to JSON map .. Changes UA string to JSON map Uses ua_parser to generate a JSON object with properties obtained from the user agent string. The capsule schema remains unchanged. Bug: T153207 Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb --- M eventlogging/parse.py M eventlogging/utils.py M tests/test_parser.py 3 files changed, 31 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/eventlogging refs/changes/41/333641/1 diff --git a/eventlogging/parse.py b/eventlogging/parse.py index 98bf758..8e9b1ac 100644 --- a/eventlogging/parse.py +++ b/eventlogging/parse.py @@ -42,6 +42,7 @@ from .compat import json, unquote_plus, uuid5 from .event import Event +from .utils import parse_ua __all__ = ( 'LogParser', 'ncsa_to_unix', @@ -155,6 +156,7 @@ event = {k: f(match.group(k)) for f, k in caster_key_pairs} event.update(event.pop('capsule')) event['uuid'] = capsule_uuid(event) +event = parse_ua(event) return Event(event) def __repr__(self): diff --git a/eventlogging/utils.py b/eventlogging/utils.py index a0cfa62..ba53503 100644 --- a/eventlogging/utils.py +++ b/eventlogging/utils.py @@ -20,6 +20,7 @@ import threading import traceback import uuid +from ua_parser import user_agent_parser from .compat import ( items, monotonic_clock, urisplit, urlencode, parse_qsl, @@ -291,3 +292,10 @@ # Set module logging level to INFO, DEBUG is too noisy. 
logging.getLogger("kafka").setLevel(logging.INFO) logging.getLogger("kazoo").setLevel(logging.INFO) + +def parse_ua(event): +if 'userAgent' in event: +parsed_ua = user_agent_parser.Parse(event['userAgent']) +event['userAgent'] = parsed_ua +parsed_ua.pop('string') +return event diff --git a/tests/test_parser.py b/tests/test_parser.py index 8d0c117..d4ac7e3 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -39,7 +39,7 @@ '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7' 'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa' 'ms.wikimedia.org 132073 2013-01-19T23:16:38 - ' - 'Mozilla/5.0') + 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0') parsed = { 'uuid': '799341a01ba957c79b15dc4d2d950864', 'recvFrom': 'cp3022.esams.wikimedia.org', @@ -49,7 +49,26 @@ 'timestamp': 1358637398, 'schema': 'Generic', 'revision': 13, -'userAgent': 'Mozilla/5.0', +'userAgent': { +'device': { +'brand': None, +'model': None, +'family': 'Other' +}, +'os': { +'major': None, +'patch_minor': None, +'minor': None, +'family': 'Linux', +'patch': None +}, +'user_agent': { +'major': '10', +'minor': '0', +'family': 'Firefox', +'patch': None +} +}, 'event': { 'articleTitle': 'Héctor Elizondo', 'articleId': 1 -- To view, visit https://gerrit.wikimedia.org/r/333641 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb Gerrit-PatchSet: 1 Gerrit-Project: eventlogging Gerrit-Branch: master Gerrit-Owner: Fdans ___ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits