Hello Krinkle, Ottomata, jenkins-bot, Fdans, I'd like you to do a code review. Please visit
https://gerrit.wikimedia.org/r/342663 to review the following change. Change subject: Revert "Change UA string to JSON map" ...................................................................... Revert "Change UA string to JSON map" This reverts commit c3ccb4ab647c95145e98b3c9e2a1834ef1fc51fd due to issues with database length of User Agent column, change works well otherwise Change-Id: I9e8505d65247705cd548944526c89e6894b464dd --- M eventlogging/parse.py M eventlogging/utils.py M requirements.txt M tests/test_parser.py M tests/test_utils.py 5 files changed, 3 insertions(+), 121 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/eventlogging refs/changes/63/342663/1 diff --git a/eventlogging/parse.py b/eventlogging/parse.py index 982cfa8..98bf758 100644 --- a/eventlogging/parse.py +++ b/eventlogging/parse.py @@ -42,7 +42,6 @@ from .compat import json, unquote_plus, uuid5 from .event import Event -from .utils import parse_ua __all__ = ( 'LogParser', 'ncsa_to_unix', @@ -156,9 +155,6 @@ event = {k: f(match.group(k)) for f, k in caster_key_pairs} event.update(event.pop('capsule')) event['uuid'] = capsule_uuid(event) - if ('userAgent' in event) and event['userAgent']: - parsed_ua = parse_ua(event['userAgent']) - event['userAgent'] = parsed_ua return Event(event) def __repr__(self): diff --git a/eventlogging/utils.py b/eventlogging/utils.py index 482c03a..a0cfa62 100644 --- a/eventlogging/utils.py +++ b/eventlogging/utils.py @@ -12,7 +12,6 @@ import copy import datetime import dateutil.parser -import json import logging import re import os @@ -21,7 +20,6 @@ import threading import traceback import uuid -from ua_parser import user_agent_parser from .compat import ( items, monotonic_clock, urisplit, urlencode, parse_qsl, @@ -293,47 +291,3 @@ # Set module logging level to INFO, DEBUG is too noisy. logging.getLogger("kafka").setLevel(logging.INFO) logging.getLogger("kazoo").setLevel(logging.INFO) - - -def parse_ua(user_agent): - """ - Returns a json string containing the parsed User Agent data - from a request's UA string. Uses the following format: - { - "device_family": "Other", - "browser_family": "IE", - "browser_major": "11", - "browser_major": "0", - "os_family": "Windows Vista", - "os_major": null, - "os_minor": null, - "wmf_app_version": "-" - } - - App version in user agents is parsed as follows: - WikipediaApp/5.3.1.1011 (iOS 10.0.2; Phone) - "wmf_app_version":"5.3.1.1011" - WikipediaApp/2.4.160-r-2016-10-14 (Android 4.4.2; Phone) Google Play - "wmf_app_version":"2.4.160-r-2016-10-14" - """ - parsed_ua = user_agent_parser.Parse(user_agent) - formatted_ua = {} - formatted_ua['device_family'] = parsed_ua['device']['family'] - formatted_ua['browser_family'] = parsed_ua['user_agent']['family'] - formatted_ua['browser_major'] = parsed_ua['user_agent']['major'] - formatted_ua['browser_minor'] = parsed_ua['user_agent']['minor'] - formatted_ua['os_family'] = parsed_ua['os']['family'] - formatted_ua['os_major'] = parsed_ua['os']['major'] - formatted_ua['os_minor'] = parsed_ua['os']['minor'] - # default wmf_app_version is '-' - formatted_ua['wmf_app_version'] = '-' - app_ua = 'WikipediaApp/' - - if app_ua in user_agent: - items = user_agent.split() - version = items[0].split("/")[1] - formatted_ua['wmf_app_version'] = version - - # escape json so it doesn't cause problems when validating - # to string (per capsule definition) - return json.dumps(formatted_ua) diff --git a/requirements.txt b/requirements.txt index fd3d7b3..44a567b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,4 +11,3 @@ statsd>=3.0 tornado>=4.0 sprockets.mixins.statsd>=1.3.1 -ua_parser>=0.7.2 diff --git a/tests/test_parser.py b/tests/test_parser.py index 67128d5..8d0c117 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -10,7 +10,6 @@ import calendar import datetime -import json import unittest import eventlogging @@ -40,18 +39,7 @@ '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7' 'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa' 'ms.wikimedia.org 132073 2013-01-19T23:16:38 - ' - 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)' - ' Gecko/20100101 Firefox/10.0') - ua = json.dumps({ - 'os_minor': None, - 'os_major': None, - 'device_family': 'Other', - 'os_family': 'Linux', - 'browser_major': '10', - 'browser_minor': '0', - 'browser_family': 'Firefox', - 'wmf_app_version': '-' - }) + 'Mozilla/5.0') parsed = { 'uuid': '799341a01ba957c79b15dc4d2d950864', 'recvFrom': 'cp3022.esams.wikimedia.org', @@ -61,22 +49,13 @@ 'timestamp': 1358637398, 'schema': 'Generic', 'revision': 13, - 'userAgent': ua, + 'userAgent': 'Mozilla/5.0', 'event': { 'articleTitle': 'Héctor Elizondo', 'articleId': 1 } } - fromParser = parser.parse(raw) - for key in parsed: - if key == 'userAgent': - # Python changes the order of keys when dumping objects into - # a string, so we need to compare the ua separately parsing - # it into an object. - self.assertEqual(json.loads(parsed[key]), - json.loads(fromParser[key])) - else: - self.assertEqual(fromParser[key], parsed[key]) + self.assertEqual(parser.parse(raw), parsed) def test_parser_server_side_events(self): """Parser test: server-side events.""" diff --git a/tests/test_utils.py b/tests/test_utils.py index 131d23e..0fe5e67 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -11,7 +11,6 @@ import datetime import unittest import uuid -import json import eventlogging from eventlogging.compat import long @@ -143,48 +142,3 @@ 'test', 'group_id should equal test' ) - - def test_ua_parse_ios(self): - ios_ua = 'WikipediaApp/5.3.3.1038 (iOS 10.2; Phone)' - parsed = json.dumps({ - 'os_minor': '2', - 'os_major': '10', - 'device_family': 'Other', - 'os_family': 'iOS', - 'browser_major': None, - 'browser_minor': None, - 'browser_family': 'Other', - 'wmf_app_version': '5.3.3.1038' - }) - self.assertEqual(json.loads(parsed), - json.loads(eventlogging.utils.parse_ua(ios_ua))) - - def test_ua_parse_android(self): - android_ua = 'WikipediaApp/2.4.160-r-2016-10-14 (Android 4.4.2; Phone)' - parsed = json.dumps({ - 'os_major': '4', - 'wmf_app_version': '2.4.160-r-2016-10-14', - 'os_family': 'Android', - 'device_family': 'Generic Smartphone', - 'browser_family': 'Android', - 'browser_minor': '4', - 'browser_major': '4', - 'os_minor': '4' - }) - self.assertEqual(json.loads(parsed), - json.loads(eventlogging.utils.parse_ua(android_ua))) - - def test_ua_parse_empty(self): - ua = "" - parsed = json.dumps({ - 'os_minor': None, - 'os_major': None, - 'device_family': 'Other', - 'os_family': 'Other', - 'browser_major': None, - 'browser_minor': None, - 'browser_family': 'Other', - 'wmf_app_version': '-' - }) - self.assertEqual(json.loads(parsed), - json.loads(eventlogging.utils.parse_ua(ua))) -- To view, visit https://gerrit.wikimedia.org/r/342663 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I9e8505d65247705cd548944526c89e6894b464dd Gerrit-PatchSet: 1 Gerrit-Project: eventlogging Gerrit-Branch: master Gerrit-Owner: Nuria <nu...@wikimedia.org> Gerrit-Reviewer: Fdans <fd...@wikimedia.org> Gerrit-Reviewer: Krinkle <krinklem...@gmail.com> Gerrit-Reviewer: Ottomata <ao...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits