[MediaWiki-commits] [Gerrit] eventlogging[master]: Changes UA string to JSON map

2017-01-30 Thread Nuria (Code Review)
Nuria has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/335145 )

Change subject: Changes UA string to JSON map
..

Changes UA string to JSON map

Uses ua_parser to generate a JSON object with properties obtained from
the user agent string. The capsule schema remains unchanged.

Custom code parses the WMF app version

Bug: T153207
Change-Id: I165214a8b12ff573115381ff1d2d0305e8310e93
---
M eventlogging/parse.py
M eventlogging/utils.py
M requirements.txt
M tests/test_parser.py
4 files changed, 62 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/eventlogging refs/changes/45/335145/1

diff --git a/eventlogging/parse.py b/eventlogging/parse.py
index 98bf758..982cfa8 100644
--- a/eventlogging/parse.py
+++ b/eventlogging/parse.py
@@ -42,6 +42,7 @@
 
 from .compat import json, unquote_plus, uuid5
 from .event import Event
+from .utils import parse_ua
 
 __all__ = (
 'LogParser', 'ncsa_to_unix',
@@ -155,6 +156,9 @@
 event = {k: f(match.group(k)) for f, k in caster_key_pairs}
 event.update(event.pop('capsule'))
 event['uuid'] = capsule_uuid(event)
+if ('userAgent' in event) and event['userAgent']:
+parsed_ua = parse_ua(event['userAgent'])
+event['userAgent'] = parsed_ua
 return Event(event)
 
 def __repr__(self):
diff --git a/eventlogging/utils.py b/eventlogging/utils.py
index a0cfa62..55ab8e5 100644
--- a/eventlogging/utils.py
+++ b/eventlogging/utils.py
@@ -12,6 +12,7 @@
 import copy
 import datetime
 import dateutil.parser
+import json
 import logging
 import re
 import os
@@ -20,6 +21,7 @@
 import threading
 import traceback
 import uuid
+from ua_parser import user_agent_parser
 
 from .compat import (
 items, monotonic_clock, urisplit, urlencode, parse_qsl,
@@ -291,3 +293,46 @@
 # Set module logging level to INFO, DEBUG is too noisy.
 logging.getLogger("kafka").setLevel(logging.INFO)
 logging.getLogger("kazoo").setLevel(logging.INFO)
+
+
+def parse_ua(userAgent):
+"""
+Returns a json string containing the parsed User Agent data
+from a request's UA string. Uses the following format:
+{
+"device_family":"Other",
+"browser_major":"11",
+"os_family":"Windows",
+"os_major":"-",
+"browser_family":"IE",
+"os_minor":"-",
+"wmf_app_version":"-"
+}
+
+App version in user agents is parsed as follows:
+WikipediaApp/5.3.1.1011 (iOS 10.0.2; Phone)
+"wmf_app_version":"5.3.1.1011"
+WikipediaApp/2.4.160-r-2016-10-14 (Android 4.4.2; Phone) Google Play
+"wmf_app_version":"2.4.160-r-2016-10-14"
+"""
+parsed_ua = user_agent_parser.Parse(userAgent)
+formatted_ua = {}
+formatted_ua['device_family'] = parsed_ua['device']['family']
+formatted_ua['browser_major'] = parsed_ua['user_agent']['major']
+formatted_ua['os_family'] = parsed_ua['os']['family']
+formatted_ua['os_major'] = parsed_ua['os']['major']
+formatted_ua['browser_family'] = parsed_ua['user_agent']['family']
+formatted_ua['os_minor'] = parsed_ua['os']['minor']
+# default wmf_app_version is '-'
+formatted_ua['wmf_app_version'] = '-'
+appUA = 'WikipediaApp/'
+l = 'WikipediaApp/'
+
+if appUA in userAgent:
+items = userAgent.split()
+version = items[0][l:]
+formatted_ua['wmf_app_version'] = version
+
+# escape json so it doesn't cause problems when validating
+# to string (per capsule definition)
+return json.dumps(formatted_ua)
diff --git a/requirements.txt b/requirements.txt
index 44a567b..fd3d7b3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@
 statsd>=3.0
 tornado>=4.0
 sprockets.mixins.statsd>=1.3.1
+ua_parser>=0.7.2
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 8d0c117..5d64b71 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -10,6 +10,7 @@
 
 import calendar
 import datetime
+import json
 import unittest
 
 import eventlogging
@@ -39,7 +40,16 @@
'2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7'
'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa'
'ms.wikimedia.org 132073 2013-01-19T23:16:38 - '
-   'Mozilla/5.0')
+   'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)'
+   ' Gecko/20100101 Firefox/10.0')
+ua = json.dumps({
+'os_minor': None,
+'os_major': None,
+'device_family': 'Other',
+'os_family': 'Linux',
+'browser_major': '10',
+'browser_family': 'Firefox'
+})
 parsed = {
 'uuid': '799341a01ba957c79b15dc4d2d950864',
 'recvFrom': 'cp3022.esams.wikimedia.org',
@@ -49,7 +59,7 @@
 'timestamp': 1358637398,
 'schema': 'Generic',
 'revision': 

[MediaWiki-commits] [Gerrit] eventlogging[master]: Changes UA string to JSON map

2017-01-30 Thread Nuria (Code Review)
Nuria has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/333641 )

Change subject: Changes UA string to JSON map
..


Changes UA string to JSON map

Uses ua_parser to generate a JSON object with properties obtained from
the user agent string. The capsule schema remains unchanged.

Bug: T153207
Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb
---
M eventlogging/parse.py
M eventlogging/utils.py
M requirements.txt
M tests/test_parser.py
4 files changed, 45 insertions(+), 2 deletions(-)

Approvals:
  jenkins-bot: Verified
  Nuria: Looks good to me, approved



diff --git a/eventlogging/parse.py b/eventlogging/parse.py
index 98bf758..982cfa8 100644
--- a/eventlogging/parse.py
+++ b/eventlogging/parse.py
@@ -42,6 +42,7 @@
 
 from .compat import json, unquote_plus, uuid5
 from .event import Event
+from .utils import parse_ua
 
 __all__ = (
 'LogParser', 'ncsa_to_unix',
@@ -155,6 +156,9 @@
 event = {k: f(match.group(k)) for f, k in caster_key_pairs}
 event.update(event.pop('capsule'))
 event['uuid'] = capsule_uuid(event)
+if ('userAgent' in event) and event['userAgent']:
+parsed_ua = parse_ua(event['userAgent'])
+event['userAgent'] = parsed_ua
 return Event(event)
 
 def __repr__(self):
diff --git a/eventlogging/utils.py b/eventlogging/utils.py
index a0cfa62..416d5f8 100644
--- a/eventlogging/utils.py
+++ b/eventlogging/utils.py
@@ -12,6 +12,7 @@
 import copy
 import datetime
 import dateutil.parser
+import json
 import logging
 import re
 import os
@@ -20,6 +21,7 @@
 import threading
 import traceback
 import uuid
+from ua_parser import user_agent_parser
 
 from .compat import (
 items, monotonic_clock, urisplit, urlencode, parse_qsl,
@@ -291,3 +293,29 @@
 # Set module logging level to INFO, DEBUG is too noisy.
 logging.getLogger("kafka").setLevel(logging.INFO)
 logging.getLogger("kazoo").setLevel(logging.INFO)
+
+
+def parse_ua(userAgent):
+"""
+Returns a json string containing the parsed User Agent data
+from a request's UA string. Uses the following format:
+{
+"device_family":"Other",
+"browser_major":"11",
+"os_family":"Windows",
+"os_major":"-",
+"browser_family":"IE",
+"os_minor":"-"
+}
+"""
+parsed_ua = user_agent_parser.Parse(userAgent)
+formatted_ua = {}
+formatted_ua['device_family'] = parsed_ua['device']['family']
+formatted_ua['browser_major'] = parsed_ua['user_agent']['major']
+formatted_ua['os_family'] = parsed_ua['os']['family']
+formatted_ua['os_major'] = parsed_ua['os']['major']
+formatted_ua['browser_family'] = parsed_ua['user_agent']['family']
+formatted_ua['os_minor'] = parsed_ua['os']['minor']
+# escape json so it doesn't cause problems when validating
+# to string (per capsule definition)
+return json.dumps(formatted_ua)
diff --git a/requirements.txt b/requirements.txt
index 44a567b..fd3d7b3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@
 statsd>=3.0
 tornado>=4.0
 sprockets.mixins.statsd>=1.3.1
+ua_parser>=0.7.2
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 8d0c117..5d64b71 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -10,6 +10,7 @@
 
 import calendar
 import datetime
+import json
 import unittest
 
 import eventlogging
@@ -39,7 +40,16 @@
'2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7'
'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa'
'ms.wikimedia.org 132073 2013-01-19T23:16:38 - '
-   'Mozilla/5.0')
+   'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)'
+   ' Gecko/20100101 Firefox/10.0')
+ua = json.dumps({
+'os_minor': None,
+'os_major': None,
+'device_family': 'Other',
+'os_family': 'Linux',
+'browser_major': '10',
+'browser_family': 'Firefox'
+})
 parsed = {
 'uuid': '799341a01ba957c79b15dc4d2d950864',
 'recvFrom': 'cp3022.esams.wikimedia.org',
@@ -49,7 +59,7 @@
 'timestamp': 1358637398,
 'schema': 'Generic',
 'revision': 13,
-'userAgent': 'Mozilla/5.0',
+'userAgent': ua,
 'event': {
 'articleTitle': 'Héctor Elizondo',
 'articleId': 1

-- 
To view, visit https://gerrit.wikimedia.org/r/333641
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb
Gerrit-PatchSet: 11
Gerrit-Project: eventlogging
Gerrit-Branch: master
Gerrit-Owner: Fdans 
Gerrit-Reviewer: Fdans 
Gerrit-Reviewer: Nuria 
Gerrit-Reviewer: Ottomata 
Gerrit-Reviewer: jenkins-bot <>

__

[MediaWiki-commits] [Gerrit] eventlogging[master]: Changes UA string to JSON map

2017-01-23 Thread Fdans (Code Review)
Fdans has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/333641 )

Change subject: Changes UA string to JSON map
..

Changes UA string to JSON map

Uses ua_parser to generate a JSON object with properties obtained from
the user agent string. The capsule schema remains unchanged.

Bug: T153207
Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb
---
M eventlogging/parse.py
M eventlogging/utils.py
M tests/test_parser.py
3 files changed, 31 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/eventlogging refs/changes/41/333641/1

diff --git a/eventlogging/parse.py b/eventlogging/parse.py
index 98bf758..8e9b1ac 100644
--- a/eventlogging/parse.py
+++ b/eventlogging/parse.py
@@ -42,6 +42,7 @@
 
 from .compat import json, unquote_plus, uuid5
 from .event import Event
+from .utils import parse_ua
 
 __all__ = (
 'LogParser', 'ncsa_to_unix',
@@ -155,6 +156,7 @@
 event = {k: f(match.group(k)) for f, k in caster_key_pairs}
 event.update(event.pop('capsule'))
 event['uuid'] = capsule_uuid(event)
+event = parse_ua(event)
 return Event(event)
 
 def __repr__(self):
diff --git a/eventlogging/utils.py b/eventlogging/utils.py
index a0cfa62..ba53503 100644
--- a/eventlogging/utils.py
+++ b/eventlogging/utils.py
@@ -20,6 +20,7 @@
 import threading
 import traceback
 import uuid
+from ua_parser import user_agent_parser
 
 from .compat import (
 items, monotonic_clock, urisplit, urlencode, parse_qsl,
@@ -291,3 +292,10 @@
 # Set module logging level to INFO, DEBUG is too noisy.
 logging.getLogger("kafka").setLevel(logging.INFO)
 logging.getLogger("kazoo").setLevel(logging.INFO)
+
+def parse_ua(event):
+if 'userAgent' in event:
+parsed_ua = user_agent_parser.Parse(event['userAgent'])
+event['userAgent'] = parsed_ua
+parsed_ua.pop('string')
+return event
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 8d0c117..d4ac7e3 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -39,7 +39,7 @@
'2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7'
'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa'
'ms.wikimedia.org 132073 2013-01-19T23:16:38 - '
-   'Mozilla/5.0')
+   'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0')
 parsed = {
 'uuid': '799341a01ba957c79b15dc4d2d950864',
 'recvFrom': 'cp3022.esams.wikimedia.org',
@@ -49,7 +49,26 @@
 'timestamp': 1358637398,
 'schema': 'Generic',
 'revision': 13,
-'userAgent': 'Mozilla/5.0',
+'userAgent': {
+'device': {
+'brand': None,
+'model': None,
+'family': 'Other'
+},
+'os': {
+'major': None,
+'patch_minor': None,
+'minor': None,
+'family': 'Linux',
+'patch': None
+},
+'user_agent': {
+'major': '10',
+'minor': '0',
+'family': 'Firefox',
+'patch': None
+}
+},
 'event': {
 'articleTitle': 'Héctor Elizondo',
 'articleId': 1

-- 
To view, visit https://gerrit.wikimedia.org/r/333641
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iafaeb82e52715d853cf3e68a69969c0bf1e209eb
Gerrit-PatchSet: 1
Gerrit-Project: eventlogging
Gerrit-Branch: master
Gerrit-Owner: Fdans 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits