Milimetric has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/75866


Change subject: still messing with encoding
......................................................................

still messing with encoding

Change-Id: I4b23508c0b2870dbaf75824668d280c4844aae33
---
M tests/test_controllers/test_cohorts.py
M wikimetrics/controllers/cohorts.py
2 files changed, 7 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/66/75866/1

diff --git a/tests/test_controllers/test_cohorts.py b/tests/test_controllers/test_cohorts.py
index 4d4d60f..7700f30 100644
--- a/tests/test_controllers/test_cohorts.py
+++ b/tests/test_controllers/test_cohorts.py
@@ -68,6 +68,6 @@
         # 3. nasty trailing unicode space (the reason this file has an encoding definition)
         problem_username = ' danĀ '
         
-        parsed_user = parse_username(problem_username)
+        parsed_user = parse_username(problem_username, decode=False)
         valid_user = normalize_user(parsed_user, 'enwiki')
         assert_not_equal(valid_user, None)
diff --git a/wikimetrics/controllers/cohorts.py b/wikimetrics/controllers/cohorts.py
index 8b0966e..ba3023d 100644
--- a/wikimetrics/controllers/cohorts.py
+++ b/wikimetrics/controllers/cohorts.py
@@ -283,13 +283,14 @@
                 project = default_project
             
             parsed.append({
+                'raw_username': parse_username(username, decode=False),
                 'username': parse_username(username),
                 'project': project,
             })
     return parsed
 
 
-def parse_username(username):
+def parse_username(username, decode=True):
     """
     parses uncapitalized, whitespace-padded, and weird-charactered mediawiki
     user names into ones that have a chance of being found in the database
@@ -297,11 +298,12 @@
     username = str(username)
     username = username.decode('utf8')
     stripped = username.strip()
+    if not decode:
+        stripped = stripped.encode('utf8')
     # Capitalize the username according to the Mediawiki standard
     # NOTE: unfortunately .title() or .capitalize() don't work
     # because 'miliMetric'.capitalize() == 'Milimetric'
-    capitalized = stripped[0].upper() + stripped[1:]
-    return capitalized.encode('utf8')
+    return stripped[0].upper() + stripped[1:]
 
 
 def normalize_project(project):
@@ -391,7 +393,7 @@
             record['reason_invalid'] = 'invalid project: %s' % record['project']
             invalid.append(record)
             continue
-        normalized_user = normalize_user(record['username'], normalized_project)
+        normalized_user = normalize_user(record['raw_username'], normalized_project)
         # make a link to the potential user page even if user doesn't exist
         # this gives a chance to see any misspelling etc.
         if normalized_user is None:

-- 
To view, visit https://gerrit.wikimedia.org/r/75866
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I4b23508c0b2870dbaf75824668d280c4844aae33
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <dandree...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to