Milimetric has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/86851


Change subject: deduplicating by username and project
......................................................................

deduplicating by username and project

Change-Id: Idb827a23bf6669ecda263b35b7b44d28d9d33e2e
---
M scripts/test
M tests/test_controllers/test_cohorts.py
M tests/test_utils/test_one_off_functions.py
M wikimetrics/controllers/cohorts.py
4 files changed, 28 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/51/86851/1

diff --git a/scripts/test b/scripts/test
index b31359a..83a5573 100755
--- a/scripts/test
+++ b/scripts/test
@@ -1,4 +1,4 @@
 # for example:
-# scripts/test "tests/test_controllers/test_cohorts.py:TestCohortsController"
+# scripts/test "tests/test_controllers/test_cohorts.py:CohortsControllerTest"
 # rm .coverage *.db
 find -name *.pyc | xargs rm ; nosetests --cover-erase $1
diff --git a/tests/test_controllers/test_cohorts.py b/tests/test_controllers/test_cohorts.py
index f07fba1..6b7dcd2 100644
--- a/tests/test_controllers/test_cohorts.py
+++ b/tests/test_controllers/test_cohorts.py
@@ -18,7 +18,7 @@
 from wikimetrics.models import Cohort
 
 
-class TestCohortsController(WebTest):
+class CohortsControllerTest(WebTest):
     
     def test_index(self):
         response = self.app.get('/cohorts/', follow_redirects=True)
diff --git a/tests/test_utils/test_one_off_functions.py b/tests/test_utils/test_one_off_functions.py
index c538983..726e4d7 100644
--- a/tests/test_utils/test_one_off_functions.py
+++ b/tests/test_utils/test_one_off_functions.py
@@ -4,6 +4,7 @@
 from unittest import TestCase
 from wikimetrics.utils import (
     stringify,
+    deduplicate_by_key,
 )
 from wikimetrics.metrics import NamespaceEdits
 
@@ -31,3 +32,27 @@
         result = stringify(normal='hello world')
         assert_true(result.find('"normal"') >= 0)
         assert_true(result.find('normal') >= 0)
+    
+    def test_deduplicate_by_key(self):
+        collection_of_dicts = [
+            {'index': 'one', 'other': '1'},
+            {'index': 'two', 'other': '2'},
+            {'index': 'two', 'other': '3'},
+        ]
+        no_duplicates = deduplicate_by_key(collection_of_dicts, lambda r: r['index'])
+        expected = collection_of_dicts[0:2]
+        assert_equals(sorted(no_duplicates), expected)
+    
+    def test_deduplicate_by_key_tuple(self):
+        collection_of_dicts = [
+            {'index': 'one', 'other': '1'},
+            {'index': 'two', 'other': '2'},
+            {'index': 'two', 'other': '3'},
+            {'index': 'two', 'other': '2'},
+        ]
+        no_duplicates = deduplicate_by_key(
+            collection_of_dicts,
+            lambda r: (r['index'], r['other'])
+        )
+        expected = collection_of_dicts[0:3]
+        assert_equals(sorted(no_duplicates), expected)
diff --git a/wikimetrics/controllers/cohorts.py b/wikimetrics/controllers/cohorts.py
index ddfd0b5..64ff35e 100644
--- a/wikimetrics/controllers/cohorts.py
+++ b/wikimetrics/controllers/cohorts.py
@@ -415,5 +415,5 @@
         record['user_id'], record['username'] = normalized_user
         valid.append(record)
     
-    valid = deduplicate_by_key(valid, lambda record: record['username'])
+    valid = deduplicate_by_key(valid, lambda r: (r['username'], r['project']))
     return (valid, invalid)

-- 
To view, visit https://gerrit.wikimedia.org/r/86851
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Idb827a23bf6669ecda263b35b7b44d28d9d33e2e
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <dandree...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to