Milimetric has uploaded a new change for review. https://gerrit.wikimedia.org/r/86851
Change subject: deduplicating by username and project ...................................................................... deduplicating by username and project Change-Id: Idb827a23bf6669ecda263b35b7b44d28d9d33e2e --- M scripts/test M tests/test_controllers/test_cohorts.py M tests/test_utils/test_one_off_functions.py M wikimetrics/controllers/cohorts.py 4 files changed, 28 insertions(+), 3 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/51/86851/1 diff --git a/scripts/test b/scripts/test index b31359a..83a5573 100755 --- a/scripts/test +++ b/scripts/test @@ -1,4 +1,4 @@ # for example: -# scripts/test "tests/test_controllers/test_cohorts.py:TestCohortsController" +# scripts/test "tests/test_controllers/test_cohorts.py:CohortsControllerTest" # rm .coverage *.db find -name *.pyc | xargs rm ; nosetests --cover-erase $1 diff --git a/tests/test_controllers/test_cohorts.py b/tests/test_controllers/test_cohorts.py index f07fba1..6b7dcd2 100644 --- a/tests/test_controllers/test_cohorts.py +++ b/tests/test_controllers/test_cohorts.py @@ -18,7 +18,7 @@ from wikimetrics.models import Cohort -class TestCohortsController(WebTest): +class CohortsControllerTest(WebTest): def test_index(self): response = self.app.get('/cohorts/', follow_redirects=True) diff --git a/tests/test_utils/test_one_off_functions.py b/tests/test_utils/test_one_off_functions.py index c538983..726e4d7 100644 --- a/tests/test_utils/test_one_off_functions.py +++ b/tests/test_utils/test_one_off_functions.py @@ -4,6 +4,7 @@ from unittest import TestCase from wikimetrics.utils import ( stringify, + deduplicate_by_key, ) from wikimetrics.metrics import NamespaceEdits @@ -31,3 +32,27 @@ result = stringify(normal='hello world') assert_true(result.find('"normal"') >= 0) assert_true(result.find('normal') >= 0) + + def test_deduplicate_by_key(self): + collection_of_dicts = [ + {'index': 'one', 'other': '1'}, + {'index': 'two', 'other': '2'}, + {'index': 'two', 'other': '3'}, + ] + no_duplicates = deduplicate_by_key(collection_of_dicts, lambda r: r['index']) + expected = collection_of_dicts[0:2] + assert_equals(sorted(no_duplicates), expected) + + def test_deduplicate_by_key_tuple(self): + collection_of_dicts = [ + {'index': 'one', 'other': '1'}, + {'index': 'two', 'other': '2'}, + {'index': 'two', 'other': '3'}, + {'index': 'two', 'other': '2'}, + ] + no_duplicates = deduplicate_by_key( + collection_of_dicts, + lambda r: (r['index'], r['other']) + ) + expected = collection_of_dicts[0:3] + assert_equals(sorted(no_duplicates), expected) diff --git a/wikimetrics/controllers/cohorts.py b/wikimetrics/controllers/cohorts.py index ddfd0b5..64ff35e 100644 --- a/wikimetrics/controllers/cohorts.py +++ b/wikimetrics/controllers/cohorts.py @@ -415,5 +415,5 @@ record['user_id'], record['username'] = normalized_user valid.append(record) - valid = deduplicate_by_key(valid, lambda record: record['username']) + valid = deduplicate_by_key(valid, lambda r: (r['username'], r['project'])) return (valid, invalid) -- To view, visit https://gerrit.wikimedia.org/r/86851 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Idb827a23bf6669ecda263b35b7b44d28d9d33e2e Gerrit-PatchSet: 1 Gerrit-Project: analytics/wikimetrics Gerrit-Branch: master Gerrit-Owner: Milimetric <dandree...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits