Milimetric has submitted this change and it was merged.

Change subject: Improves retrieval of user names on csv report
......................................................................


Improves retrieval of user names on csv report

User Names are retrieved in bulk per project for cvs reports.

Bug: 71255
Change-Id: I4f1b6865e9700e29efc9b0ca4380feb08da3b138
---
M tests/test_api/test_cohort_service.py
M wikimetrics/api/cohorts.py
M wikimetrics/controllers/reports.py
M wikimetrics/models/storage/wikiuser.py
4 files changed, 111 insertions(+), 19 deletions(-)

Approvals:
  Milimetric: Looks good to me, approved



diff --git a/tests/test_api/test_cohort_service.py 
b/tests/test_api/test_cohort_service.py
index a699706..2ffab3c 100644
--- a/tests/test_api/test_cohort_service.py
+++ b/tests/test_api/test_cohort_service.py
@@ -152,3 +152,36 @@
 
         cohorts = self.cohort_service.get_list(self.session, 
self.owner_user_id)
         assert_true('dewiki' in [c.default_project for c in cohorts])
+
+    def test_wikiusernames_for_not_existing_cohort(self):
+        """
+        If a cohort does not exist it returns an empty list of user names to 
make
+        things easy for the UI
+        """
+        user_names = self.cohort_service.get_wikiusernames_for_cohort('9999',
+                                                                      
self.session)
+        assert_equals(len(user_names.keys()), 0)
+
+    def test_wikiusernames(self):
+        """
+        Make sure usernames can be retrieved using Wikiuserkey
+        """
+        from wikimetrics.models.storage import (
+            WikiUserKey
+        )
+        # let's get two users and see that names match
+        # this returns a list of WikiUserStore objects
+        users = self.cohort_service.get_wikiusers(self.fixed_cohort, 
self.session, 2)
+        user1 = users[0]
+        key1 = WikiUserKey(user1.mediawiki_userid, user1.project,
+                           user1.validating_cohort)
+
+        user2 = users[1]
+        key2 = WikiUserKey(user2.mediawiki_userid, user2.project,
+                           user2.validating_cohort)
+
+        user_names = self.cohort_service.get_wikiusernames_for_cohort(
+            self.fixed_cohort.id, self.session)
+
+        assert_equals(user_names[key1], user1.mediawiki_username)
+        assert_equals(user_names[key2], user2.mediawiki_username)
diff --git a/wikimetrics/api/cohorts.py b/wikimetrics/api/cohorts.py
index e2522e2..270663f 100644
--- a/wikimetrics/api/cohorts.py
+++ b/wikimetrics/api/cohorts.py
@@ -4,7 +4,7 @@
 from wikimetrics.exceptions import Unauthorized, InvalidCohort
 from wikimetrics.models import cohort_classes, ValidatedCohort, WikiCohort
 from wikimetrics.models.storage import (
-    CohortStore, CohortUserStore, UserStore, WikiUserStore,
+    CohortStore, CohortUserStore, UserStore, WikiUserStore, WikiUserKey
 )
 from wikimetrics.enums import CohortUserRole
 
@@ -44,6 +44,34 @@
         else:
             return list(c)
 
+    def get_wikiusernames_for_cohort(self, cohort_id, session):
+        """
+        Convenience function for the UI to retrieve
+        wikiuser names given a cohort_id
+
+        Parameters:
+            cohort_id
+            session
+        Returns:
+            Dictionary keyed by WikiUserKey or empty dictionary
+            if records not found.
+        """
+        user_names = {}
+
+        try:
+            results = session.query(WikiUserStore.mediawiki_username,
+                                    WikiUserStore.project,
+                                    WikiUserStore.mediawiki_userid)\
+                .filter(WikiUserStore.validating_cohort == cohort_id).all()
+
+        except NoResultFound:
+            return user_names
+
+        for r in results:
+            user_names[WikiUserKey(r[2], r[1], cohort_id)] = r[0]
+
+        return user_names
+
     # TODO: check ownership of the cohort
     def get_users_by_project(self, cohort):
         """
diff --git a/wikimetrics/controllers/reports.py 
b/wikimetrics/controllers/reports.py
index 99af275..3ef7c6f 100644
--- a/wikimetrics/controllers/reports.py
+++ b/wikimetrics/controllers/reports.py
@@ -15,7 +15,7 @@
 )
 from wikimetrics.enums import Aggregation, TimeseriesChoices
 from wikimetrics.exceptions import UnauthorizedReportAccessError
-from wikimetrics.api import PublicReportFileManager
+from wikimetrics.api import PublicReportFileManager, CohortService
 
 
 @app.before_request
@@ -27,6 +27,10 @@
                 g.file_manager = PublicReportFileManager(
                     app.logger,
                     app.absolute_path_to_app_root)
+        if request.path.startswith('/reports/result/'):
+            cohort_service = getattr(g, 'cohort_service', None)
+            if cohort_service is None:
+                g.cohort_service = CohortService()
 
 
 @app.route('/reports/unset-public/<int:report_id>', methods=['POST'])
@@ -155,10 +159,12 @@
         task_result = pj.get_result_safely(result)
         p = pj.pretty_parameters()
 
+        user_names = get_usernames_for_task_result(task_result)
+
         if 'Metric_timeseries' in p and p['Metric_timeseries'] != 
TimeseriesChoices.NONE:
-            csv_io = get_timeseries_csv(task_result, pj, p)
+            csv_io = get_timeseries_csv(task_result, pj, p, user_names)
         else:
-            csv_io = get_simple_csv(task_result, pj, p)
+            csv_io = get_simple_csv(task_result, pj, p, user_names)
 
         res = Response(csv_io.getvalue(), mimetype='text/csv')
         res.headers['Content-Disposition'] =\
@@ -168,25 +174,41 @@
         return json_response(status=celery_task.status)
 
 
-def get_user_name(session, wiki_user_key):
+def get_usernames_for_task_result(task_result):
     """
     Parameters
-        session         : a session to the wikimetrics database to search with
-        wiki_user_key   : an instance of WikiUserKey to search for a WikiUser 
with
+        task_result : the result dictionary from Celery
+    Returns
+         user_names : dictionary of user names (keyed by (user_id, project))
+                      empty if results are not detailed by user
+
+    TODO: this function should move outside the controller,
+          at the time of writing the function we are
+          consolidating code that wasduplicated
     """
-    return session.query(WikiUserStore.mediawiki_username)\
-        .filter(WikiUserStore.mediawiki_userid == wiki_user_key.user_id)\
-        .filter(WikiUserStore.project == wiki_user_key.user_project)\
-        .filter(WikiUserStore.validating_cohort == wiki_user_key.cohort_id)\
-        .one()[0]
+    user_names = {}
+    if Aggregation.IND in task_result:
+        session = db.get_session()
+        # cohort should be the same for all users
+        # get cohort from first key
+        cohort_id = None
+
+        for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
+            wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
+            cohort_id = wiki_user_key.cohort_id
+            break
+
+        user_names = g.cohort_service.get_wikiusernames_for_cohort(cohort_id, 
session)
+    return user_names
 
 
-def get_timeseries_csv(task_result, pj, parameters):
+def get_timeseries_csv(task_result, pj, parameters, user_names):
     """
     Parameters
         task_result : the result dictionary from Celery
         pj          : a pointer to the permanent job
         parameters  : a dictionary of pj.parameters
+        user_names  : dictionary of user names (keyed by (user_id, project))
 
     Returns
         A StringIO instance representing timeseries CSV
@@ -213,15 +235,15 @@
     # collect rows to output in CSV
     task_rows = []
 
-    session = db.get_session()
     # Individual Results
     if Aggregation.IND in task_result:
         # fold user_id into dict so we can use DictWriter to escape things
         for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
             wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
             user_id = wiki_user_key.user_id
-            user_name = get_user_name(session, wiki_user_key)
             project = wiki_user_key.user_project
+            # careful tuple stores user_id like a string
+            user_name = user_names.get(wiki_user_key, '')
             for subrow in row.keys():
                 task_row = row[subrow].copy()
                 task_row['user_id'] = user_id
@@ -269,12 +291,13 @@
     return csv_io
 
 
-def get_simple_csv(task_result, pj, parameters):
+def get_simple_csv(task_result, pj, parameters, user_names):
     """
     Parameters
         task_result : the result dictionary from Celery
         pj          : a pointer to the permanent job
         parameters  : a dictionary of pj.parameters
+        user_names  : dictionary of user names (keyed by (user_id, project))
 
     Returns
         A StringIO instance representing simple CSV
@@ -301,16 +324,16 @@
 
     # collect rows to output in CSV
     task_rows = []
-
-    session = db.get_session()
     # Individual Results
     if Aggregation.IND in task_result:
         # fold user_id into dict so we can use DictWriter to escape things
         for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
             wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
             user_id = wiki_user_key.user_id
-            user_name = get_user_name(session, wiki_user_key)
             project = wiki_user_key.user_project
+
+            # careful tuple stores user_id like a string
+            user_name = user_names.get(wiki_user_key, '')
             task_row = row.copy()
             task_row['user_id'] = user_id
             task_row['user_name'] = user_name
diff --git a/wikimetrics/models/storage/wikiuser.py 
b/wikimetrics/models/storage/wikiuser.py
index 0b64e47..027a56f 100644
--- a/wikimetrics/models/storage/wikiuser.py
+++ b/wikimetrics/models/storage/wikiuser.py
@@ -53,3 +53,11 @@
         return WikiUserKey.SEPARATOR.join(
             (self.user_id, self.user_project, self.cohort_id)
         )
+        
+    def __hash__(self):
+        # using tuples to hash using internal hashing mechanism as much as 
possible
+        return hash(((self.user_id, self.user_project), self.cohort_id))
+        
+    def __eq__(self, other):
+        return (other.user_id == self.user_id and other.user_project == 
self.user_project
+                and other.cohort_id == self.cohort_id)

-- 
To view, visit https://gerrit.wikimedia.org/r/167356
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I4f1b6865e9700e29efc9b0ca4380feb08da3b138
Gerrit-PatchSet: 11
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Nuria <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to