Milimetric has submitted this change and it was merged.
Change subject: Improves retrieval of user names on csv report
......................................................................
Improves retrieval of user names on csv report
User Names are retrieved in bulk per project for cvs reports.
Bug: 71255
Change-Id: I4f1b6865e9700e29efc9b0ca4380feb08da3b138
---
M tests/test_api/test_cohort_service.py
M wikimetrics/api/cohorts.py
M wikimetrics/controllers/reports.py
M wikimetrics/models/storage/wikiuser.py
4 files changed, 111 insertions(+), 19 deletions(-)
Approvals:
Milimetric: Looks good to me, approved
diff --git a/tests/test_api/test_cohort_service.py
b/tests/test_api/test_cohort_service.py
index a699706..2ffab3c 100644
--- a/tests/test_api/test_cohort_service.py
+++ b/tests/test_api/test_cohort_service.py
@@ -152,3 +152,36 @@
cohorts = self.cohort_service.get_list(self.session,
self.owner_user_id)
assert_true('dewiki' in [c.default_project for c in cohorts])
+
+ def test_wikiusernames_for_not_existing_cohort(self):
+ """
+ If a cohort does not exist it returns an empty list of user names to
make
+ things easy for the UI
+ """
+ user_names = self.cohort_service.get_wikiusernames_for_cohort('9999',
+
self.session)
+ assert_equals(len(user_names.keys()), 0)
+
+ def test_wikiusernames(self):
+ """
+ Make sure usernames can be retrieved using Wikiuserkey
+ """
+ from wikimetrics.models.storage import (
+ WikiUserKey
+ )
+ # let's get two users and see that names match
+ # this returns a list of WikiUserStore objects
+ users = self.cohort_service.get_wikiusers(self.fixed_cohort,
self.session, 2)
+ user1 = users[0]
+ key1 = WikiUserKey(user1.mediawiki_userid, user1.project,
+ user1.validating_cohort)
+
+ user2 = users[1]
+ key2 = WikiUserKey(user2.mediawiki_userid, user2.project,
+ user2.validating_cohort)
+
+ user_names = self.cohort_service.get_wikiusernames_for_cohort(
+ self.fixed_cohort.id, self.session)
+
+ assert_equals(user_names[key1], user1.mediawiki_username)
+ assert_equals(user_names[key2], user2.mediawiki_username)
diff --git a/wikimetrics/api/cohorts.py b/wikimetrics/api/cohorts.py
index e2522e2..270663f 100644
--- a/wikimetrics/api/cohorts.py
+++ b/wikimetrics/api/cohorts.py
@@ -4,7 +4,7 @@
from wikimetrics.exceptions import Unauthorized, InvalidCohort
from wikimetrics.models import cohort_classes, ValidatedCohort, WikiCohort
from wikimetrics.models.storage import (
- CohortStore, CohortUserStore, UserStore, WikiUserStore,
+ CohortStore, CohortUserStore, UserStore, WikiUserStore, WikiUserKey
)
from wikimetrics.enums import CohortUserRole
@@ -44,6 +44,34 @@
else:
return list(c)
+ def get_wikiusernames_for_cohort(self, cohort_id, session):
+ """
+ Convenience function for the UI to retrieve
+ wikiuser names given a cohort_id
+
+ Parameters:
+ cohort_id
+ session
+ Returns:
+ Dictionary keyed by WikiUserKey or empty dictionary
+ if records not found.
+ """
+ user_names = {}
+
+ try:
+ results = session.query(WikiUserStore.mediawiki_username,
+ WikiUserStore.project,
+ WikiUserStore.mediawiki_userid)\
+ .filter(WikiUserStore.validating_cohort == cohort_id).all()
+
+ except NoResultFound:
+ return user_names
+
+ for r in results:
+ user_names[WikiUserKey(r[2], r[1], cohort_id)] = r[0]
+
+ return user_names
+
# TODO: check ownership of the cohort
def get_users_by_project(self, cohort):
"""
diff --git a/wikimetrics/controllers/reports.py
b/wikimetrics/controllers/reports.py
index 99af275..3ef7c6f 100644
--- a/wikimetrics/controllers/reports.py
+++ b/wikimetrics/controllers/reports.py
@@ -15,7 +15,7 @@
)
from wikimetrics.enums import Aggregation, TimeseriesChoices
from wikimetrics.exceptions import UnauthorizedReportAccessError
-from wikimetrics.api import PublicReportFileManager
+from wikimetrics.api import PublicReportFileManager, CohortService
@app.before_request
@@ -27,6 +27,10 @@
g.file_manager = PublicReportFileManager(
app.logger,
app.absolute_path_to_app_root)
+ if request.path.startswith('/reports/result/'):
+ cohort_service = getattr(g, 'cohort_service', None)
+ if cohort_service is None:
+ g.cohort_service = CohortService()
@app.route('/reports/unset-public/<int:report_id>', methods=['POST'])
@@ -155,10 +159,12 @@
task_result = pj.get_result_safely(result)
p = pj.pretty_parameters()
+ user_names = get_usernames_for_task_result(task_result)
+
if 'Metric_timeseries' in p and p['Metric_timeseries'] !=
TimeseriesChoices.NONE:
- csv_io = get_timeseries_csv(task_result, pj, p)
+ csv_io = get_timeseries_csv(task_result, pj, p, user_names)
else:
- csv_io = get_simple_csv(task_result, pj, p)
+ csv_io = get_simple_csv(task_result, pj, p, user_names)
res = Response(csv_io.getvalue(), mimetype='text/csv')
res.headers['Content-Disposition'] =\
@@ -168,25 +174,41 @@
return json_response(status=celery_task.status)
-def get_user_name(session, wiki_user_key):
+def get_usernames_for_task_result(task_result):
"""
Parameters
- session : a session to the wikimetrics database to search with
- wiki_user_key : an instance of WikiUserKey to search for a WikiUser
with
+ task_result : the result dictionary from Celery
+ Returns
+ user_names : dictionary of user names (keyed by (user_id, project))
+ empty if results are not detailed by user
+
+ TODO: this function should move outside the controller,
+ at the time of writing the function we are
+ consolidating code that wasduplicated
"""
- return session.query(WikiUserStore.mediawiki_username)\
- .filter(WikiUserStore.mediawiki_userid == wiki_user_key.user_id)\
- .filter(WikiUserStore.project == wiki_user_key.user_project)\
- .filter(WikiUserStore.validating_cohort == wiki_user_key.cohort_id)\
- .one()[0]
+ user_names = {}
+ if Aggregation.IND in task_result:
+ session = db.get_session()
+ # cohort should be the same for all users
+ # get cohort from first key
+ cohort_id = None
+
+ for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
+ wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
+ cohort_id = wiki_user_key.cohort_id
+ break
+
+ user_names = g.cohort_service.get_wikiusernames_for_cohort(cohort_id,
session)
+ return user_names
-def get_timeseries_csv(task_result, pj, parameters):
+def get_timeseries_csv(task_result, pj, parameters, user_names):
"""
Parameters
task_result : the result dictionary from Celery
pj : a pointer to the permanent job
parameters : a dictionary of pj.parameters
+ user_names : dictionary of user names (keyed by (user_id, project))
Returns
A StringIO instance representing timeseries CSV
@@ -213,15 +235,15 @@
# collect rows to output in CSV
task_rows = []
- session = db.get_session()
# Individual Results
if Aggregation.IND in task_result:
# fold user_id into dict so we can use DictWriter to escape things
for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
user_id = wiki_user_key.user_id
- user_name = get_user_name(session, wiki_user_key)
project = wiki_user_key.user_project
+ # careful tuple stores user_id like a string
+ user_name = user_names.get(wiki_user_key, '')
for subrow in row.keys():
task_row = row[subrow].copy()
task_row['user_id'] = user_id
@@ -269,12 +291,13 @@
return csv_io
-def get_simple_csv(task_result, pj, parameters):
+def get_simple_csv(task_result, pj, parameters, user_names):
"""
Parameters
task_result : the result dictionary from Celery
pj : a pointer to the permanent job
parameters : a dictionary of pj.parameters
+ user_names : dictionary of user names (keyed by (user_id, project))
Returns
A StringIO instance representing simple CSV
@@ -301,16 +324,16 @@
# collect rows to output in CSV
task_rows = []
-
- session = db.get_session()
# Individual Results
if Aggregation.IND in task_result:
# fold user_id into dict so we can use DictWriter to escape things
for wiki_user_key_str, row in task_result[Aggregation.IND].iteritems():
wiki_user_key = WikiUserKey.fromstr(wiki_user_key_str)
user_id = wiki_user_key.user_id
- user_name = get_user_name(session, wiki_user_key)
project = wiki_user_key.user_project
+
+ # careful tuple stores user_id like a string
+ user_name = user_names.get(wiki_user_key, '')
task_row = row.copy()
task_row['user_id'] = user_id
task_row['user_name'] = user_name
diff --git a/wikimetrics/models/storage/wikiuser.py
b/wikimetrics/models/storage/wikiuser.py
index 0b64e47..027a56f 100644
--- a/wikimetrics/models/storage/wikiuser.py
+++ b/wikimetrics/models/storage/wikiuser.py
@@ -53,3 +53,11 @@
return WikiUserKey.SEPARATOR.join(
(self.user_id, self.user_project, self.cohort_id)
)
+
+ def __hash__(self):
+ # using tuples to hash using internal hashing mechanism as much as
possible
+ return hash(((self.user_id, self.user_project), self.cohort_id))
+
+ def __eq__(self, other):
+ return (other.user_id == self.user_id and other.user_project ==
self.user_project
+ and other.cohort_id == self.cohort_id)
--
To view, visit https://gerrit.wikimedia.org/r/167356
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I4f1b6865e9700e29efc9b0ca4380feb08da3b138
Gerrit-PatchSet: 11
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Nuria <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits