jenkins-bot has submitted this change and it was merged.
Change subject: Improve performance by excluding 0 results
......................................................................
Improve performance by excluding 0 results
When running wiki cohorts, these three metrics were returning
editor_id: 0 for editors that passed early filters but did not pass the
ultimate metric test. This is inefficient for a few reasons, so I
cleaned up the tests that expected this behavior and fixed the code.
Change-Id: I280028ee85b8e97da8ea09b34012654c49c12c9f
---
M tests/test_metrics/test_rolling_active_editor.py
M tests/test_metrics/test_rolling_new_active_editor.py
M tests/test_metrics/test_rolling_surviving_new_active_editor.py
M wikimetrics/metrics/rolling_active_editor.py
M wikimetrics/metrics/rolling_new_active_editor.py
M wikimetrics/metrics/rolling_surviving_new_active_editor.py
6 files changed, 30 insertions(+), 63 deletions(-)
Approvals:
Mforns: Checked; Looks good to me, but someone else must approve
Nuria: Verified; Looks good to me, approved
jenkins-bot: Verified
diff --git a/tests/test_metrics/test_rolling_active_editor.py
b/tests/test_metrics/test_rolling_active_editor.py
index 2312b4b..f150c12 100644
--- a/tests/test_metrics/test_rolling_active_editor.py
+++ b/tests/test_metrics/test_rolling_active_editor.py
@@ -96,26 +96,18 @@
expected = set(self.editor_ids + [make_active.user_id])
# editors with no edits at all won't be picked up by the query
expected.remove(self.editor_ids[3])
+ # editors that haven't made enough queries won't pass the test
+ expected.remove(self.editor_ids[1])
metric = RollingActiveEditor(
end_date=self.r_plus_30,
)
results = metric(None, self.mwSession)
+ # all wiki cohort results will be editors that pass the test
assert_equal(set(results.keys()), expected)
- expected_results = {
- # all actives show, whether in a cohort or not
- self.editor_ids[0] : 1,
- self.editor_ids[2] : 1,
- self.editor_ids[4] : 1,
- make_active.user_id : 1,
- # users with not enough edits will show up with 0 as the result
- self.editor_ids[1] : 0,
- }
- for user_id, result in expected_results.items():
- assert_equal(results[user_id][metric.id], result)
- # users with no edits at all just won't show up
- assert_equal(results.get(self.editor_ids[3], -1), -1)
+ for user_id in expected:
+ assert_equal(results[user_id][metric.id], 1)
def test_wiki_cohort_all_bots(self):
# make everyone a bot and make sure they're excluded
diff --git a/tests/test_metrics/test_rolling_new_active_editor.py
b/tests/test_metrics/test_rolling_new_active_editor.py
index b057819..d80ff1b 100644
--- a/tests/test_metrics/test_rolling_new_active_editor.py
+++ b/tests/test_metrics/test_rolling_new_active_editor.py
@@ -118,24 +118,18 @@
expected.remove(self.editor_ids[2])
expected.remove(self.editor_ids[3])
expected.remove(self.editor_ids[4])
+ # editors that haven't made enough queries won't pass the test
+ expected.remove(self.editor_ids[6])
metric = RollingNewActiveEditor(
end_date=self.r_plus_30,
)
results = metric(None, self.mwSession)
+ # all wiki cohort results will be editors that pass the test
assert_equal(set(results.keys()), expected)
- expected_results = {
- # all actives show, whether in a cohort or not
- self.editor_ids[5] : 1,
- self.editor_ids[7] : 1,
- self.editor_ids[9] : 1,
- make_active.user_id : 1,
- # users with not enough edits will show up with 0 as the result
- self.editor_ids[6] : 0,
- }
- for user_id, result in expected_results.items():
- assert_equal(results[user_id][metric.id], result)
+ for user_id in expected:
+ assert_equal(results[user_id][metric.id], 1)
def test_wiki_cohort_nobody_qualifying(self):
# make everyone fail the registration criteria and make sure they're
excluded
diff --git a/tests/test_metrics/test_rolling_surviving_new_active_editor.py
b/tests/test_metrics/test_rolling_surviving_new_active_editor.py
index d60f53a..ac7b424 100644
--- a/tests/test_metrics/test_rolling_surviving_new_active_editor.py
+++ b/tests/test_metrics/test_rolling_surviving_new_active_editor.py
@@ -125,7 +125,8 @@
# editors with no edits at all won't be picked up by the query
expected.remove(self.editor_ids[13])
# editors that haven't registered in time shouldn't be picked up at all
- for reg_before in range(7):
+ # editors that haven't made enough queries won't pass the test
+ for reg_before in range(11):
expected.remove(self.editor_ids[reg_before])
metric = RollingSurvivingNewActiveEditor(
@@ -133,22 +134,10 @@
)
results = metric(None, self.mwSession)
+ # all wiki cohort results will be editors that pass the test
assert_equal(set(results.keys()), expected)
- expected_results = {
- # all actives show, whether in a cohort or not
- self.editor_ids[11] : 1,
- self.editor_ids[12] : 1,
- make_active.user_id : 1,
- # users with not enough edits will show up with 0 as the result
- self.editor_ids[7] : 0,
- self.editor_ids[8] : 0,
- self.editor_ids[9] : 0,
- self.editor_ids[10] : 0,
- }
- print expected_results
- print results
- for user_id, result in expected_results.items():
- assert_equal(results[user_id][metric.id], result)
+ for user_id in expected:
+ assert_equal(results[user_id][metric.id], 1)
def test_wiki_cohort_nobody_qualifying(self):
# make everyone fail the registration criteria and make sure they're
excluded
diff --git a/wikimetrics/metrics/rolling_active_editor.py
b/wikimetrics/metrics/rolling_active_editor.py
index 35ffc9c..e8c344a 100644
--- a/wikimetrics/metrics/rolling_active_editor.py
+++ b/wikimetrics/metrics/rolling_active_editor.py
@@ -107,14 +107,12 @@
.subquery()
edits = revisions.union_all(archived).subquery()
- edits_by_user = session.query(
- edits.c.user_id,
- func.IF(func.SUM(edits.c.count) >= number_of_edits, 1, 0)
- )\
+ edits_by_user = session.query(edits.c.user_id)\
.filter(edits.c.user_id.notin_(bot_user_ids))\
- .group_by(edits.c.user_id)
+ .group_by(edits.c.user_id)\
+ .having(func.SUM(edits.c.count) >= number_of_edits)
- metric_results = {r[0]: {self.id : r[1]} for r in edits_by_user.all()}
+ metric_results = {r[0]: {self.id : 1} for r in edits_by_user.all()}
if user_ids is None:
return metric_results
diff --git a/wikimetrics/metrics/rolling_new_active_editor.py
b/wikimetrics/metrics/rolling_new_active_editor.py
index 1f2c9f9..006bb17 100644
--- a/wikimetrics/metrics/rolling_new_active_editor.py
+++ b/wikimetrics/metrics/rolling_new_active_editor.py
@@ -126,14 +126,12 @@
.subquery()
new_edits = revisions.union_all(archived).subquery()
- new_edits_by_user = session.query(
- new_edits.c.user_id,
- func.IF(func.SUM(new_edits.c.count) >= number_of_edits, 1, 0)
- )\
+ new_edits_by_user = session.query(new_edits.c.user_id)\
.filter(new_edits.c.user_id.notin_(bot_user_ids))\
- .group_by(new_edits.c.user_id)
+ .group_by(new_edits.c.user_id)\
+ .having(func.SUM(new_edits.c.count) >= number_of_edits)
- metric_results = {r[0]: {self.id : r[1]} for r in
new_edits_by_user.all()}
+ metric_results = {r[0]: {self.id : 1} for r in new_edits_by_user.all()}
if user_ids is None:
return metric_results
diff --git a/wikimetrics/metrics/rolling_surviving_new_active_editor.py
b/wikimetrics/metrics/rolling_surviving_new_active_editor.py
index d43afb8..900c1c4 100644
--- a/wikimetrics/metrics/rolling_surviving_new_active_editor.py
+++ b/wikimetrics/metrics/rolling_surviving_new_active_editor.py
@@ -145,19 +145,15 @@
# sum the count_one values together, check it's >= number_of_edits
# sum the count_two values together, check it's >= number_of_edits
new_edits = revisions.union_all(archived).subquery()
- new_edits_by_user = session.query(
- new_edits.c.user_id,
- func.IF(
- and_(
- func.SUM(new_edits.c.count_one) >= number_of_edits,
- func.SUM(new_edits.c.count_two) >= number_of_edits,
- ), 1, 0
- )
- )\
+ new_edits_by_user = session.query(new_edits.c.user_id)\
.filter(new_edits.c.user_id.notin_(bot_user_ids))\
- .group_by(new_edits.c.user_id)
+ .group_by(new_edits.c.user_id)\
+ .having(and_(
+ func.SUM(new_edits.c.count_one) >= number_of_edits,
+ func.SUM(new_edits.c.count_two) >= number_of_edits,
+ ))
- metric_results = {r[0]: {self.id : r[1]} for r in
new_edits_by_user.all()}
+ metric_results = {r[0]: {self.id : 1} for r in new_edits_by_user.all()}
if user_ids is None:
return metric_results
--
To view, visit https://gerrit.wikimedia.org/r/167583
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I280028ee85b8e97da8ea09b34012654c49c12c9f
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits