jenkins-bot has submitted this change and it was merged.

Change subject: Improve performance by excluding 0 results
......................................................................


Improve performance by excluding 0 results

When running wiki cohorts, these three metrics were returning
editor_id: 0 for editors that passed early filters but did not pass the
ultimate metric test.  This is inefficient for a few reasons, so I
cleaned up the tests that expected this behavior and fixed the code.

Change-Id: I280028ee85b8e97da8ea09b34012654c49c12c9f
---
M tests/test_metrics/test_rolling_active_editor.py
M tests/test_metrics/test_rolling_new_active_editor.py
M tests/test_metrics/test_rolling_surviving_new_active_editor.py
M wikimetrics/metrics/rolling_active_editor.py
M wikimetrics/metrics/rolling_new_active_editor.py
M wikimetrics/metrics/rolling_surviving_new_active_editor.py
6 files changed, 30 insertions(+), 63 deletions(-)

Approvals:
  Mforns: Checked; Looks good to me, but someone else must approve
  Nuria: Verified; Looks good to me, approved
  jenkins-bot: Verified



diff --git a/tests/test_metrics/test_rolling_active_editor.py 
b/tests/test_metrics/test_rolling_active_editor.py
index 2312b4b..f150c12 100644
--- a/tests/test_metrics/test_rolling_active_editor.py
+++ b/tests/test_metrics/test_rolling_active_editor.py
@@ -96,26 +96,18 @@
         expected = set(self.editor_ids + [make_active.user_id])
         # editors with no edits at all won't be picked up by the query
         expected.remove(self.editor_ids[3])
+        # editors that haven't made enough edits won't pass the test
+        expected.remove(self.editor_ids[1])
 
         metric = RollingActiveEditor(
             end_date=self.r_plus_30,
         )
         results = metric(None, self.mwSession)
 
+        # all wiki cohort results will be editors that pass the test
         assert_equal(set(results.keys()), expected)
-        expected_results = {
-            # all actives show, whether in a cohort or not
-            self.editor_ids[0] : 1,
-            self.editor_ids[2] : 1,
-            self.editor_ids[4] : 1,
-            make_active.user_id : 1,
-            # users with not enough edits will show up with 0 as the result
-            self.editor_ids[1] : 0,
-        }
-        for user_id, result in expected_results.items():
-            assert_equal(results[user_id][metric.id], result)
-        # users with no edits at all just won't show up
-        assert_equal(results.get(self.editor_ids[3], -1), -1)
+        for user_id in expected:
+            assert_equal(results[user_id][metric.id], 1)
 
     def test_wiki_cohort_all_bots(self):
         # make everyone a bot and make sure they're excluded
diff --git a/tests/test_metrics/test_rolling_new_active_editor.py 
b/tests/test_metrics/test_rolling_new_active_editor.py
index b057819..d80ff1b 100644
--- a/tests/test_metrics/test_rolling_new_active_editor.py
+++ b/tests/test_metrics/test_rolling_new_active_editor.py
@@ -118,24 +118,18 @@
         expected.remove(self.editor_ids[2])
         expected.remove(self.editor_ids[3])
         expected.remove(self.editor_ids[4])
+        # editors that haven't made enough edits won't pass the test
+        expected.remove(self.editor_ids[6])
 
         metric = RollingNewActiveEditor(
             end_date=self.r_plus_30,
         )
         results = metric(None, self.mwSession)
 
+        # all wiki cohort results will be editors that pass the test
         assert_equal(set(results.keys()), expected)
-        expected_results = {
-            # all actives show, whether in a cohort or not
-            self.editor_ids[5] : 1,
-            self.editor_ids[7] : 1,
-            self.editor_ids[9] : 1,
-            make_active.user_id : 1,
-            # users with not enough edits will show up with 0 as the result
-            self.editor_ids[6] : 0,
-        }
-        for user_id, result in expected_results.items():
-            assert_equal(results[user_id][metric.id], result)
+        for user_id in expected:
+            assert_equal(results[user_id][metric.id], 1)
 
     def test_wiki_cohort_nobody_qualifying(self):
         # make everyone fail the registration criteria and make sure they're 
excluded
diff --git a/tests/test_metrics/test_rolling_surviving_new_active_editor.py 
b/tests/test_metrics/test_rolling_surviving_new_active_editor.py
index d60f53a..ac7b424 100644
--- a/tests/test_metrics/test_rolling_surviving_new_active_editor.py
+++ b/tests/test_metrics/test_rolling_surviving_new_active_editor.py
@@ -125,7 +125,8 @@
         # editors with no edits at all won't be picked up by the query
         expected.remove(self.editor_ids[13])
         # editors that haven't registered in time shouldn't be picked up at all
-        for reg_before in range(7):
+        # editors that haven't made enough edits won't pass the test
+        for reg_before in range(11):
             expected.remove(self.editor_ids[reg_before])
 
         metric = RollingSurvivingNewActiveEditor(
@@ -133,22 +134,10 @@
         )
         results = metric(None, self.mwSession)
 
+        # all wiki cohort results will be editors that pass the test
         assert_equal(set(results.keys()), expected)
-        expected_results = {
-            # all actives show, whether in a cohort or not
-            self.editor_ids[11] : 1,
-            self.editor_ids[12] : 1,
-            make_active.user_id : 1,
-            # users with not enough edits will show up with 0 as the result
-            self.editor_ids[7] : 0,
-            self.editor_ids[8] : 0,
-            self.editor_ids[9] : 0,
-            self.editor_ids[10] : 0,
-        }
-        print expected_results
-        print results
-        for user_id, result in expected_results.items():
-            assert_equal(results[user_id][metric.id], result)
+        for user_id in expected:
+            assert_equal(results[user_id][metric.id], 1)
 
     def test_wiki_cohort_nobody_qualifying(self):
         # make everyone fail the registration criteria and make sure they're 
excluded
diff --git a/wikimetrics/metrics/rolling_active_editor.py 
b/wikimetrics/metrics/rolling_active_editor.py
index 35ffc9c..e8c344a 100644
--- a/wikimetrics/metrics/rolling_active_editor.py
+++ b/wikimetrics/metrics/rolling_active_editor.py
@@ -107,14 +107,12 @@
             .subquery()
 
         edits = revisions.union_all(archived).subquery()
-        edits_by_user = session.query(
-            edits.c.user_id,
-            func.IF(func.SUM(edits.c.count) >= number_of_edits, 1, 0)
-        )\
+        edits_by_user = session.query(edits.c.user_id)\
             .filter(edits.c.user_id.notin_(bot_user_ids))\
-            .group_by(edits.c.user_id)
+            .group_by(edits.c.user_id)\
+            .having(func.SUM(edits.c.count) >= number_of_edits)
 
-        metric_results = {r[0]: {self.id : r[1]} for r in edits_by_user.all()}
+        metric_results = {r[0]: {self.id : 1} for r in edits_by_user.all()}
 
         if user_ids is None:
             return metric_results
diff --git a/wikimetrics/metrics/rolling_new_active_editor.py 
b/wikimetrics/metrics/rolling_new_active_editor.py
index 1f2c9f9..006bb17 100644
--- a/wikimetrics/metrics/rolling_new_active_editor.py
+++ b/wikimetrics/metrics/rolling_new_active_editor.py
@@ -126,14 +126,12 @@
             .subquery()
 
         new_edits = revisions.union_all(archived).subquery()
-        new_edits_by_user = session.query(
-            new_edits.c.user_id,
-            func.IF(func.SUM(new_edits.c.count) >= number_of_edits, 1, 0)
-        )\
+        new_edits_by_user = session.query(new_edits.c.user_id)\
             .filter(new_edits.c.user_id.notin_(bot_user_ids))\
-            .group_by(new_edits.c.user_id)
+            .group_by(new_edits.c.user_id)\
+            .having(func.SUM(new_edits.c.count) >= number_of_edits)
 
-        metric_results = {r[0]: {self.id : r[1]} for r in 
new_edits_by_user.all()}
+        metric_results = {r[0]: {self.id : 1} for r in new_edits_by_user.all()}
 
         if user_ids is None:
             return metric_results
diff --git a/wikimetrics/metrics/rolling_surviving_new_active_editor.py 
b/wikimetrics/metrics/rolling_surviving_new_active_editor.py
index d43afb8..900c1c4 100644
--- a/wikimetrics/metrics/rolling_surviving_new_active_editor.py
+++ b/wikimetrics/metrics/rolling_surviving_new_active_editor.py
@@ -145,19 +145,15 @@
         #   sum the count_one values together, check it's >= number_of_edits
         #   sum the count_two values together, check it's >= number_of_edits
         new_edits = revisions.union_all(archived).subquery()
-        new_edits_by_user = session.query(
-            new_edits.c.user_id,
-            func.IF(
-                and_(
-                    func.SUM(new_edits.c.count_one) >= number_of_edits,
-                    func.SUM(new_edits.c.count_two) >= number_of_edits,
-                ), 1, 0
-            )
-        )\
+        new_edits_by_user = session.query(new_edits.c.user_id)\
             .filter(new_edits.c.user_id.notin_(bot_user_ids))\
-            .group_by(new_edits.c.user_id)
+            .group_by(new_edits.c.user_id)\
+            .having(and_(
+                func.SUM(new_edits.c.count_one) >= number_of_edits,
+                func.SUM(new_edits.c.count_two) >= number_of_edits,
+            ))
 
-        metric_results = {r[0]: {self.id : r[1]} for r in 
new_edits_by_user.all()}
+        metric_results = {r[0]: {self.id : 1} for r in new_edits_by_user.all()}
 
         if user_ids is None:
             return metric_results

-- 
To view, visit https://gerrit.wikimedia.org/r/167583
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I280028ee85b8e97da8ea09b34012654c49c12c9f
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to