Nuria has submitted this change and it was merged.

Change subject: Filter out bots from relevant metrics
......................................................................


Filter out bots from relevant metrics

Bots are defined as users that appear in the user_groups table with a
ug_group value of 'bot'. This change filters those users out of the
rolling metrics.

Bug: 72134
Change-Id: I53258947816e35b770c4b1fccfe9f4a98f82554e
---
M tests/fixtures.py
M tests/test_metrics/test_rolling_active_editor.py
M tests/test_metrics/test_rolling_new_active_editor.py
M tests/test_metrics/test_rolling_surviving_new_active_editor.py
M tests/test_models/test_mappings.py
M wikimetrics/metrics/rolling_active_editor.py
M wikimetrics/metrics/rolling_new_active_editor.py
M wikimetrics/metrics/rolling_surviving_new_active_editor.py
M wikimetrics/models/mediawiki/__init__.py
A wikimetrics/models/mediawiki/user_groups.py
10 files changed, 110 insertions(+), 8 deletions(-)

Approvals:
  Mforns: Checked; Looks good to me, but someone else must approve
  Nuria: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/tests/fixtures.py b/tests/fixtures.py
index 3ac24b4..2b62b9a 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -23,6 +23,7 @@
     Revision,
     Page,
     MediawikiUser,
+    MediawikiUserGroups,
     Logging,
     Archive,
 )
@@ -593,6 +594,13 @@
         self.mwSession.query(Revision).delete()
         self.mwSession.commit()
 
+    def make_bot(self, user_id, session):
+        """
+        Update the database to make user_id a bot
+        """
+        session.add(MediawikiUserGroups(ug_user=user_id, ug_group='bot'))
+        session.commit()
+
     def setUp(self):
         #****************************************************************
         # set up and clean database (Warning: this DESTROYS ALL DATA)
@@ -613,6 +621,7 @@
         self.mwSession.query(Logging).delete()
         self.mwSession.query(Revision).delete()
         self.mwSession.query(Archive).delete()
+        self.mwSession.query(MediawikiUserGroups).delete()
         self.mwSession.query(MediawikiUser).delete()
         self.mwSession.query(Page).delete()
         self.mwSession.commit()
diff --git a/tests/test_metrics/test_rolling_active_editor.py b/tests/test_metrics/test_rolling_active_editor.py
index 4734205..2312b4b 100644
--- a/tests/test_metrics/test_rolling_active_editor.py
+++ b/tests/test_metrics/test_rolling_active_editor.py
@@ -3,7 +3,7 @@
 
 from tests.fixtures import DatabaseTest, i, d
 from wikimetrics.utils import format_pretty_date as s
-from wikimetrics.models import Revision
+from wikimetrics.models import Revision, MediawikiUser
 from wikimetrics.metrics import RollingActiveEditor
 from wikimetrics.enums import TimeseriesChoices
 
@@ -116,3 +116,15 @@
             assert_equal(results[user_id][metric.id], result)
         # users with no edits at all just won't show up
         assert_equal(results.get(self.editor_ids[3], -1), -1)
+
+    def test_wiki_cohort_all_bots(self):
+        # make everyone a bot and make sure they're excluded
+        for r in self.mwSession.query(MediawikiUser.user_id).all():
+            self.make_bot(r[0], self.mwSession)
+
+        metric = RollingActiveEditor(
+            end_date=self.r_plus_30,
+        )
+        results = metric(None, self.mwSession)
+
+        assert_equal(results.keys(), [])
diff --git a/tests/test_metrics/test_rolling_new_active_editor.py b/tests/test_metrics/test_rolling_new_active_editor.py
index 30b9880..b057819 100644
--- a/tests/test_metrics/test_rolling_new_active_editor.py
+++ b/tests/test_metrics/test_rolling_new_active_editor.py
@@ -3,7 +3,7 @@
 
 from tests.fixtures import DatabaseTest, i, d
 from wikimetrics.utils import format_pretty_date as s
-from wikimetrics.models import Revision, Logging
+from wikimetrics.models import Revision, Logging, MediawikiUser
 from wikimetrics.metrics import RollingNewActiveEditor
 from wikimetrics.enums import TimeseriesChoices
 
@@ -152,3 +152,15 @@
         results = metric(None, self.mwSession)
 
         assert_equal(results.keys(), [])
+
+    def test_wiki_cohort_all_bots(self):
+        # make everyone a bot and make sure they're excluded
+        for r in self.mwSession.query(MediawikiUser.user_id).all():
+            self.make_bot(r[0], self.mwSession)
+
+        metric = RollingNewActiveEditor(
+            end_date=self.r_plus_30,
+        )
+        results = metric(None, self.mwSession)
+
+        assert_equal(results.keys(), [])
diff --git a/tests/test_metrics/test_rolling_surviving_new_active_editor.py b/tests/test_metrics/test_rolling_surviving_new_active_editor.py
index a8eb480..d60f53a 100644
--- a/tests/test_metrics/test_rolling_surviving_new_active_editor.py
+++ b/tests/test_metrics/test_rolling_surviving_new_active_editor.py
@@ -3,7 +3,7 @@
 
 from tests.fixtures import DatabaseTest, i, d
 from wikimetrics.utils import format_pretty_date as s
-from wikimetrics.models import Revision, Logging
+from wikimetrics.models import Revision, Logging, MediawikiUser
 from wikimetrics.metrics import RollingSurvivingNewActiveEditor
 from wikimetrics.enums import TimeseriesChoices
 
@@ -165,3 +165,15 @@
         results = metric(None, self.mwSession)
 
         assert_equal(results.keys(), [])
+
+    def test_wiki_cohort_all_bots(self):
+        # make everyone a bot and make sure they're excluded
+        for r in self.mwSession.query(MediawikiUser.user_id).all():
+            self.make_bot(r[0], self.mwSession)
+
+        metric = RollingSurvivingNewActiveEditor(
+            end_date=self.r_plus_60,
+        )
+        results = metric(None, self.mwSession)
+
+        assert_equal(results.keys(), [])
diff --git a/tests/test_models/test_mappings.py b/tests/test_models/test_mappings.py
index 116201a..323f3a0 100644
--- a/tests/test_models/test_mappings.py
+++ b/tests/test_models/test_mappings.py
@@ -10,6 +10,7 @@
     Logging,
     Page,
     MediawikiUser,
+    MediawikiUserGroups,
     Revision,
 )
 from wikimetrics.enums import CohortUserRole, UserRole
@@ -71,6 +72,13 @@
         row = self.mwSession.query(MediawikiUser).get(self.editors[0].user_id)
         assert_equal(row.user_name, 'Editor test-specific-0')
     
+    def test_mediawiki_user_groups(self):
+        ug = MediawikiUserGroups(ug_user=self.editors[0].user_id, ug_group='test')
+        self.mwSession.add(ug)
+        self.mwSession.commit()
+        fetch = self.mwSession.query(MediawikiUserGroups).first()
+        assert_equal(fetch.ug_group, 'test')
+    
     def test_mediawiki_page(self):
         row = self.mwSession.query(Page).get(self.revisions[0].rev_page)
         assert_equal(row.page_title, 'test-specific-page')
diff --git a/wikimetrics/metrics/rolling_active_editor.py b/wikimetrics/metrics/rolling_active_editor.py
index db1d677..35ffc9c 100644
--- a/wikimetrics/metrics/rolling_active_editor.py
+++ b/wikimetrics/metrics/rolling_active_editor.py
@@ -6,7 +6,9 @@
 
 from wikimetrics.forms.fields import BetterDateTimeField
 from wikimetrics.utils import today
-from wikimetrics.models.mediawiki import Revision, MediawikiUser, Archive
+from wikimetrics.models.mediawiki import (
+    Revision, MediawikiUser, Archive, MediawikiUserGroups
+)
 from metric import Metric
 
 
@@ -43,6 +45,12 @@
             ) AS user_content_revision_count
       GROUP BY user_id
      HAVING SUM(revisions) >= @n;
+
+    NOTE: updated to exclude bots as identified by:
+
+     SELECT ug_user
+       FROM user_groups
+      WHERE ug_group = 'bot'
     """
 
     show_in_ui  = True
@@ -94,11 +102,16 @@
             .group_by(Archive.ar_user)
         archived = self.filter(archived, user_ids, column=Archive.ar_user)
 
+        bot_user_ids = session.query(MediawikiUserGroups.ug_user)\
+            .filter(MediawikiUserGroups.ug_group == 'bot')\
+            .subquery()
+
         edits = revisions.union_all(archived).subquery()
         edits_by_user = session.query(
             edits.c.user_id,
             func.IF(func.SUM(edits.c.count) >= number_of_edits, 1, 0)
         )\
+            .filter(edits.c.user_id.notin_(bot_user_ids))\
             .group_by(edits.c.user_id)
 
         metric_results = {r[0]: {self.id : r[1]} for r in edits_by_user.all()}
diff --git a/wikimetrics/metrics/rolling_new_active_editor.py b/wikimetrics/metrics/rolling_new_active_editor.py
index 3456485..1f2c9f9 100644
--- a/wikimetrics/metrics/rolling_new_active_editor.py
+++ b/wikimetrics/metrics/rolling_new_active_editor.py
@@ -5,7 +5,7 @@
 
 from wikimetrics.forms.fields import BetterDateTimeField
 from wikimetrics.utils import today
-from wikimetrics.models.mediawiki import Revision, Archive, Logging
+from wikimetrics.models.mediawiki import Revision, Archive, Logging, MediawikiUserGroups
 from metric import Metric
 
 
@@ -51,6 +51,12 @@
             ) AS user_content_revision_count
       GROUP BY user_id
      HAVING SUM(revisions) >= @n;
+
+    NOTE: updated to exclude bots as identified by:
+
+     SELECT ug_user
+       FROM user_groups
+      WHERE ug_group = 'bot'
     """
 
     show_in_ui  = True
@@ -115,11 +121,16 @@
             .filter(Archive.ar_user.in_(filtered_new))\
             .group_by(Archive.ar_user)
 
+        bot_user_ids = session.query(MediawikiUserGroups.ug_user)\
+            .filter(MediawikiUserGroups.ug_group == 'bot')\
+            .subquery()
+
         new_edits = revisions.union_all(archived).subquery()
         new_edits_by_user = session.query(
             new_edits.c.user_id,
             func.IF(func.SUM(new_edits.c.count) >= number_of_edits, 1, 0)
         )\
+            .filter(new_edits.c.user_id.notin_(bot_user_ids))\
             .group_by(new_edits.c.user_id)
 
         metric_results = {r[0]: {self.id : r[1]} for r in new_edits_by_user.all()}
diff --git a/wikimetrics/metrics/rolling_surviving_new_active_editor.py b/wikimetrics/metrics/rolling_surviving_new_active_editor.py
index 22e3c06..d43afb8 100644
--- a/wikimetrics/metrics/rolling_surviving_new_active_editor.py
+++ b/wikimetrics/metrics/rolling_surviving_new_active_editor.py
@@ -5,7 +5,7 @@
 
 from wikimetrics.forms.fields import BetterDateTimeField
 from wikimetrics.utils import today
-from wikimetrics.models.mediawiki import Revision, Archive, Logging
+from wikimetrics.models.mediawiki import Revision, Archive, Logging, MediawikiUserGroups
 from metric import Metric
 
 
@@ -52,8 +52,14 @@
           GROUP BY user_id
         ) AS user_content_revision_count
   GROUP BY user_id
- HAVING SUM(revisions1) >= @n;
-    AND SUM(revisions2) >= @n;
+ HAVING SUM(revisions1) >= @n
+    AND SUM(revisions2) >= @n
+
+    NOTE: updated to exclude bots as identified by:
+
+ SELECT ug_user
+   FROM user_groups
+  WHERE ug_group = 'bot'
     """
 
     show_in_ui  = True
@@ -131,6 +137,10 @@
             .filter(Archive.ar_user.in_(filtered_new))\
             .group_by(Archive.ar_user)
 
+        bot_user_ids = session.query(MediawikiUserGroups.ug_user)\
+            .filter(MediawikiUserGroups.ug_group == 'bot')\
+            .subquery()
+
         # For each user, with both counts from both tables,
         #   sum the count_one values together, check it's >= number_of_edits
         #   sum the count_two values together, check it's >= number_of_edits
@@ -144,6 +154,7 @@
                 ), 1, 0
             )
         )\
+            .filter(new_edits.c.user_id.notin_(bot_user_ids))\
             .group_by(new_edits.c.user_id)
 
         metric_results = {r[0]: {self.id : r[1]} for r in new_edits_by_user.all()}
diff --git a/wikimetrics/models/mediawiki/__init__.py b/wikimetrics/models/mediawiki/__init__.py
index 0eb46f8..bf82c52 100644
--- a/wikimetrics/models/mediawiki/__init__.py
+++ b/wikimetrics/models/mediawiki/__init__.py
@@ -3,6 +3,7 @@
 from revision import *
 from page import *
 from user import *
+from user_groups import *
 from logging import *
 from archive import *
 
diff --git a/wikimetrics/models/mediawiki/user_groups.py b/wikimetrics/models/mediawiki/user_groups.py
new file mode 100644
index 0000000..b61324f
--- /dev/null
+++ b/wikimetrics/models/mediawiki/user_groups.py
@@ -0,0 +1,13 @@
+from sqlalchemy import Column, Integer, ForeignKey
+from sqlalchemy.dialects.mysql import VARBINARY
+
+from wikimetrics.configurables import db
+
+
+class MediawikiUserGroups(db.MediawikiBase):
+    __tablename__ = 'user_groups'
+
+    ug_user = Column(
+        Integer, ForeignKey('user.user_id'), nullable=False, default=0, primary_key=True
+    )
+    ug_group = Column(VARBINARY(255), nullable=False, primary_key=True)

-- 
To view, visit https://gerrit.wikimedia.org/r/167064
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I53258947816e35b770c4b1fccfe9f4a98f82554e
Gerrit-PatchSet: 2
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to