jenkins-bot has submitted this change and it was merged.

Change subject: Fix Aggregation of empty metric results
......................................................................


Fix Aggregation of empty metric results

When a metric that runs daily (like new register users)
does not return any results the agreggation step fails.

Relatedly, when a wiki level cohort runss through the system, the metric
and cohort service were not treating the group_by_project logic
correctly.  This has also been fixed with a test to prove it.

Bug: 66439
Change-Id: I9d4f3250760b9b3a3dc60849dd88e21148d024a7
---
M tests/fixtures.py
M tests/test_api/test_cohort_service.py
A tests/test_metrics/test_edge_cases.py
M wikimetrics/metrics/bytes_added.py
M wikimetrics/metrics/metric.py
M wikimetrics/metrics/namespace_edits.py
M wikimetrics/metrics/newly_registered.py
M wikimetrics/metrics/pages_created.py
M wikimetrics/metrics/revert_rate.py
M wikimetrics/metrics/survival.py
M wikimetrics/metrics/threshold.py
M wikimetrics/models/report_nodes/aggregate_report.py
M wikimetrics/models/report_nodes/metric_report.py
M wikimetrics/models/storage/cohort.py
M wikimetrics/utils.py
15 files changed, 130 insertions(+), 29 deletions(-)

Approvals:
  Nuria: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/tests/fixtures.py b/tests/fixtures.py
index 0d60ae1..c3dec56 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -538,7 +538,12 @@
         self.session.add(basic_wiki_cohort)
         self.session.commit()
         self.basic_wiki_cohort = basic_wiki_cohort
-
+        
+        owner = UserStore(username='test cohort owner', email='[email protected]')
+        self.session.add(owner)
+        self.session.commit()
+        self.owner_user_id = owner.id
+        
         cohort_user = CohortUserStore(
             user_id=self.owner_user_id,
             cohort_id=basic_wiki_cohort.id,
diff --git a/tests/test_api/test_cohort_service.py 
b/tests/test_api/test_cohort_service.py
index 69a14f1..99a9609 100644
--- a/tests/test_api/test_cohort_service.py
+++ b/tests/test_api/test_cohort_service.py
@@ -1,7 +1,7 @@
 from nose.tools import assert_equals, assert_true, raises
 from sqlalchemy.orm.exc import NoResultFound
 
-from tests.fixtures import DatabaseTest
+from tests.fixtures import DatabaseTest, mediawiki_project
 from wikimetrics.api import CohortService
 from wikimetrics.exceptions import Unauthorized, InvalidCohort
 from wikimetrics.models import CohortStore, CohortUserStore, CohortUserRole
@@ -116,3 +116,13 @@
     def test_get_wikiusers_empty(self):
         users = self.cohort_service.get_wikiusers(self.empty_cohort)
         assert_equals(len(users), 0)
+
+    def test_wiki_cohort_group_by_project_works(self):
+        self.create_wiki_cohort()
+        c = self.cohort_service.get(
+            self.session, self.owner_user_id, by_id=self.basic_wiki_cohort.id
+        )
+        users_by_project = list(self.cohort_service.get_users_by_project(c))
+        assert_equals(len(users_by_project), 1)
+        assert_equals(users_by_project[0][0], mediawiki_project)
+        assert_equals(users_by_project[0][1], None)
diff --git a/tests/test_metrics/test_edge_cases.py 
b/tests/test_metrics/test_edge_cases.py
new file mode 100644
index 0000000..2c6edb2
--- /dev/null
+++ b/tests/test_metrics/test_edge_cases.py
@@ -0,0 +1,53 @@
+from datetime import datetime, timedelta
+from nose.tools import assert_true, assert_equals, assert_false
+
+from tests.fixtures import QueueDatabaseTest
+from wikimetrics.metrics import NewlyRegistered
+from wikimetrics.metrics import (
+    metric_classes, NamespaceEdits, TimeseriesChoices,
+)
+from wikimetrics.utils import r
+from wikimetrics.models import Aggregation, AggregateReport
+
+
+class EdgeCasesTest(QueueDatabaseTest):
+    '''
+    Tests different cases for metric system when it comes to report results
+    '''
+    def setUp(self):
+        QueueDatabaseTest.setUp(self)
+
+    def test_aggregate_empty_results(self):
+        '''
+         Tests what happens when no users are
+         returned for the initial metric run
+         so there are no users to agreggate
+         '''
+        self.create_wiki_cohort()
+
+        metric = metric_classes['NamespaceEdits'](
+            name='NamespaceEdits',
+            namespaces=[0, 1, 2],
+            start_date='2010-01-01 00:00:00',
+            end_date='2010-01-02 00:00:00',
+        )
+        options = {
+            'individualResults': True,
+            'aggregateResults': True,
+            'aggregateSum': True,
+            'aggregateAverage': True,
+            'aggregateStandardDeviation': True,
+        }
+
+        ar = AggregateReport(
+            metric,
+            self.basic_wiki_cohort,
+            options,
+            user_id=self.basic_wiki_cohort_owner,
+        )
+        result = ar.task.delay(ar).get()
+
+        assert_equals(result[Aggregation.IND].keys(), [])
+        assert_equals(result[Aggregation.SUM]['edits'], r(0))
+        assert_equals(result[Aggregation.AVG]['edits'], r(0))
+        assert_equals(result[Aggregation.STD]['edits'], r(0))
diff --git a/wikimetrics/metrics/bytes_added.py 
b/wikimetrics/metrics/bytes_added.py
index 5310758..e486c49 100644
--- a/wikimetrics/metrics/bytes_added.py
+++ b/wikimetrics/metrics/bytes_added.py
@@ -60,11 +60,13 @@
             ) AS anon_1
       GROUP BY anon_1.rev_user
     """
-    show_in_ui  = True
-    id          = 'bytes-added'
-    label       = 'Bytes Added'
-    description = 'Compute different aggregations of the bytes\
-                   contributed or removed from a mediawiki project'
+    show_in_ui      = True
+    id              = 'bytes-added'
+    label           = 'Bytes Added'
+    description     = 'Compute different aggregations of the bytes\
+                       contributed or removed from a mediawiki project'
+    # filled in below as the default depends on options
+    default_result  = {}
     
     namespaces          = CommaSeparatedIntegerListField(
         None,
@@ -151,6 +153,8 @@
                 )).label('negative_only_sum'),
             )
             index += 1
-        
+
+        self.default_result = {s[0]: s[2] for s in submetrics}
+
         query = self.apply_timeseries(bytes_added_by_user, rev=BC.c)
         return self.results_by_user(user_ids, query, submetrics, 
date_index=index)
diff --git a/wikimetrics/metrics/metric.py b/wikimetrics/metrics/metric.py
index 3135c9e..4d39059 100644
--- a/wikimetrics/metrics/metric.py
+++ b/wikimetrics/metrics/metric.py
@@ -13,13 +13,14 @@
     class level properties that can be introspected by an interface.
     """
     
-    show_in_ui  = False
-    id          = None  # unique identifier for client-side use
+    show_in_ui      = False
+    id              = None  # unique identifier for client-side use
     
     # this will be displayed as the title of the metric-specific
     # tab in the request form
-    label       = None
-    description = None  # basic description of what the metric does
+    label           = None
+    description     = None  # basic description of what the metric does
+    default_result  = {}    # if results are empty, default to this
     
     def __call__(self, user_ids, session):
         """
diff --git a/wikimetrics/metrics/namespace_edits.py 
b/wikimetrics/metrics/namespace_edits.py
index 11a5f23..b99bd1d 100644
--- a/wikimetrics/metrics/namespace_edits.py
+++ b/wikimetrics/metrics/namespace_edits.py
@@ -36,7 +36,10 @@
         'Compute the number of edits in a specific'
         'namespace of a mediawiki project'
     )
-    
+    default_result  = {
+        'edits': 0,
+    }
+
     namespaces = CommaSeparatedIntegerListField(
         None,
         [Required()],
diff --git a/wikimetrics/metrics/newly_registered.py 
b/wikimetrics/metrics/newly_registered.py
index 5d41f5d..74f9410 100644
--- a/wikimetrics/metrics/newly_registered.py
+++ b/wikimetrics/metrics/newly_registered.py
@@ -28,6 +28,9 @@
         'A newly registered user is a previously unregistered user creating a 
username \
         for the first time on a Wikimedia project.'
     )
+    default_result  = {
+        'newly_registered': 0,
+    }
 
     start_date  = BetterDateTimeField(default=thirty_days_ago)
     end_date    = BetterDateTimeField(default=today)
@@ -66,6 +69,6 @@
             return metric_results
         else:
             return {
-                uid: metric_results.get(uid, {NewlyRegistered.id : 0})
+                uid: metric_results.get(uid, self.default_result)
                 for uid in user_ids
             }
diff --git a/wikimetrics/metrics/pages_created.py 
b/wikimetrics/metrics/pages_created.py
index 64650cc..fedfd8c 100644
--- a/wikimetrics/metrics/pages_created.py
+++ b/wikimetrics/metrics/pages_created.py
@@ -33,7 +33,10 @@
         'Compute the number of pages created by each \
          editor in a time interval'
     )
-    
+    default_result  = {
+        'pages_created': 0,
+    }
+
     namespaces = CommaSeparatedIntegerListField(
         None,
         [Required()],
diff --git a/wikimetrics/metrics/revert_rate.py 
b/wikimetrics/metrics/revert_rate.py
index e593370..a699662 100644
--- a/wikimetrics/metrics/revert_rate.py
+++ b/wikimetrics/metrics/revert_rate.py
@@ -34,10 +34,11 @@
       group by rev_user
     """
     
-    show_in_ui  = False
-    id          = 'revert-rate'
-    label       = 'Revert Rate'
-    description = 'Compute the number of reverted edits in a mediawiki project'
+    show_in_ui      = False
+    id              = 'revert-rate'
+    label           = 'Revert Rate'
+    description     = 'Compute the number of reverted edits in a mediawiki 
project'
+    default_result  = {}
     
     start_date  = BetterDateTimeField(default=thirty_days_ago)
     end_date    = BetterDateTimeField(default=today)
diff --git a/wikimetrics/metrics/survival.py b/wikimetrics/metrics/survival.py
index c4050df..dc47989 100644
--- a/wikimetrics/metrics/survival.py
+++ b/wikimetrics/metrics/survival.py
@@ -62,6 +62,10 @@
         If sunset_in_hours is 0, look for edits from \
         <registration> + <survival_hours> to <today>.'
     )
+    default_result = {
+        'survived': None,
+        CENSORED: None,
+    }
     
     number_of_edits = IntegerField(default=1)
     survival_hours  = IntegerField(default=0)
@@ -167,10 +171,7 @@
         }
         
         r = {
-            uid: metric_results.get(uid, {
-                Survival.id : None,
-                CENSORED    : None,
-            })
+            uid: metric_results.get(uid, self.default_result)
             for uid in user_ids or metric_results.keys()
         }
         
diff --git a/wikimetrics/metrics/threshold.py b/wikimetrics/metrics/threshold.py
index 9b90a71..5940601 100644
--- a/wikimetrics/metrics/threshold.py
+++ b/wikimetrics/metrics/threshold.py
@@ -63,6 +63,11 @@
         <registration> to <registration> + <threshold_hours>.  \
         Also compute the time it took them to reach that threshold, in hours.'
     )
+    default_result = {
+        'threshold': None,
+        'time_to_threshold': None,
+        CENSORED: None,
+    }
     
     number_of_edits = IntegerField(default=1)
     threshold_hours = IntegerField(default=24)
diff --git a/wikimetrics/models/report_nodes/aggregate_report.py 
b/wikimetrics/models/report_nodes/aggregate_report.py
index 677b3ad..75f0881 100644
--- a/wikimetrics/models/report_nodes/aggregate_report.py
+++ b/wikimetrics/models/report_nodes/aggregate_report.py
@@ -3,7 +3,7 @@
 from collections import OrderedDict
 from celery.utils.log import get_task_logger
 
-from wikimetrics.utils import stringify, CENSORED, r
+from wikimetrics.utils import stringify, CENSORED, NO_RESULTS, r
 from report import ReportNode
 from multi_project_metric_report import MultiProjectMetricReport
 
@@ -87,6 +87,8 @@
                 )
         
         if self.individual:
+            if NO_RESULTS in results_by_user:
+                results_by_user = {}
             aggregated_results[Aggregation.IND] = results_by_user
         
         return aggregated_results
diff --git a/wikimetrics/models/report_nodes/metric_report.py 
b/wikimetrics/models/report_nodes/metric_report.py
index 23e6841..d1e7b2a 100644
--- a/wikimetrics/models/report_nodes/metric_report.py
+++ b/wikimetrics/models/report_nodes/metric_report.py
@@ -1,9 +1,7 @@
 from wikimetrics.configurables import db
 from report import ReportLeaf
 from wikimetrics.models.storage.wikiuser import WikiUserKey
-
-
-__all__ = ['MetricReport']
+from wikimetrics.utils import NO_RESULTS
 
 
 class MetricReport(ReportLeaf):
@@ -23,7 +21,10 @@
         """
         super(MetricReport, self).__init__(*args, **kwargs)
         self.metric = metric
-        self.user_ids = list(user_ids)
+        if user_ids is not None:
+            self.user_ids = list(user_ids)
+        else:
+            self.user_ids = None
         self.cohort_id = cohort_id
         self.project = project
 
@@ -31,10 +32,13 @@
         session = db.get_mw_session(self.project)
         try:
             results_by_user = self.metric(self.user_ids, session)
-            return {
+            results = {
                 str(WikiUserKey(key, self.project, self.cohort_id)) : value
                 for key, value in results_by_user.items()
             }
+            if not len(results):
+                results = {NO_RESULTS : self.metric.default_result}
+            return results
         finally:
             session.close()
     
diff --git a/wikimetrics/models/storage/cohort.py 
b/wikimetrics/models/storage/cohort.py
index a96e8bf..2785c22 100644
--- a/wikimetrics/models/storage/cohort.py
+++ b/wikimetrics/models/storage/cohort.py
@@ -98,6 +98,10 @@
             ).order_by(WikiUserStore.project).all()
         finally:
             db_session.close()
+
+        if not len(user_id_projects):
+            return [(self.default_project, None)]
+
         groups = itertools.groupby(user_id_projects, key=itemgetter(1))
 
         return (
diff --git a/wikimetrics/utils.py b/wikimetrics/utils.py
index 79a85a0..72ed8e0 100644
--- a/wikimetrics/utils.py
+++ b/wikimetrics/utils.py
@@ -14,6 +14,8 @@
 PRETTY_TIMESTAMP = '%Y-%m-%d %H:%M:%S'
 # This is used to mean that a result was censored in some way
 CENSORED = 'censored'
+# Used to mean a metric is not returning any results
+NO_RESULTS = 'no-results'
 # Unicode NULL
 UNICODE_NULL = u'\x00'
 

-- 
To view, visit https://gerrit.wikimedia.org/r/138031
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I9d4f3250760b9b3a3dc60849dd88e21148d024a7
Gerrit-PatchSet: 2
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Nuria <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to