jenkins-bot has submitted this change and it was merged.
Change subject: Make apply timeseries more flexible
......................................................................
Make apply timeseries more flexible
Change-Id: Ie6f209c35b6ab8b2a696493d921376da060cc740
---
M tests/test_metrics/test_namespace_edits.py
M wikimetrics/metrics/bytes_added.py
M wikimetrics/metrics/namespace_edits.py
M wikimetrics/metrics/timeseries_metric.py
4 files changed, 72 insertions(+), 27 deletions(-)
Approvals:
Nuria: Looks good to me, approved
jenkins-bot: Verified
diff --git a/tests/test_metrics/test_namespace_edits.py b/tests/test_metrics/test_namespace_edits.py
index 133997c..05f3e3d 100644
--- a/tests/test_metrics/test_namespace_edits.py
+++ b/tests/test_metrics/test_namespace_edits.py
@@ -81,6 +81,22 @@
)
assert_true(not metric.validate())
+ def test_filters_out_other_editors_with_archive(self):
+ self.archive_revisions()
+ self.test_filters_out_other_editors()
+
+ def test_runs_for_an_entire_wiki_with_archive(self):
+ self.archive_revisions()
+ self.test_runs_for_an_entire_wiki()
+
+ def test_finds_edits_with_archive(self):
+ self.archive_revisions()
+ self.test_finds_edits()
+
+ def test_reports_zero_edits_with_archive(self):
+ self.archive_revisions()
+ self.test_reports_zero_edits()
+
class NamespaceEditsFullTest(QueueDatabaseTest):
def setUp(self):
@@ -162,6 +178,9 @@
assert_true(results is not None)
assert_equal(results[self.editor(0)]['edits'], 3)
+ def test_all_with_archive(self):
+ pass
+
class NamespaceEditsTimestampTest(DatabaseTest):
@@ -206,6 +225,9 @@
)
results = metric(self.editor_ids, self.mwSession)
assert_equal(results[self.editors[0].user_id]['edits'], 1)
+
+ def test_all_with_archive(self):
+ pass
class NamespaceEditsTimeseriesTest(DatabaseTest):
@@ -264,3 +286,6 @@
'2013-01-01 01:00:00' : 1,
}
)
+
+ def test_all_with_archive(self):
+ pass
diff --git a/wikimetrics/metrics/bytes_added.py b/wikimetrics/metrics/bytes_added.py
index 0e057c5..eed9120 100644
--- a/wikimetrics/metrics/bytes_added.py
+++ b/wikimetrics/metrics/bytes_added.py
@@ -154,5 +154,5 @@
self.default_result = {s[0]: s[2] for s in submetrics}
- query = self.apply_timeseries(bytes_added_by_user, rev=BC.c)
+ query = self.apply_timeseries(bytes_added_by_user, column=BC.c.rev_timestamp)
return self.results_by_user(user_ids, query, submetrics,
date_index=index)
diff --git a/wikimetrics/metrics/namespace_edits.py b/wikimetrics/metrics/namespace_edits.py
index e55697b..c952682 100644
--- a/wikimetrics/metrics/namespace_edits.py
+++ b/wikimetrics/metrics/namespace_edits.py
@@ -1,8 +1,9 @@
from sqlalchemy import func
+from sqlalchemy.sql.expression import label
from wtforms.validators import Required
from wikimetrics.utils import thirty_days_ago, today
-from wikimetrics.models import Page, Revision
+from wikimetrics.models import Page, Revision, Archive
from wikimetrics.forms.fields import CommaSeparatedIntegerListField
from timeseries_metric import TimeseriesMetric
@@ -12,9 +13,9 @@
This class implements namespace edits logic.
An instance of the class is callable and will compute the number of edits
for each user in a passed-in list.
-
+
This sql query was used as a starting point for the sqlalchemy query:
-
+
select r.rev_user, r.count(*)
from revision r
inner join
@@ -24,7 +25,7 @@
and p.page_namespace in ([parameterized])
group by rev_user
"""
-
+
show_in_ui = True
id = 'edits'
label = 'Edits'
@@ -43,32 +44,51 @@
default='0',
description='0, 2, 4, etc.',
)
-
+
def __call__(self, user_ids, session):
"""
Parameters:
user_ids : list of mediawiki user ids to find edit for
session : sqlalchemy session open on a mediawiki database
-
+
Returns:
dictionary from user ids to the number of edit found.
"""
start_date = self.start_date.data
end_date = self.end_date.data
-
- query = session\
- .query(Revision.rev_user, func.count(Revision.rev_id))\
+
+ revisions = session\
+ .query(
+ label('user_id', Revision.rev_user),
+ label('timestamp', Revision.rev_timestamp)
+ )\
.join(Page)\
.filter(Page.page_namespace.in_(self.namespaces.data))\
.filter(Revision.rev_timestamp > start_date)\
- .filter(Revision.rev_timestamp <= end_date)\
- .group_by(Revision.rev_user)
-
- query = self.filter(query, user_ids)
- query = self.apply_timeseries(query)
+ .filter(Revision.rev_timestamp <= end_date)
+
+ archives = session\
+ .query(
+ label('user_id', Archive.ar_user),
+ label('timestamp', Archive.ar_timestamp)
+ )\
+ .filter(Archive.ar_namespace.in_(self.namespaces.data))\
+ .filter(Archive.ar_timestamp > start_date)\
+ .filter(Archive.ar_timestamp <= end_date)
+
+ revisions = self.filter(revisions, user_ids, column=Revision.rev_user)
+ archives = self.filter(archives, user_ids, column=Archive.ar_user)
+
+ both = revisions.union_all(archives).subquery()
+
+ query = session.query(both.c.user_id, func.count())\
+ .group_by(both.c.user_id)
+
+ query = self.apply_timeseries(query, column=both.c.timestamp)
+
return self.results_by_user(
user_ids,
query,
- [('edits', 1, 0)],
+ [(self.id, 1, 0)],
date_index=2,
)
diff --git a/wikimetrics/metrics/timeseries_metric.py b/wikimetrics/metrics/timeseries_metric.py
index e2854aa..a09a7fb 100644
--- a/wikimetrics/metrics/timeseries_metric.py
+++ b/wikimetrics/metrics/timeseries_metric.py
@@ -32,14 +32,14 @@
],
)
- def apply_timeseries(self, query, rev=Revision):
+ def apply_timeseries(self, query, column=Revision.rev_timestamp):
"""
Take a query and slice it up into equal time intervals
Parameters
query : a sql alchemy query
- rev : defaults to Revision, specifies the object that
- contains the appropriate rev_timestamp
+ column : defaults to Revision.rev_timestamp, specifies the timestamp
+ column to use for the timeseries
Returns
The query parameter passed in, with a grouping by the desired time
slice
@@ -49,26 +49,26 @@
if choice == TimeseriesChoices.NONE:
return query
- query = query.add_column(func.year(rev.rev_timestamp))
- query = query.group_by(func.year(rev.rev_timestamp))
+ query = query.add_column(func.year(column))
+ query = query.group_by(func.year(column))
if choice == TimeseriesChoices.YEAR:
return query
- query = query.add_column(func.month(rev.rev_timestamp))
- query = query.group_by(func.month(rev.rev_timestamp))
+ query = query.add_column(func.month(column))
+ query = query.group_by(func.month(column))
if choice == TimeseriesChoices.MONTH:
return query
- query = query.add_column(func.day(rev.rev_timestamp))
- query = query.group_by(func.day(rev.rev_timestamp))
+ query = query.add_column(func.day(column))
+ query = query.group_by(func.day(column))
if choice == TimeseriesChoices.DAY:
return query
- query = query.add_column(func.hour(rev.rev_timestamp))
- query = query.group_by(func.hour(rev.rev_timestamp))
+ query = query.add_column(func.hour(column))
+ query = query.group_by(func.hour(column))
if choice == TimeseriesChoices.HOUR:
return query
--
To view, visit https://gerrit.wikimedia.org/r/161366
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie6f209c35b6ab8b2a696493d921376da060cc740
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits