Milimetric has uploaded a new change for review. https://gerrit.wikimedia.org/r/119986
Change subject: Add utility functions for scheduling ...................................................................... Add utility functions for scheduling Adds two utility functions that are useful for scheduled task implementation. * diff_datewise finds differences between lists of dates, and supports parsing those lists from lists of strings. * timestamps_to_now gives you an efficient data structure that enumerates dates from some start to now, at some interval. Card: analytics 1378 Change-Id: I16245eacf86ba9d01de6e9f752fe1026ad504628 --- M tests/test_utils/test_one_off_functions.py M wikimetrics/utils.py 2 files changed, 106 insertions(+), 4 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/86/119986/1 diff --git a/tests/test_utils/test_one_off_functions.py b/tests/test_utils/test_one_off_functions.py index 407d15c..bf1f488 100644 --- a/tests/test_utils/test_one_off_functions.py +++ b/tests/test_utils/test_one_off_functions.py @@ -1,5 +1,5 @@ # -*- coding:utf-8 -*- -import datetime +from datetime import datetime, timedelta, date import decimal from nose.tools import assert_true, assert_equal from unittest import TestCase @@ -11,6 +11,8 @@ link_to_user_page, parse_pretty_date, format_pretty_date, + diff_datewise, + timestamps_to_now, ) from wikimetrics.metrics import NamespaceEdits @@ -18,12 +20,12 @@ class UtilsTest(TestCase): def test_better_encoder_date(self): - result = stringify(date_not_date_time=datetime.date(2013, 06, 01)) + result = stringify(date_not_date_time=date(2013, 06, 01)) assert_true(result.find('"date_not_date_time"') >= 0) assert_true(result.find('2013-06-01') >= 0) def test_better_encoder_datetime(self): - result = stringify(date_time=datetime.datetime(2013, 06, 01, 02, 03, 04)) + result = stringify(date_time=datetime(2013, 06, 01, 02, 03, 04)) assert_true(result.find('"date_time"') >= 0) assert_true(result.find('2013-06-01 02:03:04') >= 0) @@ -88,5 +90,54 @@ assert_true(True) def 
test_parse_pretty_date(self): - date = datetime.datetime(2012, 2, 3, 4, 5) + date = datetime(2012, 2, 3, 4, 5) assert_equal(date, parse_pretty_date(format_pretty_date(date))) + + +class TestUtil(TestCase): + def test_diff_datewise(self): + l = [] + l_just_dates = [] + r = [] + r_just_dates = [] + lp = 'blah%Y...%m...%d...%Hblahblah' + rp = 'neenee%Y%m%d%Hneenee' + + expect0 = set([datetime(2012, 6, 14, 13), datetime(2012, 11, 9, 3)]) + expect1 = set([datetime(2012, 6, 14, 14), datetime(2013, 11, 10, 22)]) + + for y in range(2012, 2014): + for m in range(1, 13): + # we're just diffing so we don't care about getting all days + for d in range(1, 28): + for h in range(0, 24): + x = datetime(y, m, d, h) + if not x in expect1: + l.append(datetime.strftime(x, lp)) + l_just_dates.append(x) + if not x in expect0: + r.append(datetime.strftime(x, rp)) + r_just_dates.append(x) + + result = diff_datewise(l, r, left_parse=lp, right_parse=rp) + self.assertEqual(result[0], expect0) + self.assertEqual(result[1], expect1) + + result = diff_datewise(l_just_dates, r, right_parse=rp) + self.assertEqual(result[0], expect0) + self.assertEqual(result[1], expect1) + + result = diff_datewise(l_just_dates, r_just_dates) + self.assertEqual(result[0], expect0) + self.assertEqual(result[1], expect1) + + def test_timestamps_to_now(self): + now = datetime.now() + start = now - timedelta(hours=2) + expect = [ + start, + start + timedelta(hours=1), + start + timedelta(hours=2), + ] + timestamps = timestamps_to_now(start, timedelta(hours=1)) + self.assertEqual(expect, list(timestamps)) diff --git a/wikimetrics/utils.py b/wikimetrics/utils.py index 9cadd5d..b8b5135 100644 --- a/wikimetrics/utils.py +++ b/wikimetrics/utils.py @@ -164,6 +164,57 @@ os.makedirs(full_path) +def diff_datewise(left, right, left_parse=None, right_parse=None): + """ + Parameters + left : a list of datetime strings or objects + right : a list of datetime strings or objects + left_parse : if left contains datetimes, None; else 
a strptime format + right_parse : if right contains datetimes, None; else a strptime format + + Returns + A tuple of two sets: + [0] : the datetime objects in left but not right + [1] : the datetime objects in right but not left + """ + + if left_parse: + left_set = set([ + datetime.strptime(l.strip(), left_parse) + for l in left if len(l.strip()) + ]) + else: + left_set = set(left) + + if right_parse: + right_set = set([ + datetime.strptime(r.strip(), right_parse) + for r in right if len(r.strip()) + ]) + else: + right_set = set(right) + + return (left_set - right_set, right_set - left_set) + + +def timestamps_to_now(start, increment): + """ + Generates timestamps from @start to datetime.now(), by @increment + + Parameters + start : the first generated timestamp + increment : the timedelta between the generated timestamps + + Returns + A generator that goes from @start to datetime.now() - x, + where x <= @increment + """ + now = datetime.now() + while start < now: + yield start + start += increment + + class Unauthorized(Exception): """ Different exception type to separate "unauthorized" errors from the rest -- To view, visit https://gerrit.wikimedia.org/r/119986 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I16245eacf86ba9d01de6e9f752fe1026ad504628 Gerrit-PatchSet: 1 Gerrit-Project: analytics/wikimetrics Gerrit-Branch: master Gerrit-Owner: Milimetric <dandree...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits