Milimetric has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/119986

Change subject: Add utility functions for scheduling
......................................................................

Add utility functions for scheduling

Adds two utility functions that are useful for scheduled task implementation.
* diff_datewise finds differences between lists of dates,
and supports parsing those lists from lists of strings.
* timestamps_to_now gives you an efficient datastructure that enumerates
dates from some start to now, at some interval.

Card: analytics 1378
Change-Id: I16245eacf86ba9d01de6e9f752fe1026ad504628
---
M tests/test_utils/test_one_off_functions.py
M wikimetrics/utils.py
2 files changed, 106 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics refs/changes/86/119986/1

diff --git a/tests/test_utils/test_one_off_functions.py b/tests/test_utils/test_one_off_functions.py
index 407d15c..bf1f488 100644
--- a/tests/test_utils/test_one_off_functions.py
+++ b/tests/test_utils/test_one_off_functions.py
@@ -1,5 +1,5 @@
 # -*- coding:utf-8 -*-
-import datetime
+from datetime import datetime, timedelta, date
 import decimal
 from nose.tools import assert_true, assert_equal
 from unittest import TestCase
@@ -11,6 +11,8 @@
     link_to_user_page,
     parse_pretty_date,
     format_pretty_date,
+    diff_datewise,
+    timestamps_to_now,
 )
 from wikimetrics.metrics import NamespaceEdits
 
@@ -18,12 +20,12 @@
 class UtilsTest(TestCase):
     
     def test_better_encoder_date(self):
-        result = stringify(date_not_date_time=datetime.date(2013, 06, 01))
+        result = stringify(date_not_date_time=date(2013, 06, 01))
         assert_true(result.find('"date_not_date_time"') >= 0)
         assert_true(result.find('2013-06-01') >= 0)
     
     def test_better_encoder_datetime(self):
-        result = stringify(date_time=datetime.datetime(2013, 06, 01, 02, 03, 04))
+        result = stringify(date_time=datetime(2013, 06, 01, 02, 03, 04))
         assert_true(result.find('"date_time"') >= 0)
         assert_true(result.find('2013-06-01 02:03:04') >= 0)
     
@@ -88,5 +90,54 @@
         assert_true(True)
     
     def test_parse_pretty_date(self):
-        date = datetime.datetime(2012, 2, 3, 4, 5)
+        date = datetime(2012, 2, 3, 4, 5)
         assert_equal(date, parse_pretty_date(format_pretty_date(date)))
+
+
+class TestUtil(TestCase):
+    def test_diff_datewise(self):
+        l = []
+        l_just_dates = []
+        r = []
+        r_just_dates = []
+        lp = 'blah%Y...%m...%d...%Hblahblah'
+        rp = 'neenee%Y%m%d%Hneenee'
+        
+        expect0 = set([datetime(2012, 6, 14, 13), datetime(2012, 11, 9, 3)])
+        expect1 = set([datetime(2012, 6, 14, 14), datetime(2013, 11, 10, 22)])
+        
+        for y in range(2012, 2014):
+            for m in range(1, 13):
+                # we're just diffing so we don't care about getting all days
+                for d in range(1, 28):
+                    for h in range(0, 24):
+                        x = datetime(y, m, d, h)
+                        if not x in expect1:
+                            l.append(datetime.strftime(x, lp))
+                            l_just_dates.append(x)
+                        if not x in expect0:
+                            r.append(datetime.strftime(x, rp))
+                            r_just_dates.append(x)
+        
+        result = diff_datewise(l, r, left_parse=lp, right_parse=rp)
+        self.assertEqual(result[0], expect0)
+        self.assertEqual(result[1], expect1)
+        
+        result = diff_datewise(l_just_dates, r, right_parse=rp)
+        self.assertEqual(result[0], expect0)
+        self.assertEqual(result[1], expect1)
+        
+        result = diff_datewise(l_just_dates, r_just_dates)
+        self.assertEqual(result[0], expect0)
+        self.assertEqual(result[1], expect1)
+    
+    def test_timestamps_to_now(self):
+        now = datetime.now()
+        start = now - timedelta(hours=2)
+        expect = [
+            start,
+            start + timedelta(hours=1),
+            start + timedelta(hours=2),
+        ]
+        timestamps = timestamps_to_now(start, timedelta(hours=1))
+        self.assertEqual(expect, list(timestamps))
diff --git a/wikimetrics/utils.py b/wikimetrics/utils.py
index 9cadd5d..b8b5135 100644
--- a/wikimetrics/utils.py
+++ b/wikimetrics/utils.py
@@ -164,6 +164,57 @@
             os.makedirs(full_path)
 
 
+def diff_datewise(left, right, left_parse=None, right_parse=None):
+    """
+    Parameters
+        left        : a list of datetime strings or objects
+        right       : a list of datetime strings or objects
+        left_parse  : if left contains datetimes, None; else a strptime format
+        right_parse : if right contains datetimes, None; else a strptime format
+
+    Returns
+        A tuple of two sets:
+        [0] : the datetime objects in left but not right
+        [1] : the datetime objects in right but not left
+    """
+    
+    if left_parse:
+        left_set = set([
+            datetime.strptime(l.strip(), left_parse)
+            for l in left if len(l.strip())
+        ])
+    else:
+        left_set = set(left)
+    
+    if right_parse:
+        right_set = set([
+            datetime.strptime(r.strip(), right_parse)
+            for r in right if len(r.strip())
+        ])
+    else:
+        right_set = set(right)
+    
+    return (left_set - right_set, right_set - left_set)
+
+
+def timestamps_to_now(start, increment):
+    """
+    Generates timestamps from @start to datetime.now(), by @increment
+    
+    Parameters
+        start       : the first generated timestamp
+        increment   : the timedelta between the generated timestamps
+    
+    Returns
+        A generator that goes from @start to datetime.now() - x,
+        where x <= @increment
+    """
+    now = datetime.now()
+    while start < now:
+        yield start
+        start += increment
+
+
 class Unauthorized(Exception):
     """
     Different exception type to separate "unauthorized" errors from the rest

-- 
To view, visit https://gerrit.wikimedia.org/r/119986
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I16245eacf86ba9d01de6e9f752fe1026ad504628
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: Milimetric <dandree...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to