Yurik has uploaded a new change for review.
https://gerrit.wikimedia.org/r/161806
Change subject: filter out last day of data, simplify iszero->zero
......................................................................
filter out last day of data, simplify iszero->zero
Change-Id: If414ca8774a307f75402a4ffb6ad7aa5494307d3
---
M scripts/weblogs.py
1 file changed, 6 insertions(+), 3 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/zero-sms refs/changes/06/161806/1
diff --git a/scripts/weblogs.py b/scripts/weblogs.py
index 1d0a136..3fbd90c 100644
--- a/scripts/weblogs.py
+++ b/scripts/weblogs.py
@@ -257,7 +257,7 @@
(ipset in conf.ipsets):
isZero = True
break
- vals[9] = u'INCL' if isZero else u'EXCL'
+ vals[9] = u'yes' if isZero else u'no'
else:
vals[9] = ''
else:
@@ -284,14 +284,17 @@
else:
df = DataFrame(stats, columns=columnHeaders11)
+ # filter type==DATA
data = df[df['type'] == 'DATA']
- data['iszero'] = data['zero'].map(lambda v: 'yes' if v == 'INCL' else
'no')
+ # filter out last date
+ lastDate = data.date.max()
+ data = data[data.date < lastDate]
xcs = list(data.xcs.unique())
for id in xcs:
s = StringIO.StringIO()
- pivot_table(data[data.xcs == id], 'count', ['date', 'iszero'],
aggfunc=np.sum).to_csv(s, header=True)
+ pivot_table(data[data.xcs == id], 'count', ['date', 'zero'],
aggfunc=np.sum).to_csv(s, header=True)
result = s.getvalue()
# sortColumns = ['date', 'via', 'ipset', 'https', 'lang',
'subdomain', 'site', 'zero']
--
To view, visit https://gerrit.wikimedia.org/r/161806
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: If414ca8774a307f75402a4ffb6ad7aa5494307d3
Gerrit-PatchSet: 1
Gerrit-Project: analytics/zero-sms
Gerrit-Branch: master
Gerrit-Owner: Yurik <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits