"""Find editors who made at least ``n`` edits per month to one category.

For every editor in ``wikilytics.enwiki_editors_raw`` the script groups the
editor's edits by calendar month and prints one tab-separated row to stdout
for each editor-month that contains at least ``n`` edits to articles of the
requested category.  Progress messages go to stderr.

Provenance (recovered from the MediaWiki-CVS commit notification):
    URL:       http://www.mediawiki.org/wiki/Special:Code/MediaWiki/89427
    Revision:  89427
    Author:    halfak
    Date:      2011-06-03 17:10:08 +0000 (Fri, 03 Jun 2011)
    Log:       "added catgory editors script (not tested)"
    Added:     trunk/tools/wsor/category_editors/get_category_editors.py
    Posted to: MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org
               https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
"""
import argparse
import logging
import sys
import time  # only referenced by the disabled sliding-window prototype
from collections import deque  # unused here; kept from the original commit
from itertools import groupby

# Column names, in the same order as the values printed for every row.
HEADERS = ['username', 'user_id', 'year', 'month', 'edits']


def main(args):
    """Print one row per editor-month that reaches the edit threshold.

    Args:
        args: parsed command line; ``args.n`` is the minimum number of
            in-category edits per month and ``args.category`` is the
            category name to match.
    """
    # Imported here rather than at module level so that the pure helpers in
    # this file stay importable on machines without the MongoDB driver.
    import pymongo

    logging.basicConfig(
        level=logging.DEBUG,
        stream=sys.stderr,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%b-%d %H:%M:%S'
    )

    logging.info("Connecting to mongo.")
    # ``pymongo.Connection`` was removed in PyMongo 3; prefer ``MongoClient``
    # and only fall back to the legacy class on very old driver versions.
    client_class = getattr(pymongo, "MongoClient", None) or pymongo.Connection
    db = client_class().wikilytics

    logging.info("Getting arbitration article ids.")
    category_ids = {
        article['id']
        for article in db.enwiki_articles_dataset.find(
            {'category': args.category}
        )
    }
    logging.info(
        "Found %s articles with in '%s' category.",
        len(category_ids), args.category
    )

    # Header row.
    print("\t".join(HEADERS))

    for editor in db.enwiki_editors_raw.find():
        for year, month, month_edits in get_months_of_edits(editor['edits']):
            in_category = sum(
                1 for edit in month_edits if edit['article'] in category_ids
            )
            if in_category < args.n:
                continue
            row = [
                editor['username'],
                editor['editor'],
                year,
                month,
                in_category,
            ]
            print("\t".join(clean(value) for value in row))


def clean(value):
    """Render *value* as one field of a tab-separated row.

    ``None`` becomes the two-character marker ``\\N``; anything else is
    converted with ``str`` and has backslashes, tabs and newlines escaped so
    that a field can never break the row/column structure.
    """
    if value is None:
        # Written as "\\N" because a bare "\N" is a syntax error on Python 3
        # (it starts a \N{NAME} escape); the emitted text is still \N.
        return "\\N"
    return (
        str(value)
        .replace("\\", "\\\\")  # must run first so later escapes survive
        .replace("\t", "\\t")
        .replace("\n", "\\n")
    )


def _month_of(edit):
    """Return the zero-padded month ("01".."12") of an edit's date."""
    return edit['date'].strftime("%m")


def get_months_of_edits(edits):
    """Yield ``(year, month, edits_in_month)`` for each run of edits.

    Args:
        edits: mapping of a year key to the list of that year's edits.  Each
            edit is a mapping whose ``'date'`` value provides ``strftime``.
            Consecutive edits sharing a month form one group, so each list
            is assumed to be in chronological order -- TODO confirm against
            the producer of ``enwiki_editors_raw``.

    Yields:
        Tuples of the year key, the two-digit month string and the list of
        edits in that month.  Years without edits yield nothing.
    """
    for year, year_edits in edits.items():
        for month, group in groupby(year_edits, key=_month_of):
            yield (year, month, list(group))


def capitalize(word):
    """Return *word* with its first character capitalized, rest untouched.

    Unlike ``str.capitalize`` the remainder of the string is not lowercased.
    An empty string is returned unchanged.
    """
    return word[:1].capitalize() + word[1:]


# NOTE: the sliding-window prototype below shipped disabled (wrapped in a
# string) in the original commit.  ``main`` used to reference it together
# with an undefined ``args.time`` option and therefore crashed; those
# references were removed.  As written, ``append`` never stores ``item``.
"""class LimQueue(list):

    def __init__(self, iterable=[], limit=lambda l, item: True):
        list.__init__(self, iterable)
        self.limit = limit

    def append(self, item):
        expectoration = []
        while not self.limit(self, item):
            expectoration.append(self.pop(0))

        return expectoration
"""


def _build_parser():
    """Create the command line parser (positional ``n`` and ``category``)."""
    parser = argparse.ArgumentParser(
        description='Finds editors that made at least some number of '
                    'edits to a category of articles in a month.  '
                    'This script prints one row for each editor-month '
                    'with enough edits to a category of articles.'
    )
    parser.add_argument(
        'n',
        type=int,
        help='the threshold number of edits per time period in a '
             'category for inclusion'
    )
    parser.add_argument(
        'category',
        type=capitalize,
        help='the category in which to search for edits'
    )
    return parser


if __name__ == "__main__":
    main(_build_parser().parse_args())