http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92541
Revision: 92541 Author: halfak Date: 2011-07-19 16:24:11 +0000 (Tue, 19 Jul 2011) Log Message: ----------- added track_hugglers script Modified Paths: -------------- trunk/tools/wsor/newbie_warnings/queries.sql trunk/tools/wsor/newbie_warnings/track_messages.py Added Paths: ----------- trunk/tools/wsor/newbie_warnings/track_hugglers.py Modified: trunk/tools/wsor/newbie_warnings/queries.sql =================================================================== --- trunk/tools/wsor/newbie_warnings/queries.sql 2011-07-19 16:10:06 UTC (rev 92540) +++ trunk/tools/wsor/newbie_warnings/queries.sql 2011-07-19 16:24:11 UTC (rev 92541) @@ -83,6 +83,101 @@ GROUP BY tp.page_title; +SELECT rc_timestamp AS time +FROM recentchanges +ORDER BY rc_timestamp DESC +LIMIT 1 +SELECT + reciever.user_id, + reciever.user_name, + count(*) AS messages_waiting +FROM ( +SELECT DISTINCT p.page_title +FROM revision r +INNER JOIN halfak.huggler_sample h + ON r.rev_user = h.user_id + AND h.user_name NOT IN ("Tide rolls", "Falcon8765") +INNER JOIN page p + ON r.rev_page = p.page_id +AND p.page_namespace = 3 +AND r.rev_timestamp >= "20110705230000" +) AS tp +INNER JOIN user reciever + ON reciever.user_name = REPLACE(tp.page_title, "_", " ") +INNER JOIN user_newtalk nt + ON reciever.user_id = nt.user_id +GROUP BY reciever.user_id, reciever.user_name +UNION +SELECT + NULL AS user_id, + tp.page_title AS user_name, + count(*) AS messages_waiting +FROM ( +SELECT DISTINCT p.page_title +FROM revision r +INNER JOIN halfak.huggler_sample h + ON r.rev_user = h.user_id + AND h.user_name NOT IN ("Tide rolls", "Falcon8765") +INNER JOIN page p + ON r.rev_page = p.page_id +WHERE p.page_namespace = 3 +AND r.rev_timestamp >= "20110705230000" +) AS tp +INNER JOIN user_newtalk nt + ON tp.page_title = nt.user_ip +GROUP BY tp.page_title; + + +SELECT DISTINCT p.page_title AS title +FROM revision r +INNER JOIN halfak.listed_huggler h + ON r.rev_user = h.user_id +INNER JOIN page p + ON r.rev_page = p.page_id +WHERE p.page_namespace = 3 +AND r.rev_timestamp >= "20110719000000" +AND r.rev_comment LIKE "%[[WP:HG%"; + + +SELECT DISTINCT p.page_title AS title +FROM revision r +INNER JOIN page p + ON r.rev_page = p.page_id +WHERE p.page_namespace = 3 +AND r.rev_timestamp >= "20110719014743" +AND r.rev_comment LIKE "%DERPDERPDERP42%"; + + + +SELECT + nt.user_id, + IFNULL(u.user_name, nt.user_ip) AS user_name, + count(*) +FROM user_newtalk nt +LEFT JOIN user u + ON u.user_id = nt.user_id +WHERE u.user_name IN ("EpochFail") +OR nt.user_ip IN ("EpochFail") +GROUP BY nt.user_id, nt.user_ip, u.user_name; + + +SELECT + u.user_id, + u.user_name, + count(*) as messages +FROM user_newtalk nt +LEFT JOIN user u + ON u.user_id = nt.user_id +WHERE u.user_name IN ("EpochFail") +GROUP BY u.user_id, u.user_name +UNION +SELECT + nt.user_ip as user_name, + NULL as user_id, + count(*) as messages +FROM user_newtalk nt +WHERE nt.user_ip IN ("EpochFail") +GROUP BY nt.user_ip, NULL; Added: trunk/tools/wsor/newbie_warnings/track_hugglers.py =================================================================== --- trunk/tools/wsor/newbie_warnings/track_hugglers.py (rev 0) +++ trunk/tools/wsor/newbie_warnings/track_hugglers.py 2011-07-19 16:24:11 UTC (rev 92541) @@ -0,0 +1,192 @@ +import sys, MySQLdb, MySQLdb.cursors, argparse, os, logging, types, time +import wmf + +def encode(v): + if v == None: return "\N" + + if type(v) == types.LongType: v = int(v) + elif type(v) == types.UnicodeType: v = v.encode('utf-8') + + return str(v).encode("string-escape") + + +def emit(event, p, time): + print( + "\t".join(encode(v) for v in [ + event, + p['user_id'], + p['user_name'], + time + ]) + ) + + +def main(): + parser = argparse.ArgumentParser( + description='' + ) + parser.add_argument( + '-c', '--cnf', + metavar="<path>", + type=str, + help='the path to MySQL config info (defaults to ~/.my.cnf)', + default=os.path.expanduser("~/.my.cnf") + ) + parser.add_argument( + '-s', '--host', + type=str, + help='the database host to connect to (defaults to localhost)', + default="localhost" + ) + parser.add_argument( + '-d', '--db', + type=str, + help='the language db to run the query in (defaults to enwiki)', + default="enwiki" + ) + parser.add_argument( + '-o', '--out', + type=lambda fn:open(fn, 'a+'), + help='Where should output be appended', + default=sys.stdout + ) + args = parser.parse_args() + + LOGGING_STREAM = sys.stderr + logging.basicConfig( + level=logging.DEBUG, + stream=LOGGING_STREAM, + format='%(asctime)s %(levelname)-8s %(message)s', + datefmt='%b-%d %H:%M:%S' + ) + + logging.info("Connecting to %s:%s using %s." % (args.host, args.db, args.cnf)) + db = Database( + host=args.host, + db=args.db, + read_default_file=args.cnf + ) + + try: + oldPosts = {} + lastTime = db.getTime() + time.sleep(5) + while True: + logging.info("Tracking %s posts. Looking for new ones since %s." % (len(oldPosts), lastTime)) + newUsers = set(db.getHugglePostsSince(lastTime)) + currTime = db.getTime() + currUsers = set() + for p in db.getWaitingPosts(oldPosts.viewkeys() | newUsers): + if p['user_name'] not in oldPosts: + #Found a new posting + LOGGING_STREAM.write(">") + p['posting'] = currTime + oldPosts[p['user_name']] = p + emit("received", p, currTime) + elif p['messages'] < oldPosts[p['user_name']]['messages']: + #Looks like someone checked the message + LOGGING_STREAM.write("<") + emit("read", oldPosts[p['user_name']], currTime) + del oldPosts[p['user_name']] + else: + #Same shit, different minute + pass + + currUsers.add(p['user_name']) + + for missing in oldPosts.viewkeys() - currUsers: + LOGGING_STREAM.write("<") + emit("read", oldPosts[missing], currTime) + del oldPosts[missing] + + lastTime = currTime + LOGGING_STREAM.write("\n") + time.sleep(5) + + except KeyboardInterrupt: + logging.info("Keyboard interrupt detected. Shutting down.") + except Exception as e: + logging.error(str(e)) + + print(repr(oldPosts)) + print(lastTime) + + + +def safe(val): + return '"' + val.replace('"', '\\"') + '"' + +class Database: + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + self.usersConn = MySQLdb.connect(*args, **kwargs) + + + + def getTime(self): + cursor = self.usersConn.cursor(MySQLdb.cursors.DictCursor) + cursor.execute( + """ + SELECT rc_timestamp AS time + FROM recentchanges + ORDER BY rc_timestamp DESC + LIMIT 1 + """ + ) + self.usersConn.commit() + for row in cursor: + return row['time'] + + + def getHugglePostsSince(self, timestamp): + cursor = self.usersConn.cursor(MySQLdb.cursors.DictCursor) + cursor.execute(""" + SELECT DISTINCT p.page_title AS title + FROM revision r + INNER JOIN page p + ON r.rev_page = p.page_id + WHERE p.page_namespace = 3 + AND r.rev_timestamp >= %(timestamp)s + AND ( + r.rev_comment LIKE %(like)s OR + r.rev_comment LIKE %(clue)s + ) + """, + { + "timestamp": timestamp, + "like": "%" + "WP:HG" + "%", + "clue": "%" + "Warning" + "%" + } + ) + return (p['title'].replace("_", " ") for p in cursor) + + def getWaitingPosts(self, users): + cursor = self.usersConn.cursor(MySQLdb.cursors.DictCursor) + userString = ",".join(safe(u) for u in users) + if len(userString) != 0: + cursor.execute(""" + SELECT + u.user_id, + u.user_name, + count(*) as messages + FROM user_newtalk nt + LEFT JOIN user u + ON u.user_id = nt.user_id + WHERE u.user_name IN (""" + userString + """) + GROUP BY u.user_id, u.user_name + UNION + SELECT + NULL as user_id, + nt.user_ip as user_name, + count(*) as messages + FROM user_newtalk nt + WHERE nt.user_ip IN (""" + userString + """) + GROUP BY nt.user_ip, NULL + """ + ) + for post in cursor: + yield post + +if __name__ == "__main__": main() Modified: trunk/tools/wsor/newbie_warnings/track_messages.py =================================================================== --- trunk/tools/wsor/newbie_warnings/track_messages.py 2011-07-19 16:10:06 UTC (rev 92540) +++ trunk/tools/wsor/newbie_warnings/track_messages.py 2011-07-19 16:24:11 UTC (rev 92541) @@ -40,7 +40,7 @@ ) parser.add_argument( '-o', '--out', - type=lambda fn:open(fn, 'a'), + type=lambda fn:open(fn, 'a+'), help='Where should output be appended', default=sys.stdout ) @@ -61,19 +61,19 @@ read_default_file=args.cnf ) - print( + args.out.write( "\t".join( [db.getTime()]+ [ ":".join( [ - e['user_id'], - e['user_name'], - e['messages_waiting'] + encode(e['user_id']), + encode(e['user_name']), + encode(e['messages_waiting']) ] - ) for e in db.getEditorsWithTalk() + ) for e in db.getEditorsWithTalk(args.user_id) ] - ) + ) + "\n" ) @@ -97,7 +97,7 @@ LIMIT 1 """ ) - yield cursor.fetchone()['time'] + return cursor.fetchone()['time'] def getEditorsWithTalk(self, userId): @@ -117,11 +117,10 @@ WHERE r.rev_user = %(user_id)s AND p.page_namespace = 3 ) AS tp - LEFT JOIN user reciever + INNER JOIN user reciever ON reciever.user_name = REPLACE(tp.page_title, "_", " ") INNER JOIN user_newtalk nt ON reciever.user_id = nt.user_id - OR nt.user_ip = tp.page_title GROUP BY reciever.user_id, reciever.user_name """, { _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs