http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92541

Revision: 92541
Author:   halfak
Date:     2011-07-19 16:24:11 +0000 (Tue, 19 Jul 2011)
Log Message:
-----------
added track_hugglers script

Modified Paths:
--------------
    trunk/tools/wsor/newbie_warnings/queries.sql
    trunk/tools/wsor/newbie_warnings/track_messages.py

Added Paths:
-----------
    trunk/tools/wsor/newbie_warnings/track_hugglers.py

Modified: trunk/tools/wsor/newbie_warnings/queries.sql
===================================================================
--- trunk/tools/wsor/newbie_warnings/queries.sql        2011-07-19 16:10:06 UTC 
(rev 92540)
+++ trunk/tools/wsor/newbie_warnings/queries.sql        2011-07-19 16:24:11 UTC 
(rev 92541)
@@ -83,6 +83,101 @@
 GROUP BY tp.page_title;
 
 
+SELECT rc_timestamp AS time 
+FROM recentchanges 
+ORDER BY rc_timestamp DESC 
+LIMIT 1
 
 
 
+SELECT  
+       reciever.user_id, 
+       reciever.user_name, 
+       count(*) AS messages_waiting
+FROM ( 
+SELECT DISTINCT p.page_title
+FROM revision r
+INNER JOIN halfak.huggler_sample h
+       ON r.rev_user = h.user_id
+       AND h.user_name NOT IN ("Tide rolls", "Falcon8765")
+INNER JOIN page p
+       ON r.rev_page = p.page_id
+AND p.page_namespace = 3
+AND r.rev_timestamp >= "20110705230000"
+) AS tp
+INNER JOIN user reciever
+       ON reciever.user_name = REPLACE(tp.page_title, "_", " ")
+INNER JOIN user_newtalk nt
+       ON reciever.user_id = nt.user_id
+GROUP BY reciever.user_id, reciever.user_name
+UNION
+SELECT  
+       NULL AS user_id,
+       tp.page_title AS user_name,
+       count(*) AS messages_waiting
+FROM ( 
+SELECT DISTINCT p.page_title
+FROM revision r
+INNER JOIN halfak.huggler_sample h
+       ON r.rev_user = h.user_id
+       AND h.user_name NOT IN ("Tide rolls", "Falcon8765")
+INNER JOIN page p
+       ON r.rev_page = p.page_id
+WHERE p.page_namespace = 3
+AND r.rev_timestamp >= "20110705230000"
+) AS tp
+INNER JOIN user_newtalk nt
+       ON tp.page_title = nt.user_ip
+GROUP BY tp.page_title;
+
+
+SELECT DISTINCT p.page_title AS title
+FROM revision r
+INNER JOIN halfak.listed_huggler h
+       ON r.rev_user = h.user_id
+INNER JOIN page p
+       ON r.rev_page = p.page_id
+WHERE p.page_namespace = 3
+AND r.rev_timestamp >= "20110719000000"
+AND r.rev_comment LIKE "%[[WP:HG%";
+
+
+SELECT DISTINCT p.page_title AS title
+FROM revision r
+INNER JOIN page p
+       ON r.rev_page = p.page_id
+WHERE p.page_namespace = 3
+AND r.rev_timestamp >= "20110719014743"
+AND r.rev_comment LIKE "%DERPDERPDERP42%";
+
+
+
+SELECT 
+       nt.user_id,
+       IFNULL(u.user_name, nt.user_ip) AS user_name,
+       count(*)
+FROM user_newtalk nt
+LEFT JOIN user u
+       ON u.user_id = nt.user_id
+WHERE u.user_name IN ("EpochFail")
+OR nt.user_ip IN ("EpochFail")
+GROUP BY nt.user_id, nt.user_ip, u.user_name;
+
+
+SELECT 
+       u.user_id,
+       u.user_name,
+       count(*) as messages
+FROM user_newtalk nt
+LEFT JOIN user u
+       ON u.user_id = nt.user_id
+WHERE u.user_name IN ("EpochFail")
+GROUP BY u.user_id, u.user_name
+UNION
+SELECT 
+       nt.user_ip as user_name,
+       NULL as user_id,
+       count(*) as messages
+FROM user_newtalk nt
+WHERE nt.user_ip IN ("EpochFail")
+GROUP BY nt.user_ip, NULL;

Added: trunk/tools/wsor/newbie_warnings/track_hugglers.py
===================================================================
--- trunk/tools/wsor/newbie_warnings/track_hugglers.py                          
(rev 0)
+++ trunk/tools/wsor/newbie_warnings/track_hugglers.py  2011-07-19 16:24:11 UTC 
(rev 92541)
@@ -0,0 +1,192 @@
+import sys, MySQLdb, MySQLdb.cursors, argparse, os, logging, types, time
+import wmf
+
+def encode(v):
+       if v == None: return "\N"
+       
+       if type(v) == types.LongType:     v = int(v)
+       elif type(v) == types.UnicodeType: v = v.encode('utf-8')
+       
+       return str(v).encode("string-escape")
+
+
+def emit(event, p, time):
+       print(
+               "\t".join(encode(v) for v in [
+                       event,
+                       p['user_id'],
+                       p['user_name'],
+                       time
+               ])
+       )
+                       
+
+def main():
+       parser = argparse.ArgumentParser(
+               description=''
+       )
+       parser.add_argument(
+               '-c', '--cnf',
+               metavar="<path>",
+               type=str, 
+               help='the path to MySQL config info (defaults to ~/.my.cnf)',
+               default=os.path.expanduser("~/.my.cnf")
+       )
+       parser.add_argument(
+               '-s', '--host',
+               type=str, 
+               help='the database host to connect to (defaults to localhost)',
+               default="localhost"
+       )
+       parser.add_argument(
+               '-d', '--db',
+               type=str, 
+               help='the language db to run the query in (defaults to enwiki)',
+               default="enwiki"
+       )
+       parser.add_argument(
+               '-o', '--out',
+               type=lambda fn:open(fn, 'a+'), 
+               help='Where should output be appended',
+               default=sys.stdout
+       )
+       args = parser.parse_args()
+       
+       LOGGING_STREAM = sys.stderr
+       logging.basicConfig(
+               level=logging.DEBUG,
+               stream=LOGGING_STREAM,
+               format='%(asctime)s %(levelname)-8s %(message)s',
+               datefmt='%b-%d %H:%M:%S'
+       )
+       
+       logging.info("Connecting to %s:%s using %s." % (args.host, args.db, 
args.cnf))
+       db = Database(
+               host=args.host, 
+               db=args.db, 
+               read_default_file=args.cnf
+       )
+       
+       try:
+               oldPosts = {}
+               lastTime = db.getTime()
+               time.sleep(5)
+               while True:
+                       logging.info("Tracking %s posts.  Looking for new ones 
since %s." % (len(oldPosts), lastTime))
+                       newUsers = set(db.getHugglePostsSince(lastTime))
+                       currTime = db.getTime()
+                       currUsers = set()
+                       for p in db.getWaitingPosts(oldPosts.viewkeys() | 
newUsers):
+                               if p['user_name'] not in oldPosts:
+                                       #Found a new posting
+                                       LOGGING_STREAM.write(">")
+                                       p['posting'] = currTime
+                                       oldPosts[p['user_name']] = p
+                                       emit("received", p, currTime)
+                               elif p['messages'] < 
oldPosts[p['user_name']]['messages']:
+                                       #Looks like someone checked the message
+                                       LOGGING_STREAM.write("<")
+                                       emit("read", oldPosts[p['user_name']], 
currTime)
+                                       del oldPosts[p['user_name']]
+                               else:
+                                       #Same shit, different minute
+                                       pass
+                               
+                               currUsers.add(p['user_name'])
+                       
+                       for missing in oldPosts.viewkeys() - currUsers:
+                               LOGGING_STREAM.write("<")
+                               emit("read", oldPosts[missing], currTime)
+                               del oldPosts[missing]
+                       
+                       lastTime = currTime
+                       LOGGING_STREAM.write("\n")
+                       time.sleep(5)
+               
+       except KeyboardInterrupt:
+               logging.info("Keyboard interrupt detected.  Shutting down.")
+       except Exception as e:
+               logging.error(str(e))
+               
+       print(repr(oldPosts))
+       print(lastTime)
+       
+       
+       
+def safe(val):
+       return '"' + val.replace('"', '\\"') + '"'
+
+class Database:
+       
+       def __init__(self, *args, **kwargs):
+               self.args   = args
+               self.kwargs = kwargs
+               self.usersConn = MySQLdb.connect(*args, **kwargs)
+               
+       
+       
+       def getTime(self):
+               cursor = self.usersConn.cursor(MySQLdb.cursors.DictCursor)
+               cursor.execute(
+                       """
+                       SELECT rc_timestamp AS time 
+                       FROM recentchanges 
+                       ORDER BY rc_timestamp DESC 
+                       LIMIT 1
+                       """
+               )
+               self.usersConn.commit()
+               for row in cursor:
+                       return row['time']
+               
+       
+       def getHugglePostsSince(self, timestamp):
+               cursor = self.usersConn.cursor(MySQLdb.cursors.DictCursor)
+               cursor.execute("""
+                               SELECT DISTINCT p.page_title AS title
+                               FROM revision r
+                               INNER JOIN page p
+                                       ON r.rev_page = p.page_id
+                               WHERE p.page_namespace = 3
+                               AND r.rev_timestamp >= %(timestamp)s
+                               AND (
+                                       r.rev_comment LIKE %(like)s OR
+                                       r.rev_comment LIKE %(clue)s
+                               )
+                       """,
+                       {
+                               "timestamp": timestamp,
+                               "like":      "%" + "WP:HG" + "%",
+                               "clue":      "%" + "Warning" + "%"
+                       }
+               )
+               return (p['title'].replace("_", " ") for p in cursor)
+       
+       def getWaitingPosts(self, users):
+               cursor = self.usersConn.cursor(MySQLdb.cursors.DictCursor)
+               userString = ",".join(safe(u) for u in users)
+               if len(userString) != 0:
+                       cursor.execute("""
+                               SELECT 
+                                       u.user_id,
+                                       u.user_name,
+                                       count(*) as messages
+                               FROM user_newtalk nt
+                               LEFT JOIN user u
+                                       ON u.user_id = nt.user_id
+                               WHERE u.user_name IN (""" + userString + """)
+                               GROUP BY u.user_id, u.user_name
+                               UNION
+                               SELECT 
+                                       NULL as user_id,
+                                       nt.user_ip as user_name,
+                                       count(*) as messages
+                               FROM user_newtalk nt
+                               WHERE nt.user_ip IN (""" + userString + """)
+                               GROUP BY nt.user_ip, NULL
+                               """
+                       )
+                       for post in cursor:
+                               yield post
+       
+if __name__ == "__main__": main()

Modified: trunk/tools/wsor/newbie_warnings/track_messages.py
===================================================================
--- trunk/tools/wsor/newbie_warnings/track_messages.py  2011-07-19 16:10:06 UTC 
(rev 92540)
+++ trunk/tools/wsor/newbie_warnings/track_messages.py  2011-07-19 16:24:11 UTC 
(rev 92541)
@@ -40,7 +40,7 @@
        )
        parser.add_argument(
                '-o', '--out',
-               type=lambda fn:open(fn, 'a'), 
+               type=lambda fn:open(fn, 'a+'), 
                help='Where should output be appended',
                default=sys.stdout
        )
@@ -61,19 +61,19 @@
                read_default_file=args.cnf
        )
        
-       print(
+       args.out.write(
                "\t".join(
                        [db.getTime()]+
                        [
                                ":".join(
                                        [
-                                               e['user_id'],
-                                               e['user_name'],
-                                               e['messages_waiting']
+                                               encode(e['user_id']),
+                                               encode(e['user_name']),
+                                               encode(e['messages_waiting'])
                                        ]
-                               ) for e in db.getEditorsWithTalk()
+                               ) for e in db.getEditorsWithTalk(args.user_id)
                        ]
-               )
+               ) + "\n"
        )
        
        
@@ -97,7 +97,7 @@
                        LIMIT 1
                        """
                )
-               yield cursor.fetchone()['time']
+               return cursor.fetchone()['time']
                
        
        def getEditorsWithTalk(self, userId):
@@ -117,11 +117,10 @@
                                WHERE r.rev_user = %(user_id)s
                                AND p.page_namespace = 3
                        ) AS tp
-                       LEFT JOIN user reciever
+                       INNER JOIN user reciever
                                ON reciever.user_name = REPLACE(tp.page_title, 
"_", " ")
                        INNER JOIN user_newtalk nt
                                ON reciever.user_id = nt.user_id
-                               OR nt.user_ip = tp.page_title
                        GROUP BY reciever.user_id, reciever.user_name
                        """,
                        {


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to