https://www.mediawiki.org/wiki/Special:Code/MediaWiki/115152

Revision: 115152
Author:   rfaulk
Date:     2012-05-07 14:22:45 +0000 (Mon, 07 May 2012)
Log Message:
-----------
Allow the namespace to be specified in postings generation (in some cases,
searching outside of talk pages is necessary).

Modified Paths:
--------------
    trunk/tools/wsor/message_templates/run_postings_and_metrics.py
    trunk/tools/wsor/message_templates/umetrics/postings.py

Modified: trunk/tools/wsor/message_templates/run_postings_and_metrics.py
===================================================================
--- trunk/tools/wsor/message_templates/run_postings_and_metrics.py      
2012-05-07 09:54:15 UTC (rev 115151)
+++ trunk/tools/wsor/message_templates/run_postings_and_metrics.py      
2012-05-07 14:22:45 UTC (rev 115152)
@@ -46,15 +46,16 @@
                         131 : False, 132 : False, 133 : False, 134 : False, 
135 : False, 136 : False, 137 : False, 138 : False, 139 : False, 140 : False, 
141 : False, 142 : False, # ImageTaggingBot
                         117 : False, 118 : False, 119 : False, 120 : False, 
121 : False, 122 : False, 123 : False, 124 : False, 125 : False, 126 : False, 
127 : False, 128 : False, # CorenSearchBot
                         78 : False, 79 : False, 81 : False, 82 : False, # 
TWINKLE
-                        4 : True, 5 : True,  # Welcome templates - chico
-                        143 : False, 144 : False, 145 : False, 146 : False  # 
28 bot
+                        4 : False, 5 : False,  # Welcome templates - chico
+                        143 : False, 144 : False, 145 : False, 146 : False,  # 
28 bot
+                        147 : True # Rcsprinter bot
     }
     #    template_indices = {78 : True}
     # Run postings and metrics
 
 
-    generator = 'editcounts'
-    postings = True
+    generator = 'warnings'
+    postings = False
 
     # postings_cmd = './postings -h db1047 --start=%(start_time)s 
--end=%(end_time)s --comment="%(rev_comment)s" --message="{{%(template)s}}" 
--outfilename postings_%(file_name)s.tsv'
     postings_cmd = './postings -h db42 --start=%(start_time)s 
--end=%(end_time)s --message="{{%(template)s}}" --outfilename 
postings_%(file_name)s.tsv'
@@ -69,7 +70,7 @@
             template_name = 'z' + str(key)
             logging.info('Generating postings for %s' % template_name)
 
-            name, start_ts, end_ts, comment, user, api_uri, use_rev_file  = 
get_experiment(key)
+            name, start_ts, end_ts, comment, user, api_uri, use_rev_file, 
namespace  = get_experiment(key)
 
             if key >= 60 and key <= 116:
                 filename_part = start_ts[4:8] + '_' + end_ts[4:8] + '_' + 
template_name
@@ -92,6 +93,8 @@
                     cmd +=  ' -a %s' % api_uri
                 if use_rev_file != None:
                     cmd += ' --use_in_file %s' % use_rev_file
+                if namespace != None:
+                    cmd += ' --namespace %s' % namespace
 
             else:
                 cmd = metrics_cmd % {'file_name' : filename_part, 'generator' 
: generator, 'fname_generator' : generator}
@@ -114,6 +117,7 @@
     comment = None
     api_uri = None
     use_rev_file = None
+    namespace = None
 
     if index >= 60 and index <= 77:
         test_handle = 'Huggle_3'
@@ -179,9 +183,17 @@
         user = '28bot'
         comment = '.*'
 
+    elif index == 147:
+        test_handle = 'RcsprinterBot'
+        start_ts = '20120119000000'
+        end_ts = '20120501000000'
+        user = 'RcsprinterBot'
+        comment = '.*'
+        namespace = 0
+
     logging.info('Processing %(test_handle)s from %(start_ts)s to %(end_ts)s 
on comment "%(comment)s" for user "%(user)s" ...' % {'test_handle' : 
test_handle, 'start_ts' : start_ts, 'end_ts' : end_ts, 'comment' : comment, 
'user' : user})
 
-    return test_handle, start_ts, end_ts, comment, user, api_uri, use_rev_file
+    return test_handle, start_ts, end_ts, comment, user, api_uri, 
use_rev_file, namespace
 
 """
     Call main, exit when execution is complete

Modified: trunk/tools/wsor/message_templates/umetrics/postings.py
===================================================================
--- trunk/tools/wsor/message_templates/umetrics/postings.py     2012-05-07 
09:54:15 UTC (rev 115151)
+++ trunk/tools/wsor/message_templates/umetrics/postings.py     2012-05-07 
14:22:45 UTC (rev 115152)
@@ -151,6 +151,12 @@
         help='indicates that revisions should be read from a file.  Name is to 
be specified.',
         default=''
     )
+    parser.add_argument(
+        '--namespace',
+        type=str,
+        help='Page namespace on which to read revisions.',
+        default=3
+    )
 
     args = parser.parse_args()
 
@@ -220,7 +226,7 @@
             line = in_file.readline()
     else:
 
-        for rev in db.getPostings(args.start, args.end, 
userName=args.user_name, commentRE=args.comment):
+        for rev in db.getPostings(args.start, args.end, 
userName=args.user_name, commentRE=args.comment, namespace=args.namespace):
             count += 1
             revs.append(rev)
             if count % 100 == 0: LOGGING_STREAM.write("|")
@@ -280,7 +286,7 @@
         self.kwargs = kwargs
         self.conn   = MySQLdb.connect(*args, **kwargs)
 
-    def getPostings(self, start, end, userName=None, commentRE=None):
+    def getPostings(self, start, end, userName=None, commentRE=None, 
namespace=3):
 
         cursor = self.conn.cursor(MySQLdb.cursors.SSDictCursor)
         query = """
@@ -294,7 +300,7 @@
                        FROM revision r
                        INNER JOIN page p ON r.rev_page = p.page_id
                        WHERE rev_timestamp BETWEEN %(start)s AND %(end)s
-                       AND page_namespace = 3
+                       AND page_namespace = %(page_namespace)s
                        """
 
         if userName != None:
@@ -302,14 +308,10 @@
         if commentRE != None:
             query += 'AND rev_comment REGEXP %(comment_pattern)s\n'
 
+        query = query % {'start': start, 'end': end, 'user_name': userName, 
'comment_pattern': commentRE.pattern, 'page_namespace' : namespace}
+
         cursor.execute(
-            query,
-                {
-                'start': start,
-                'end': end,
-                'user_name': userName,
-                'comment_pattern': commentRE.pattern
-            }
+            query
         )
 
         return cursor
@@ -362,7 +364,12 @@
                     )
 
                     result = json.load(response)
-                    diff = 
result['query']['pages'].values()[0]['revisions'][0]['*']
+                    try:
+                        diff = 
result['query']['pages'].values()[0]['revisions'][0]['*']
+                    except KeyError:
+                        sys.stderr.write("x")
+                        diff = ''
+                        pass
 
                     # Add the diff tags such that the content is parsed as if 
it were a diff
                     if type(diff) not in types.StringTypes: diff = ''


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to