https://www.mediawiki.org/wiki/Special:Code/MediaWiki/115152
Revision: 115152 Author: rfaulk Date: 2012-05-07 14:22:45 +0000 (Mon, 07 May 2012) Log Message: ----------- allowed namespace to be specified in postings generation (in some cases searching outside of talk pages is necessary) Modified Paths: -------------- trunk/tools/wsor/message_templates/run_postings_and_metrics.py trunk/tools/wsor/message_templates/umetrics/postings.py Modified: trunk/tools/wsor/message_templates/run_postings_and_metrics.py =================================================================== --- trunk/tools/wsor/message_templates/run_postings_and_metrics.py 2012-05-07 09:54:15 UTC (rev 115151) +++ trunk/tools/wsor/message_templates/run_postings_and_metrics.py 2012-05-07 14:22:45 UTC (rev 115152) @@ -46,15 +46,16 @@ 131 : False, 132 : False, 133 : False, 134 : False, 135 : False, 136 : False, 137 : False, 138 : False, 139 : False, 140 : False, 141 : False, 142 : False, # ImageTaggingBot 117 : False, 118 : False, 119 : False, 120 : False, 121 : False, 122 : False, 123 : False, 124 : False, 125 : False, 126 : False, 127 : False, 128 : False, # CorenSearchBot 78 : False, 79 : False, 81 : False, 82 : False, # TWINKLE - 4 : True, 5 : True, # Welcome templates - chico - 143 : False, 144 : False, 145 : False, 146 : False # 28 bot + 4 : False, 5 : False, # Welcome templates - chico + 143 : False, 144 : False, 145 : False, 146 : False, # 28 bot + 147 : True # Rcsprinter bot } # template_indices = {78 : True} # Run postings and metrics - generator = 'editcounts' - postings = True + generator = 'warnings' + postings = False # postings_cmd = './postings -h db1047 --start=%(start_time)s --end=%(end_time)s --comment="%(rev_comment)s" --message="{{%(template)s}}" --outfilename postings_%(file_name)s.tsv' postings_cmd = './postings -h db42 --start=%(start_time)s --end=%(end_time)s --message="{{%(template)s}}" --outfilename postings_%(file_name)s.tsv' @@ -69,7 +70,7 @@ template_name = 'z' + str(key) logging.info('Generating postings for %s' % template_name) - name, start_ts, end_ts, comment, user, api_uri, use_rev_file = get_experiment(key) + name, start_ts, end_ts, comment, user, api_uri, use_rev_file, namespace = get_experiment(key) if key >= 60 and key <= 116: filename_part = start_ts[4:8] + '_' + end_ts[4:8] + '_' + template_name @@ -92,6 +93,8 @@ cmd += ' -a %s' % api_uri if use_rev_file != None: cmd += ' --use_in_file %s' % use_rev_file + if namespace != None: + cmd += ' --namespace %s' % namespace else: cmd = metrics_cmd % {'file_name' : filename_part, 'generator' : generator, 'fname_generator' : generator} @@ -114,6 +117,7 @@ comment = None api_uri = None use_rev_file = None + namespace = None if index >= 60 and index <= 77: test_handle = 'Huggle_3' @@ -179,9 +183,17 @@ user = '28bot' comment = '.*' + elif index == 147: + test_handle = 'RcsprinterBot' + start_ts = '20120119000000' + end_ts = '20120501000000' + user = 'RcsprinterBot' + comment = '.*' + namespace = 0 + logging.info('Processing %(test_handle)s from %(start_ts)s to %(end_ts)s on comment "%(comment)s" for user "%(user)s" ...' % {'test_handle' : test_handle, 'start_ts' : start_ts, 'end_ts' : end_ts, 'comment' : comment, 'user' : user}) - return test_handle, start_ts, end_ts, comment, user, api_uri, use_rev_file + return test_handle, start_ts, end_ts, comment, user, api_uri, use_rev_file, namespace """ Call main, exit when execution is complete Modified: trunk/tools/wsor/message_templates/umetrics/postings.py =================================================================== --- trunk/tools/wsor/message_templates/umetrics/postings.py 2012-05-07 09:54:15 UTC (rev 115151) +++ trunk/tools/wsor/message_templates/umetrics/postings.py 2012-05-07 14:22:45 UTC (rev 115152) @@ -151,6 +151,12 @@ help='indicates that revisions should be read from a file. Name is to be specified.', default='' ) + parser.add_argument( + '--namespace', + type=str, + help='Page namespace on which to read revisions.', + default=3 + ) args = parser.parse_args() @@ -220,7 +226,7 @@ line = in_file.readline() else: - for rev in db.getPostings(args.start, args.end, userName=args.user_name, commentRE=args.comment): + for rev in db.getPostings(args.start, args.end, userName=args.user_name, commentRE=args.comment, namespace=args.namespace): count += 1 revs.append(rev) if count % 100 == 0: LOGGING_STREAM.write("|") @@ -280,7 +286,7 @@ self.kwargs = kwargs self.conn = MySQLdb.connect(*args, **kwargs) - def getPostings(self, start, end, userName=None, commentRE=None): + def getPostings(self, start, end, userName=None, commentRE=None, namespace=3): cursor = self.conn.cursor(MySQLdb.cursors.SSDictCursor) query = """ @@ -294,7 +300,7 @@ FROM revision r INNER JOIN page p ON r.rev_page = p.page_id WHERE rev_timestamp BETWEEN %(start)s AND %(end)s - AND page_namespace = 3 + AND page_namespace = %(page_namespace)s """ if userName != None: @@ -302,14 +308,10 @@ if commentRE != None: query += 'AND rev_comment REGEXP %(comment_pattern)s\n' + query = query % {'start': start, 'end': end, 'user_name': userName, 'comment_pattern': commentRE.pattern, 'page_namespace' : namespace} + cursor.execute( - query, - { - 'start': start, - 'end': end, - 'user_name': userName, - 'comment_pattern': commentRE.pattern - } + query ) return cursor @@ -362,7 +364,12 @@ ) result = json.load(response) - diff = result['query']['pages'].values()[0]['revisions'][0]['*'] + try: + diff = result['query']['pages'].values()[0]['revisions'][0]['*'] + except KeyError: + sys.stderr.write("x") + diff = '' + pass # Add the diff tags such that the content is parsed as if it were a diff if type(diff) not in types.StringTypes: diff = '' _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs