Xqt has uploaded a new change for review. https://gerrit.wikimedia.org/r/95761
Change subject: [PEP8] changes for archivebot.py
......................................................................

[PEP8] changes for archivebot.py

Change-Id: Ibec267e2d3e592185b8ed1d7f4f0fe365cb39d4d
---
M archivebot.py
1 file changed, 141 insertions(+), 108 deletions(-)


git pull ssh://gerrit.wikimedia.org:29418/pywikibot/compat refs/changes/61/95761/1

diff --git a/archivebot.py b/archivebot.py
index d0f79ee..79a0faf 100644
--- a/archivebot.py
+++ b/archivebot.py
@@ -76,21 +76,30 @@
 #
 __version__ = '$Id$'
 #
-import wikipedia as pywikibot
-from pywikibot import i18n
-import pagegenerators, query
-Site = pywikibot.getSite()
-
-import os, re, time, locale, traceback, string, urllib, unicodedata
-
-try: #Get a constructor for the MD5 hash object
+import os
+import re
+import time
+import locale
+import traceback
+import string
+import urllib
+import unicodedata
+try:  # Get a constructor for the MD5 hash object
     import hashlib
     new_hash = hashlib.md5
-except ImportError: #Old python?
+except ImportError:  # Old python?
     import md5
     new_hash = md5.md5
+import wikipedia as pywikibot
+from pywikibot import i18n
+import pagegenerators
+import query
+
+
+Site = pywikibot.getSite()
 language = Site.language()
 
+
 def message(key, lang=Site.language()):
     return i18n.twtranslate(lang, key)
 
@@ -102,7 +111,9 @@
 
 class MissingConfigError(pywikibot.Error):
     """The config is missing in the header (either it's in one of the threads
-    or transcluded from another page)."""
+    or transcluded from another page).
+
+    """
 
 
 class AlgorithmError(MalformedConfigError):
@@ -111,20 +122,25 @@
 
 class ArchiveSecurityError(pywikibot.Error):
     """Archive is not a subpage of page being archived and key not specified
-    (or incorrect)."""
+    (or incorrect).
+
+    """
 
 
 def str2time(str):
     """Accepts a string defining a time period:
     7d - 7 days
    36h - 36 hours
-    Returns the corresponding time, measured in seconds."""
+    Returns the corresponding time, measured in seconds.
+
+    """
    if str[-1] == 'd':
-        return int(str[:-1])*24*3600
+        return int(str[:-1]) * 24 * 3600
    elif str[-1] == 'h':
-        return int(str[:-1])*3600
+        return int(str[:-1]) * 3600
    else:
        return int(str)
 
+
 def str2size(str):
     """Accepts a string defining a size:
@@ -132,47 +148,57 @@
     150K - 150 kilobytes
     2M - 2 megabytes
     Returns a tuple (size,unit), where size is an integer and unit is
-    'B' (bytes) or 'T' (threads)."""
-    if str[-1] in string.digits: #TODO: de-uglify
-        return (int(str),'B')
+    'B' (bytes) or 'T' (threads).
+
+    """
+    if str[-1] in string.digits:  # TODO: de-uglify
+        return (int(str), 'B')
     elif str[-1] in ['K', 'k']:
-        return (int(str[:-1])*1024,'B')
+        return (int(str[:-1]) * 1024, 'B')
     elif str[-1] == 'M':
-        return (int(str[:-1])*1024*1024,'B')
+        return (int(str[:-1]) * 1024 * 1024, 'B')
     elif str[-1] == 'T':
-        return (int(str[:-1]),'T')
+        return (int(str[:-1]), 'T')
     else:
-        return (int(str[:-1])*1024,'B')
+        return (int(str[:-1]) * 1024, 'B')
+
 
 def int2month(num):
     """Returns the locale's full name of month 'num' (1-12)."""
     if hasattr(locale, 'nl_langinfo'):
-        return locale.nl_langinfo(locale.MON_1+num-1).decode('utf-8')
+        return locale.nl_langinfo(locale.MON_1 + num - 1).decode('utf-8')
     Months = ['january', 'february', 'march', 'april', 'may_long', 'june',
               'july', 'august', 'september', 'october', 'november',
               'december']
-    return Site.mediawiki_message(Months[num-1])
+    return Site.mediawiki_message(Months[num - 1])
+
 
 def int2month_short(num):
     """Returns the locale's abbreviated name of month 'num' (1-12)."""
     if hasattr(locale, 'nl_langinfo'):
         #filter out non-alpha characters
-        return ''.join([c for c in locale.nl_langinfo(locale.ABMON_1+num-1).decode('utf-8') if c.isalpha()])
+        return ''.join([c for c in
+                        locale.nl_langinfo(
+                            locale.ABMON_1 + num - 1).decode('utf-8')
+                        if c.isalpha()])
     Months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
               'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
-    return Site.mediawiki_message(Months[num-1])
+    return Site.mediawiki_message(Months[num - 1])
+
 
 def txt2timestamp(txt, format):
     """Attempts to convert the timestamp 'txt' according to given 'format'.
-    On success, returns the time tuple; on failure, returns None."""
+    On success, returns the time tuple; on failure, returns None.
+
+    """
 ##    print txt, format
     try:
-        return time.strptime(txt,format)
+        return time.strptime(txt, format)
     except ValueError:
         try:
-            return time.strptime(txt.encode('utf8'),format)
+            return time.strptime(txt.encode('utf8'), format)
         except:
             pass
-    return None
+
 
 def generateTransclusions(Site, template, namespaces=[]):
     pywikibot.output(u'Fetching template transclusions...')
@@ -186,12 +212,14 @@
 
 class DiscussionThread(object):
-    """An object representing a discussion thread on a page, that is something of the form:
+    """An object representing a discussion thread on a page, that is something
+    of the form:
 
     == Title of thread ==
 
     Thread content here. ~~~~
     :Reply, etc. ~~~~
+
     """
 
     def __init__(self, title):
@@ -201,7 +229,7 @@
 
     def __repr__(self):
         return '%s("%s",%d bytes)' \
-               % (self.__class__.__name__,self.title,len(self.content))
+               % (self.__class__.__name__, self.title, len(self.content))
 
     def feedLine(self, line):
         if not self.content and not line:
@@ -216,33 +244,38 @@
         # 2007. december 8., 13:42 (CET)
         TM = re.search(r'(\d\d):(\d\d), (\d\d?) (\S+) (\d\d\d\d) \(.*?\)', line)
         if not TM:
-            TM = re.search(r'(\d\d):(\d\d), (\S+) (\d\d?), (\d\d\d\d) \(.*?\)', line)
+            TM = re.search(r'(\d\d):(\d\d), (\S+) (\d\d?), (\d\d\d\d) \(.*?\)',
+                           line)
         if not TM:
-            TM = re.search(r'(\d{4})\. (\S+) (\d\d?)\., (\d\d:\d\d) \(.*?\)', line)
+            TM = re.search(r'(\d{4})\. (\S+) (\d\d?)\., (\d\d:\d\d) \(.*?\)',
+                           line)
         # 18. apr 2006 kl.18:39 (UTC)
         # 4. nov 2006 kl. 20:46 (CET)
         if not TM:
-            TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kl\.\W*(\d\d):(\d\d) \(.*?\)', line)
+            TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kl\.\W*(\d\d):(\d\d) \(.*?\)',
+                           line)
         #3. joulukuuta 2008 kello 16.26 (EET)
         if not TM:
-            TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kello \W*(\d\d).(\d\d) \(.*?\)', line)
+            TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kello \W*(\d\d).(\d\d) \(.*?\)',
+                           line)
         if not TM:
             # 14:23, 12. Jan. 2009 (UTC)
             pat = re.compile(r'(\d\d):(\d\d), (\d\d?)\. (\S+)\.? (\d\d\d\d) \((?:UTC|CES?T)\)')
             TM = pat.search(line)
         # ro.wiki: 4 august 2012 13:01 (EEST)
         if not TM:
-            TM = re.search(r'(\d\d?) (\S+) (\d\d\d\d) (\d\d):(\d\d) \(.*?\)', line)
+            TM = re.search(r'(\d\d?) (\S+) (\d\d\d\d) (\d\d):(\d\d) \(.*?\)',
+                           line)
         if TM:
             # Strip away all diacritics in the Mn ('Mark, non-spacing') category
-            # NFD decomposition splits combined characters (e.g. 'ä", LATIN SMALL
-            # LETTER A WITH DIAERESIS) into two entities: LATIN SMALL LETTER A
-            # and COMBINING DIAERESIS. The latter falls in the Mn category and is
-            # filtered out, resuling in 'a'.
+            # NFD decomposition splits combined characters (e.g. 'ä",
+            # LATIN SMALL LETTER A WITH DIAERESIS) into two entities:
+            # LATIN SMALL LETTER A and COMBINING DIAERESIS. The latter falls
+            # in the Mn category and is filtered out, resuling in 'a'.
             _TM = ''.join(c for c in unicodedata.normalize('NFD', TM.group(0))
                           if unicodedata.category(c) != 'Mn')
 
-            TIME = txt2timestamp(_TM,"%d. %b %Y kl. %H:%M (%Z)")
+            TIME = txt2timestamp(_TM, "%d. %b %Y kl. %H:%M (%Z)")
             if not TIME:
                 TIME = txt2timestamp(_TM, "%Y. %B %d., %H:%M (%Z)")
             if not TIME:
@@ -264,7 +297,7 @@
             if not TIME:
                 TIME = txt2timestamp(_TM, "%H:%M, %B %d, %Y (%Z)")
             if not TIME:
-                TIME = txt2timestamp(_TM,"%d. %Bta %Y kello %H.%M (%Z)")
+                TIME = txt2timestamp(_TM, "%d. %Bta %Y kello %H.%M (%Z)")
             if not TIME:
                 TIME = txt2timestamp(_TM, "%d %B %Y %H:%M (%Z)")
             if not TIME:
@@ -282,9 +315,9 @@
     def toText(self):
         return "== " + self.title + ' ==\n\n' + self.content
 
-    def shouldBeArchived(self,Archiver):
+    def shouldBeArchived(self, Archiver):
         algo = Archiver.get('algo')
-        reT = re.search(r'^old\((.*)\)$',algo)
+        reT = re.search(r'^old\((.*)\)$', algo)
         if reT:
             if not self.timestamp:
                 return ''
@@ -298,7 +331,9 @@
 
 class DiscussionPage(pywikibot.Page):
     """A class that represents a single discussion page as well as an archive
-    page. Feed threads to it and run an update() afterwards."""
+    page. Feed threads to it and run an update() afterwards.
+
+    """
 
     def __init__(self, title, archiver, vars=None):
         pywikibot.Page.__init__(self, Site, title)
@@ -321,12 +356,12 @@
         self.archives = {}
         self.archivedThreads = 0
         lines = self.get().split('\n')
-        found = False #Reading header
+        found = False  # Reading header
         curThread = None
         for line in lines:
-            threadHeader = re.search('^== *([^=].*?) *== *$',line)
+            threadHeader = re.search('^== *([^=].*?) *== *$', line)
             if threadHeader:
-                found = True #Reading threads now
+                found = True  # Reading threads now
                 if curThread:
                     self.threads.append(curThread)
                 curThread = DiscussionThread(threadHeader.group(1))
             else:
@@ -339,7 +374,7 @@
             self.threads.append(curThread)
         pywikibot.output(u'%d Threads found on %s' % (len(self.threads), self))
 
-    def feedThread(self, thread, maxArchiveSize=(250*1024,'B')):
+    def feedThread(self, thread, maxArchiveSize=(250 * 1024, 'B')):
         self.threads.append(thread)
         self.archivedThreads += 1
         if maxArchiveSize[1] == 'B':
@@ -353,11 +388,11 @@
     def size(self):
         return len(self.header) + sum([t.size() for t in self.threads])
 
-    def update(self, summary, sortThreads = False):
+    def update(self, summary, sortThreads=False):
         if sortThreads:
             pywikibot.output(u'Sorting threads...')
-            self.threads.sort(key = lambda t: t.timestamp)
-        newtext = re.sub('\n*$', '\n\n', self.header) #Fix trailing newlines
+            self.threads.sort(key=lambda t: t.timestamp)
+        newtext = re.sub('\n*$', '\n\n', self.header)  # Fix trailing newlines
         for t in self.threads:
             newtext += t.toText()
         if self.full:
@@ -374,25 +409,25 @@
 
     def __init__(self, Page, tpl, salt, force=False):
         self.attributes = {
-            'algo' : ['old(24h)',False],
-            'archive' : ['',False],
-            'maxarchivesize' : ['1000M',False],
-            'counter' : ['1',False],
-            'key' : ['',False],
-            }
+            'algo': ['old(24h)', False],
+            'archive': ['', False],
+            'maxarchivesize': ['1000M', False],
+            'counter': ['1', False],
+            'key': ['', False],
+        }
         self.tpl = tpl
         self.salt = salt
         self.force = force
         self.Page = DiscussionPage(Page.title(), self)
         self.loadConfig()
         self.commentParams = {
-            'from' : self.Page.title(),
-            }
+            'from': self.Page.title(),
+        }
         self.archives = {}
         self.archivedThreads = 0
 
     def get(self, attr, default=''):
-        return self.attributes.get(attr,[default])[0]
+        return self.attributes.get(attr, [default])[0]
 
     def set(self, attr, value, out=True):
         if attr == 'archive':
@@ -404,15 +439,15 @@
                      and a != 'maxage']
 
     def attr2text(self):
-        return '{{%s\n%s\n}}' \
-               % (self.tpl,
-                  '\n'.join(['|%s = %s'%(a,self.get(a))
-                             for a in self.saveables()]))
+        return '{{%s\n%s\n}}' % (self.tpl,
+                                 '\n'.join(['|%s = %s '
+                                            % (a, self.get(a))
+                                            for a in self.saveables()]))
 
     def key_ok(self):
         s = new_hash()
-        s.update(self.salt+'\n')
-        s.update(self.Page.title().encode('utf8')+'\n')
+        s.update(self.salt + '\n')
+        s.update(self.Page.title().encode('utf8') + '\n')
         return self.get('key') == s.hexdigest()
 
     def loadConfig(self):
@@ -439,12 +474,13 @@
         if not archive:
             return
         if not self.force \
-           and not self.Page.title()+'/' == archive[:len(self.Page.title())+1] \
+           and not self.Page.title() + '/' == archive[
+               :len(self.Page.title()) + 1] \
           and not self.key_ok():
             raise ArchiveSecurityError
         if not archive in self.archives:
             self.archives[archive] = DiscussionPage(archive, self, vars)
-        return self.archives[archive].feedThread(thread,maxArchiveSize)
+        return self.archives[archive].feedThread(thread, maxArchiveSize)
 
     def analyzePage(self):
         maxArchSize = str2size(self.get('maxarchivesize'))
@@ -456,9 +492,9 @@
         pywikibot.output(u'Processing %d threads' % len(oldthreads))
         for t in oldthreads:
             if len(oldthreads) - self.archivedThreads \
-               <= int(self.get('minthreadsleft',5)):
+               <= int(self.get('minthreadsleft', 5)):
                 self.Page.threads.append(t)
-                continue #Because there's too little threads left.
+                continue  # Because there's too little threads left.
             # TODO: Make an option so that unstamped (unsigned) posts get
             # archived.
             why = t.shouldBeArchived(self)
@@ -466,17 +502,17 @@
                 archive = self.get('archive')
                 TStuple = time.gmtime(t.timestamp)
                 vars = {
-                    'counter' : archCounter,
-                    'year' : TStuple[0],
-                    'month' : TStuple[1],
-                    'monthname' : int2month(TStuple[1]),
-                    'monthnameshort' : int2month_short(TStuple[1]),
-                    'week' : int(time.strftime('%W',TStuple)),
-                    }
+                    'counter': archCounter,
+                    'year': TStuple[0],
+                    'month': TStuple[1],
+                    'monthname': int2month(TStuple[1]),
+                    'monthnameshort': int2month_short(TStuple[1]),
+                    'week': int(time.strftime('%W', TStuple)),
+                }
                 archive = archive % vars
-                if self.feedArchive(archive,t,maxArchSize,vars):
+                if self.feedArchive(archive, t, maxArchSize, vars):
                     archCounter += 1
-                    self.set('counter',str(archCounter))
+                    self.set('counter', str(archCounter))
                 whys.append(why)
                 self.archivedThreads += 1
             else:
@@ -487,7 +523,7 @@
         if not self.Page.botMayEdit(Site.username):
             return
         whys = self.analyzePage()
-        if self.archivedThreads < int(self.get('minthreadstoarchive',2)):
+        if self.archivedThreads < int(self.get('minthreadstoarchive', 2)):
             # We might not want to archive a measly few threads
             # (lowers edit frequency)
             pywikibot.output(u'There are only %d Threads. Skipping'
@@ -495,7 +531,7 @@
             return
         if whys:
             pywikibot.output(u'Archiving %d thread(s).' % self.archivedThreads)
-            #Save the archives first (so that bugs don't cause a loss of data)
+            # Save the archives first (so that bugs don't cause a loss of data)
             for a in sorted(self.archives.keys()):
                 self.commentParams['count'] = self.archives[a].archivedThreads
                 comment = i18n.twntranslate(language,
@@ -503,12 +539,12 @@
                                             self.commentParams)
                 self.archives[a].update(comment)
 
-            #Save the page itself
-            rx = re.compile('{{'+self.tpl+'\n.*?\n}}',re.DOTALL)
-            self.Page.header = rx.sub(self.attr2text(),self.Page.header)
+            # Save the page itself
+            rx = re.compile('{{%s\n.*?\n}}' % self.tpl, re.DOTALL)
+            self.Page.header = rx.sub(self.attr2text(), self.Page.header)
             self.commentParams['count'] = self.archivedThreads
-            self.commentParams['archives'] \
-                = ', '.join(['[['+a.title()+']]' for a in self.archives.values()])
+            self.commentParams['archives'] = ', '.join(
+                ['[[%s]]' % a.title() for a in self.archives.values()])
             if not self.commentParams['archives']:
                 self.commentParams['archives'] = '/dev/null'
             self.commentParams['why'] = ', '.join(whys)
@@ -523,30 +559,30 @@
     from optparse import OptionParser
     parser = OptionParser(usage='usage: %prog [options] [LINKPAGE(s)]')
     parser.add_option('-f', '--file', dest='filename',
-            help='load list of pages from FILE', metavar='FILE')
+                      help='load list of pages from FILE', metavar='FILE')
     parser.add_option('-p', '--page', dest='pagename',
-            help='archive a single PAGE', metavar='PAGE')
+                      help='archive a single PAGE', metavar='PAGE')
     parser.add_option('-n', '--namespace', dest='namespace', type='int',
-            help='only archive pages from a given namespace')
+                      help='only archive pages from a given namespace')
     parser.add_option('-s', '--salt', dest='salt',
-            help='specify salt')
+                      help='specify salt')
     parser.add_option('-F', '--force', action='store_true', dest='force',
-            help='override security options')
+                      help='override security options')
     parser.add_option('-c', '--calc', dest='calc',
-            help='calculate key for PAGE and exit', metavar='PAGE')
+                      help='calculate key for PAGE and exit', metavar='PAGE')
     parser.add_option('-l', '--locale', dest='locale',
-            help='switch to locale LOCALE', metavar='LOCALE')
+                      help='switch to locale LOCALE', metavar='LOCALE')
     parser.add_option('-L', '--lang', dest='lang',
-            help='current language code', metavar='lang')
+                      help='current language code', metavar='lang')
     parser.add_option('-T', '--timezone', dest='timezone',
-            help='switch timezone to TIMEZONE', metavar='TIMEZONE')
+                      help='switch timezone to TIMEZONE', metavar='TIMEZONE')
     parser.add_option('-S', '--simulate', action='store_true', dest='simulate',
-            help='Do not change pages, just simulate')
+                      help='Do not change pages, just simulate')
     (options, args) = parser.parse_args()
 
     if options.locale:
         #Required for english month names
-        locale.setlocale(locale.LC_TIME,options.locale)
+        locale.setlocale(locale.LC_TIME, options.locale)
 
     if options.timezone:
         os.environ['TZ'] = options.timezone
@@ -558,8 +594,8 @@
         if not options.salt:
             parser.error('Note: you must specify a salt to calculate a key')
         s = new_hash()
-        s.update(options.salt+'\n')
-        s.update(options.calc+'\n')
+        s.update(options.salt + '\n')
+        s.update(options.calc + '\n')
         pywikibot.output(u'key = ' + s.hexdigest())
         return
 
@@ -588,23 +624,19 @@
     for a in args:
         pagelist = []
         if not options.filename and not options.pagename:
-            #for pg in pywikibot.Page(Site,a).getReferences(follow_redirects=False,onlyTemplateInclusion=True):
-            if not options.namespace == None:
+            if options.namespace is not None:
                 ns = [str(options.namespace)]
             else:
                 ns = []
             for pg in generateTransclusions(Site, a, ns):
                 pagelist.append(pg)
         if options.filename:
-            for pg in file(options.filename,'r').readlines():
-                pagelist.append(pywikibot.Page(Site,pg))
+            for pg in file(options.filename, 'r').readlines():
+                pagelist.append(pywikibot.Page(Site, pg))
         if options.pagename:
             pagelist.append(pywikibot.Page(Site, options.pagename,
                                            defaultNamespace=3))
-
         pagelist = sorted(pagelist)
-        #if not options.namespace == None:
-        #    pagelist = [pg for pg in pagelist if pg.namespace()==options.namespace]
         for pg in iter(pagelist):
             pywikibot.output(u'Processing %s' % pg)
             # Catching exceptions, so that errors in one page do not bail out
@@ -617,6 +649,7 @@
                 pywikibot.output(u'Error occured while processing page %s'
                                  % pg)
                 traceback.print_exc()
 
+
 if __name__ == '__main__':
     try:
         main()

--
To view, visit https://gerrit.wikimedia.org/r/95761
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibec267e2d3e592185b8ed1d7f4f0fe365cb39d4d
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <i...@gno.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
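
For readers skimming the patch, the two small parsing helpers it reflows,
str2time and str2size, are self-contained enough to try outside the bot.
The sketch below merely restates the behavior visible in the hunks above
(same names and branch logic; the pywikibot machinery is stripped out and
the built-in-shadowing 'str' parameter is renamed to 's'). It is an
illustration only, not part of the change:

import string


def str2time(s):
    # '7d' -> seconds in seven days, '36h' -> seconds in 36 hours,
    # bare digits -> value is already in seconds
    if s[-1] == 'd':
        return int(s[:-1]) * 24 * 3600
    elif s[-1] == 'h':
        return int(s[:-1]) * 3600
    return int(s)


def str2size(s):
    # '150K' -> (153600, 'B'), '2M' -> (2097152, 'B'),
    # '5T' -> (5, 'T'), i.e. a thread count rather than bytes
    if s[-1] in string.digits:
        return (int(s), 'B')
    elif s[-1] in ('K', 'k'):
        return (int(s[:-1]) * 1024, 'B')
    elif s[-1] == 'M':
        return (int(s[:-1]) * 1024 * 1024, 'B')
    elif s[-1] == 'T':
        return (int(s[:-1]), 'T')
    # any unrecognised suffix falls back to being treated like 'K'
    return (int(s[:-1]) * 1024, 'B')


assert str2time('7d') == 604800
assert str2size('2M') == (2097152, 'B')

Note that the final fallback quietly interprets an unknown suffix as
kilobytes, which is why the docstring in the patch only promises 'B' or
'T' as units.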