Xqt has uploaded a new change for review. https://gerrit.wikimedia.org/r/95761
Change subject: [PEP8] changes for archivebot.py
......................................................................

[PEP8] changes for archivebot.py

Change-Id: Ibec267e2d3e592185b8ed1d7f4f0fe365cb39d4d
---
M archivebot.py
1 file changed, 141 insertions(+), 108 deletions(-)


git pull ssh://gerrit.wikimedia.org:29418/pywikibot/compat refs/changes/61/95761/1

diff --git a/archivebot.py b/archivebot.py
index d0f79ee..79a0faf 100644
--- a/archivebot.py
+++ b/archivebot.py
@@ -76,21 +76,30 @@
 #
 __version__ = '$Id$'
 #
-import wikipedia as pywikibot
-from pywikibot import i18n
-import pagegenerators, query
-Site = pywikibot.getSite()
-
-import os, re, time, locale, traceback, string, urllib, unicodedata
-
-try: #Get a constructor for the MD5 hash object
+import os
+import re
+import time
+import locale
+import traceback
+import string
+import urllib
+import unicodedata
+try:  # Get a constructor for the MD5 hash object
     import hashlib
     new_hash = hashlib.md5
-except ImportError: #Old python?
+except ImportError:  # Old python?
     import md5
     new_hash = md5.md5
+import wikipedia as pywikibot
+from pywikibot import i18n
+import pagegenerators
+import query
+
+
+Site = pywikibot.getSite()
 language = Site.language()
 
+
 def message(key, lang=Site.language()):
     return i18n.twtranslate(lang, key)
 
@@ -102,7 +111,9 @@
 
 class MissingConfigError(pywikibot.Error):
     """The config is missing in the header (either it's in one of the threads
-    or transcluded from another page)."""
+    or transcluded from another page).
+
+    """
 
 
 class AlgorithmError(MalformedConfigError):
@@ -111,20 +122,25 @@
 
 class ArchiveSecurityError(pywikibot.Error):
     """Archive is not a subpage of page being archived and key not specified
-    (or incorrect)."""
+    (or incorrect).
+
+    """
 
 
 def str2time(str):
     """Accepts a string defining a time period:
     7d - 7 days
    36h - 36 hours
-    Returns the corresponding time, measured in seconds."""
+    Returns the corresponding time, measured in seconds.
+
+    """
    if str[-1] == 'd':
-        return int(str[:-1])*24*3600
+        return int(str[:-1]) * 24 * 3600
    elif str[-1] == 'h':
-        return int(str[:-1])*3600
+        return int(str[:-1]) * 3600
    else:
        return int(str)
 
+
 def str2size(str):
     """Accepts a string defining a size:
@@ -132,47 +148,57 @@
     150K - 150 kilobytes
     2M - 2 megabytes
     Returns a tuple (size,unit), where size is an integer and unit is
-    'B' (bytes) or 'T' (threads)."""
-    if str[-1] in string.digits: #TODO: de-uglify
-        return (int(str),'B')
+    'B' (bytes) or 'T' (threads).
+
+    """
+    if str[-1] in string.digits:  # TODO: de-uglify
+        return (int(str), 'B')
     elif str[-1] in ['K', 'k']:
-        return (int(str[:-1])*1024,'B')
+        return (int(str[:-1]) * 1024, 'B')
     elif str[-1] == 'M':
-        return (int(str[:-1])*1024*1024,'B')
+        return (int(str[:-1]) * 1024 * 1024, 'B')
     elif str[-1] == 'T':
-        return (int(str[:-1]),'T')
+        return (int(str[:-1]), 'T')
     else:
-        return (int(str[:-1])*1024,'B')
+        return (int(str[:-1]) * 1024, 'B')
+
 
 def int2month(num):
     """Returns the locale's full name of month 'num' (1-12)."""
     if hasattr(locale, 'nl_langinfo'):
-        return locale.nl_langinfo(locale.MON_1+num-1).decode('utf-8')
+        return locale.nl_langinfo(locale.MON_1 + num - 1).decode('utf-8')
     Months = ['january', 'february', 'march', 'april', 'may_long', 'june',
               'july', 'august', 'september', 'october', 'november',
               'december']
-    return Site.mediawiki_message(Months[num-1])
+    return Site.mediawiki_message(Months[num - 1])
+
 
 def int2month_short(num):
     """Returns the locale's abbreviated name of month 'num' (1-12)."""
     if hasattr(locale, 'nl_langinfo'):
         #filter out non-alpha characters
-        return ''.join([c for c in locale.nl_langinfo(locale.ABMON_1+num-1).decode('utf-8') if c.isalpha()])
+        return ''.join([c for c in
+                        locale.nl_langinfo(
+                            locale.ABMON_1 + num - 1).decode('utf-8')
+                        if c.isalpha()])
     Months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
               'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
-    return Site.mediawiki_message(Months[num-1])
+    return Site.mediawiki_message(Months[num - 1])
+
 
 def txt2timestamp(txt, format):
     """Attempts to convert the timestamp 'txt' according to given 'format'.
-    On success, returns the time tuple; on failure, returns None."""
+    On success, returns the time tuple; on failure, returns None.
+
+    """
 ##    print txt, format
     try:
-        return time.strptime(txt,format)
+        return time.strptime(txt, format)
     except ValueError:
         try:
-            return time.strptime(txt.encode('utf8'),format)
+            return time.strptime(txt.encode('utf8'), format)
         except:
             pass
-    return None
+
 
 def generateTransclusions(Site, template, namespaces=[]):
     pywikibot.output(u'Fetching template transclusions...')
@@ -186,12 +212,14 @@
 
 class DiscussionThread(object):
-    """An object representing a discussion thread on a page, that is something of the form:
+    """An object representing a discussion thread on a page, that is something
+    of the form:
 
     == Title of thread ==
 
     Thread content here. ~~~~
     :Reply, etc. ~~~~
+
     """
 
     def __init__(self, title):
@@ -201,7 +229,7 @@
 
     def __repr__(self):
         return '%s("%s",%d bytes)' \
-               % (self.__class__.__name__,self.title,len(self.content))
+               % (self.__class__.__name__, self.title, len(self.content))
 
     def feedLine(self, line):
         if not self.content and not line:
@@ -216,33 +244,38 @@
         # 2007. december 8., 13:42 (CET)
         TM = re.search(r'(\d\d):(\d\d), (\d\d?) (\S+) (\d\d\d\d) \(.*?\)', line)
         if not TM:
-            TM = re.search(r'(\d\d):(\d\d), (\S+) (\d\d?), (\d\d\d\d) \(.*?\)', line)
+            TM = re.search(r'(\d\d):(\d\d), (\S+) (\d\d?), (\d\d\d\d) \(.*?\)',
+                           line)
         if not TM:
-            TM = re.search(r'(\d{4})\. (\S+) (\d\d?)\., (\d\d:\d\d) \(.*?\)', line)
+            TM = re.search(r'(\d{4})\. (\S+) (\d\d?)\., (\d\d:\d\d) \(.*?\)',
+                           line)
         # 18. apr 2006 kl.18:39 (UTC)
         # 4. nov 2006 kl. 20:46 (CET)
         if not TM:
-            TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kl\.\W*(\d\d):(\d\d) \(.*?\)', line)
+            TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kl\.\W*(\d\d):(\d\d) \(.*?\)',
+                           line)
         #3. joulukuuta 2008 kello 16.26 (EET)
         if not TM:
-            TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kello \W*(\d\d).(\d\d) \(.*?\)', line)
+            TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kello \W*(\d\d).(\d\d) \(.*?\)',
+                           line)
         if not TM:
             # 14:23, 12. Jan. 2009 (UTC)
             pat = re.compile(r'(\d\d):(\d\d), (\d\d?)\. (\S+)\.? (\d\d\d\d) \((?:UTC|CES?T)\)')
             TM = pat.search(line)
         # ro.wiki: 4 august 2012 13:01 (EEST)
         if not TM:
-            TM = re.search(r'(\d\d?) (\S+) (\d\d\d\d) (\d\d):(\d\d) \(.*?\)', line)
+            TM = re.search(r'(\d\d?) (\S+) (\d\d\d\d) (\d\d):(\d\d) \(.*?\)',
+                           line)
         if TM:
             # Strip away all diacritics in the Mn ('Mark, non-spacing') category
-            # NFD decomposition splits combined characters (e.g. 'ä", LATIN SMALL
-            # LETTER A WITH DIAERESIS) into two entities: LATIN SMALL LETTER A
-            # and COMBINING DIAERESIS. The latter falls in the Mn category and is
-            # filtered out, resuling in 'a'.
+            # NFD decomposition splits combined characters (e.g. 'ä",
+            # LATIN SMALL LETTER A WITH DIAERESIS) into two entities:
+            # LATIN SMALL LETTER A and COMBINING DIAERESIS. The latter falls
+            # in the Mn category and is filtered out, resuling in 'a'.
             _TM = ''.join(c for c in unicodedata.normalize('NFD', TM.group(0))
                           if unicodedata.category(c) != 'Mn')
 
-            TIME = txt2timestamp(_TM,"%d. %b %Y kl. %H:%M (%Z)")
+            TIME = txt2timestamp(_TM, "%d. %b %Y kl. %H:%M (%Z)")
             if not TIME:
                 TIME = txt2timestamp(_TM, "%Y. %B %d., %H:%M (%Z)")
             if not TIME:
@@ -264,7 +297,7 @@
             if not TIME:
                 TIME = txt2timestamp(_TM, "%H:%M, %B %d, %Y (%Z)")
             if not TIME:
-                TIME = txt2timestamp(_TM,"%d. %Bta %Y kello %H.%M (%Z)")
+                TIME = txt2timestamp(_TM, "%d. %Bta %Y kello %H.%M (%Z)")
             if not TIME:
                 TIME = txt2timestamp(_TM, "%d %B %Y %H:%M (%Z)")
             if not TIME:
@@ -282,9 +315,9 @@
     def toText(self):
         return "== " + self.title + ' ==\n\n' + self.content
 
-    def shouldBeArchived(self,Archiver):
+    def shouldBeArchived(self, Archiver):
         algo = Archiver.get('algo')
-        reT = re.search(r'^old\((.*)\)$',algo)
+        reT = re.search(r'^old\((.*)\)$', algo)
         if reT:
             if not self.timestamp:
                 return ''
@@ -298,7 +331,9 @@
 
 class DiscussionPage(pywikibot.Page):
     """A class that represents a single discussion page as well as an archive
-    page. Feed threads to it and run an update() afterwards."""
+    page. Feed threads to it and run an update() afterwards.
+
+    """
 
     def __init__(self, title, archiver, vars=None):
         pywikibot.Page.__init__(self, Site, title)
@@ -321,12 +356,12 @@
         self.archives = {}
         self.archivedThreads = 0
         lines = self.get().split('\n')
-        found = False #Reading header
+        found = False  # Reading header
         curThread = None
         for line in lines:
-            threadHeader = re.search('^== *([^=].*?) *== *$',line)
+            threadHeader = re.search('^== *([^=].*?) *== *$', line)
             if threadHeader:
-                found = True #Reading threads now
+                found = True  # Reading threads now
                 if curThread:
                     self.threads.append(curThread)
                 curThread = DiscussionThread(threadHeader.group(1))
             else:
@@ -339,7 +374,7 @@
             self.threads.append(curThread)
         pywikibot.output(u'%d Threads found on %s' % (len(self.threads), self))
 
-    def feedThread(self, thread, maxArchiveSize=(250*1024,'B')):
+    def feedThread(self, thread, maxArchiveSize=(250 * 1024, 'B')):
         self.threads.append(thread)
         self.archivedThreads += 1
         if maxArchiveSize[1] == 'B':
@@ -353,11 +388,11 @@
     def size(self):
         return len(self.header) + sum([t.size() for t in self.threads])
 
-    def update(self, summary, sortThreads = False):
+    def update(self, summary, sortThreads=False):
         if sortThreads:
             pywikibot.output(u'Sorting threads...')
-            self.threads.sort(key = lambda t: t.timestamp)
-        newtext = re.sub('\n*$', '\n\n', self.header) #Fix trailing newlines
+            self.threads.sort(key=lambda t: t.timestamp)
+        newtext = re.sub('\n*$', '\n\n', self.header)  # Fix trailing newlines
         for t in self.threads:
             newtext += t.toText()
         if self.full:
@@ -374,25 +409,25 @@
 
     def __init__(self, Page, tpl, salt, force=False):
         self.attributes = {
-            'algo' : ['old(24h)',False],
-            'archive' : ['',False],
-            'maxarchivesize' : ['1000M',False],
-            'counter' : ['1',False],
-            'key' : ['',False],
-            }
+            'algo': ['old(24h)', False],
+            'archive': ['', False],
+            'maxarchivesize': ['1000M', False],
+            'counter': ['1', False],
+            'key': ['', False],
+        }
         self.tpl = tpl
         self.salt = salt
         self.force = force
         self.Page = DiscussionPage(Page.title(), self)
         self.loadConfig()
         self.commentParams = {
-            'from' : self.Page.title(),
-            }
+            'from': self.Page.title(),
+        }
         self.archives = {}
         self.archivedThreads = 0
 
     def get(self, attr, default=''):
-        return self.attributes.get(attr,[default])[0]
+        return self.attributes.get(attr, [default])[0]
 
     def set(self, attr, value, out=True):
         if attr == 'archive':
@@ -404,15 +439,15 @@
                      and a != 'maxage']
 
     def attr2text(self):
-        return '{{%s\n%s\n}}' \
-               % (self.tpl,
-                  '\n'.join(['|%s = %s'%(a,self.get(a))
-                             for a in self.saveables()]))
+        return '{{%s\n%s\n}}' % (self.tpl,
+                                 '\n'.join(['|%s = %s '
+                                            % (a, self.get(a))
+                                            for a in self.saveables()]))
 
     def key_ok(self):
         s = new_hash()
-        s.update(self.salt+'\n')
-        s.update(self.Page.title().encode('utf8')+'\n')
+        s.update(self.salt + '\n')
+        s.update(self.Page.title().encode('utf8') + '\n')
         return self.get('key') == s.hexdigest()
 
     def loadConfig(self):
@@ -439,12 +474,13 @@
         if not archive:
             return
         if not self.force \
-           and not self.Page.title()+'/' == archive[:len(self.Page.title())+1] \
+           and not self.Page.title() + '/' == archive[
+               :len(self.Page.title()) + 1] \
           and not self.key_ok():
             raise ArchiveSecurityError
         if not archive in self.archives:
             self.archives[archive] = DiscussionPage(archive, self, vars)
-        return self.archives[archive].feedThread(thread,maxArchiveSize)
+        return self.archives[archive].feedThread(thread, maxArchiveSize)
 
     def analyzePage(self):
         maxArchSize = str2size(self.get('maxarchivesize'))
@@ -456,9 +492,9 @@
         pywikibot.output(u'Processing %d threads' % len(oldthreads))
         for t in oldthreads:
             if len(oldthreads) - self.archivedThreads \
-               <= int(self.get('minthreadsleft',5)):
+               <= int(self.get('minthreadsleft', 5)):
                 self.Page.threads.append(t)
-                continue #Because there's too little threads left.
+                continue  # Because there's too little threads left.
             # TODO: Make an option so that unstamped (unsigned) posts get
             # archived.
             why = t.shouldBeArchived(self)
@@ -466,17 +502,17 @@
                 archive = self.get('archive')
                 TStuple = time.gmtime(t.timestamp)
                 vars = {
-                    'counter' : archCounter,
-                    'year' : TStuple[0],
-                    'month' : TStuple[1],
-                    'monthname' : int2month(TStuple[1]),
-                    'monthnameshort' : int2month_short(TStuple[1]),
-                    'week' : int(time.strftime('%W',TStuple)),
-                    }
+                    'counter': archCounter,
+                    'year': TStuple[0],
+                    'month': TStuple[1],
+                    'monthname': int2month(TStuple[1]),
+                    'monthnameshort': int2month_short(TStuple[1]),
+                    'week': int(time.strftime('%W', TStuple)),
+                }
                 archive = archive % vars
-                if self.feedArchive(archive,t,maxArchSize,vars):
+                if self.feedArchive(archive, t, maxArchSize, vars):
                     archCounter += 1
-                    self.set('counter',str(archCounter))
+                    self.set('counter', str(archCounter))
                 whys.append(why)
                 self.archivedThreads += 1
             else:
@@ -487,7 +523,7 @@
         if not self.Page.botMayEdit(Site.username):
             return
         whys = self.analyzePage()
-        if self.archivedThreads < int(self.get('minthreadstoarchive',2)):
+        if self.archivedThreads < int(self.get('minthreadstoarchive', 2)):
             # We might not want to archive a measly few threads
             # (lowers edit frequency)
             pywikibot.output(u'There are only %d Threads. Skipping'
@@ -495,7 +531,7 @@
             return
         if whys:
             pywikibot.output(u'Archiving %d thread(s).' % self.archivedThreads)
-            #Save the archives first (so that bugs don't cause a loss of data)
+            # Save the archives first (so that bugs don't cause a loss of data)
             for a in sorted(self.archives.keys()):
                 self.commentParams['count'] = self.archives[a].archivedThreads
                 comment = i18n.twntranslate(language,
@@ -503,12 +539,12 @@
                                             self.commentParams)
                 self.archives[a].update(comment)
 
-            #Save the page itself
-            rx = re.compile('{{'+self.tpl+'\n.*?\n}}',re.DOTALL)
-            self.Page.header = rx.sub(self.attr2text(),self.Page.header)
+            # Save the page itself
+            rx = re.compile('{{%s\n.*?\n}}' % self.tpl, re.DOTALL)
+            self.Page.header = rx.sub(self.attr2text(), self.Page.header)
             self.commentParams['count'] = self.archivedThreads
-            self.commentParams['archives'] \
-                = ', '.join(['[['+a.title()+']]' for a in self.archives.values()])
+            self.commentParams['archives'] = ', '.join(
+                ['[[%s]]' % a.title() for a in self.archives.values()])
             if not self.commentParams['archives']:
                 self.commentParams['archives'] = '/dev/null'
             self.commentParams['why'] = ', '.join(whys)
@@ -523,30 +559,30 @@
     from optparse import OptionParser
     parser = OptionParser(usage='usage: %prog [options] [LINKPAGE(s)]')
     parser.add_option('-f', '--file', dest='filename',
-            help='load list of pages from FILE', metavar='FILE')
+                      help='load list of pages from FILE', metavar='FILE')
     parser.add_option('-p', '--page', dest='pagename',
-            help='archive a single PAGE', metavar='PAGE')
+                      help='archive a single PAGE', metavar='PAGE')
     parser.add_option('-n', '--namespace', dest='namespace', type='int',
-            help='only archive pages from a given namespace')
+                      help='only archive pages from a given namespace')
     parser.add_option('-s', '--salt', dest='salt',
-            help='specify salt')
+                      help='specify salt')
     parser.add_option('-F', '--force', action='store_true', dest='force',
-            help='override security options')
+                      help='override security options')
     parser.add_option('-c', '--calc', dest='calc',
-            help='calculate key for PAGE and exit', metavar='PAGE')
+                      help='calculate key for PAGE and exit', metavar='PAGE')
     parser.add_option('-l', '--locale', dest='locale',
-            help='switch to locale LOCALE', metavar='LOCALE')
+                      help='switch to locale LOCALE', metavar='LOCALE')
     parser.add_option('-L', '--lang', dest='lang',
-            help='current language code', metavar='lang')
+                      help='current language code', metavar='lang')
     parser.add_option('-T', '--timezone', dest='timezone',
-            help='switch timezone to TIMEZONE', metavar='TIMEZONE')
+                      help='switch timezone to TIMEZONE', metavar='TIMEZONE')
     parser.add_option('-S', '--simulate', action='store_true', dest='simulate',
-            help='Do not change pages, just simulate')
+                      help='Do not change pages, just simulate')
     (options, args) = parser.parse_args()
 
     if options.locale:
         #Required for english month names
-        locale.setlocale(locale.LC_TIME,options.locale)
+        locale.setlocale(locale.LC_TIME, options.locale)
 
     if options.timezone:
         os.environ['TZ'] = options.timezone
@@ -558,8 +594,8 @@
         if not options.salt:
             parser.error('Note: you must specify a salt to calculate a key')
         s = new_hash()
-        s.update(options.salt+'\n')
-        s.update(options.calc+'\n')
+        s.update(options.salt + '\n')
+        s.update(options.calc + '\n')
         pywikibot.output(u'key = ' + s.hexdigest())
         return
 
@@ -588,23 +624,19 @@
     for a in args:
         pagelist = []
         if not options.filename and not options.pagename:
-            #for pg in pywikibot.Page(Site,a).getReferences(follow_redirects=False,onlyTemplateInclusion=True):
-            if not options.namespace == None:
+            if options.namespace is not None:
                 ns = [str(options.namespace)]
             else:
                 ns = []
             for pg in generateTransclusions(Site, a, ns):
                 pagelist.append(pg)
         if options.filename:
-            for pg in file(options.filename,'r').readlines():
-                pagelist.append(pywikibot.Page(Site,pg))
+            for pg in file(options.filename, 'r').readlines():
+                pagelist.append(pywikibot.Page(Site, pg))
         if options.pagename:
             pagelist.append(pywikibot.Page(Site, options.pagename,
                                            defaultNamespace=3))
-
         pagelist = sorted(pagelist)
-        #if not options.namespace == None:
-        #    pagelist = [pg for pg in pagelist if pg.namespace()==options.namespace]
         for pg in iter(pagelist):
             pywikibot.output(u'Processing %s' % pg)
             # Catching exceptions, so that errors in one page do not bail out
@@ -617,6 +649,7 @@
                 pywikibot.output(u'Error occured while processing page %s'
                                  % pg)
                 traceback.print_exc()
 
+
 if __name__ == '__main__':
     try:
         main()

--
To view, visit https://gerrit.wikimedia.org/r/95761
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibec267e2d3e592185b8ed1d7f4f0fe365cb39d4d
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <i...@gno.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
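
For readers skimming the patch, the two small parsing helpers it reflows,
str2time and str2size, are self-contained enough to try outside the bot.
The sketch below merely restates the behavior visible in the hunks above
(same names and branch logic; the pywikibot machinery is stripped out and
the built-in-shadowing 'str' parameter is renamed to 's'). It is an
illustration only, not part of the change:

import string


def str2time(s):
    # '7d' -> seconds in seven days, '36h' -> seconds in 36 hours,
    # bare digits -> value is already in seconds
    if s[-1] == 'd':
        return int(s[:-1]) * 24 * 3600
    elif s[-1] == 'h':
        return int(s[:-1]) * 3600
    return int(s)


def str2size(s):
    # '150K' -> (153600, 'B'), '2M' -> (2097152, 'B'),
    # '5T' -> (5, 'T'), i.e. a thread count rather than bytes
    if s[-1] in string.digits:
        return (int(s), 'B')
    elif s[-1] in ('K', 'k'):
        return (int(s[:-1]) * 1024, 'B')
    elif s[-1] == 'M':
        return (int(s[:-1]) * 1024 * 1024, 'B')
    elif s[-1] == 'T':
        return (int(s[:-1]), 'T')
    # any unrecognised suffix falls back to being treated like 'K'
    return (int(s[:-1]) * 1024, 'B')


assert str2time('7d') == 604800
assert str2size('2M') == (2097152, 'B')

Note that the final fallback quietly interprets an unknown suffix as
kilobytes, which is why the docstring in the patch only promises 'B' or
'T' as units.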