Legoktm has uploaded a new change for review.
https://gerrit.wikimedia.org/r/86624
Change subject: PEP8-ify most of imagerecat.py
......................................................................
PEP8-ify most of imagerecat.py
Change-Id: I5a4a7dd85eadf7233fe7e388644dadd5ba1e5ffc
---
M scripts/imagerecat.py
1 file changed, 92 insertions(+), 82 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/24/86624/1
diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py
index 9a6ee16..2cb5bf7 100644
--- a/scripts/imagerecat.py
+++ b/scripts/imagerecat.py
@@ -45,13 +45,14 @@
category_blacklist = []
countries = []
-search_wikis=u'_20'
-hint_wiki=u''
+search_wikis = u'_20'
+hint_wiki = u''
+
def initLists():
- '''
+ """
Get the list of countries & the blacklist from Commons.
- '''
+ """
global category_blacklist
global countries
@@ -66,18 +67,19 @@
countries.append(country.title(withNamespace=False))
return
+
def categorizeImages(generator, onlyFilter, onlyUncat):
- ''' Loop over all images in generator and try to categorize them. Get
+ """ Loop over all images in generator and try to categorize them. Get
category suggestions from CommonSense.
- '''
+ """
for page in generator:
if page.exists() and (page.namespace() == 6) and \
(not page.isRedirectPage()):
imagepage = pywikibot.ImagePage(page.site(), page.title())
pywikibot.output(u'Working on ' + imagepage.title())
- if (onlyUncat and not(u'Uncategorized' in imagepage.templates())):
+ if onlyUncat and not(u'Uncategorized' in imagepage.templates()):
pywikibot.output(u'No Uncategorized template found')
else:
currentCats = getCurrentCats(imagepage)
@@ -89,24 +91,26 @@
(commonshelperCats, usage, galleries) =
getCommonshelperCats(imagepage)
newcats = applyAllFilters(commonshelperCats+currentCats)
- if (len(newcats) > 0 and not(set(currentCats)==set(newcats))):
+ if len(newcats) > 0 and not(set(currentCats) == set(newcats)):
for cat in newcats:
- pywikibot.output(u' Found new cat: ' + cat);
+ pywikibot.output(u' Found new cat: ' + cat)
saveImagePage(imagepage, newcats, usage, galleries,
onlyFilter)
+
def getCurrentCats(imagepage):
- ''' Get the categories currently on the image '''
+ """ Get the categories currently on the image """
result = []
for cat in imagepage.categories():
result.append(cat.title(withNamespace=False))
return list(set(result))
+
def getCommonshelperCats(imagepage):
- ''' Get category suggestions from CommonSense. Parse them and return a list
+ """ Get category suggestions from CommonSense. Parse them and return a list
of suggestions.
- '''
+ """
commonshelperCats = []
usage = []
galleries = []
@@ -116,24 +120,24 @@
site = imagepage.site
lang = site.language()
family = site.family.name
- if lang==u'commons' and family==u'commons':
+ if lang == u'commons' and family == u'commons':
parameters = urllib.urlencode(
- {'i' : imagepage.title(withNamespace=False).encode('utf-8'),
- 'r' : 'on',
- 'go-clean' : 'Find+Categories',
- 'p' : search_wikis,
- 'cl' : hint_wiki})
- elif family==u'wikipedia':
+ {'i': imagepage.title(withNamespace=False).encode('utf-8'),
+ 'r': 'on',
+ 'go-clean': 'Find+Categories',
+ 'p': search_wikis,
+ 'cl': hint_wiki})
+ elif family == u'wikipedia':
parameters = urllib.urlencode(
- {'i' : imagepage.title(withNamespace=False).encode('utf-8'),
- 'r' : 'on',
- 'go-move' : 'Find+Categories',
- 'p' : search_wikis,
- 'cl' : hint_wiki,
- 'w' : lang})
+ {'i': imagepage.title(withNamespace=False).encode('utf-8'),
+ 'r': 'on',
+ 'go-move': 'Find+Categories',
+ 'p': search_wikis,
+ 'cl': hint_wiki,
+ 'w': lang})
else:
#Cant handle other sites atm
- return ([], [], [])
+ return [], [], []
commonsenseRe =
re.compile('^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usagenum>(\d)+)\)\s(?P<usage>(.*))\s#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s#GALLERIES(\s)+\((?P<galnum>(\d)+)\)\s(?P<gals>(.*))\s(.*)#EOF$',
re.MULTILINE + re.DOTALL)
@@ -141,10 +145,10 @@
matches = None
maxtries = 10
tries = 0
- while(not gotInfo):
+ while not gotInfo:
try:
- if ( tries < maxtries ):
- tries = tries + 1
+ if tries < maxtries:
+ tries += 1
commonsHelperPage = urllib.urlopen(
"http://toolserver.org/~daniel/WikiSense/CommonSense.php?%s" % parameters)
matches = commonsenseRe.search(
@@ -157,18 +161,18 @@
except socket.timeout:
pywikibot.output(u'Got a timeout, let\'s try again')
- if (matches and gotInfo):
- if (matches.group('usagenum') > 0):
+ if matches and gotInfo:
+ if matches.group('usagenum') > 0:
used = matches.group('usage').splitlines()
for use in used:
usage= usage + getUsage(use)
#pywikibot.output(use)
- if (matches.group('catnum') > 0):
+ if matches.group('catnum') > 0:
cats = matches.group('cats').splitlines()
for cat in cats:
commonshelperCats.append(cat.replace('_', ' '))
pywikibot.output(u'category : ' + cat)
- if (matches.group('galnum') > 0):
+ if matches.group('galnum') > 0:
gals = matches.group('gals').splitlines()
for gal in gals:
galleries.append(gal.replace('_', ' '))
@@ -177,12 +181,13 @@
galleries = list(set(galleries))
for (lang, project, article) in usage:
pywikibot.output(lang + project + article)
- return (commonshelperCats, usage, galleries)
+ return commonshelperCats, usage, galleries
+
def getOpenStreetMapCats(latitude, longitude):
- '''
+ """
Get a list of location categories based on the OSM nomatim tool
- '''
+ """
result = []
locationList = getOpenStreetMap(latitude, longitude)
for i in range(0, len(locationList)):
@@ -193,25 +198,25 @@
category = getCategoryByName(name=locationList[i],
parent=locationList[i+1])
else:
category = getCategoryByName(name=locationList[i])
- if category and not category==u'':
+ if category and not category == u'':
result.append(category)
#print result
return result
def getOpenStreetMap(latitude, longitude):
- '''
+ """
Get the result from http://nominatim.openstreetmap.org/reverse
and put it in a list of tuples to play around with
- '''
+ """
result = []
gotInfo = False
parameters = urllib.urlencode({'lat' : latitude, 'lon' : longitude,
'accept-language' : 'en'})
- while(not gotInfo):
+ while not gotInfo:
try:
page =
urllib.urlopen("http://nominatim.openstreetmap.org/reverse?format=xml&%s" %
parameters)
et = xml.etree.ElementTree.parse(page)
- gotInfo=True
+ gotInfo = True
except IOError:
pywikibot.output(u'Got an IOError, let\'s try again')
time.sleep(30)
@@ -233,30 +238,28 @@
#print result
return result
+
def getCategoryByName(name, parent=u'', grandparent=u''):
- if not parent==u'':
+ if not parent == u'':
workname = name.strip() + u',_' + parent.strip()
- workcat = pywikibot.Category(
- pywikibot.Site(u'commons', u'commons'), workname)
+ workcat = pywikibot.Category(pywikibot.Site(u'commons', u'commons'),
workname)
if workcat.exists():
return workname
if not grandparent==u'':
workname = name.strip() + u',_' + grandparent.strip()
- workcat = pywikibot.Category(
- pywikibot.Site(u'commons', u'commons'), workname)
+ workcat = pywikibot.Category(pywikibot.Site(u'commons', u'commons'),
workname)
if workcat.exists():
return workname
workname = name.strip()
- workcat = pywikibot.Category(
- pywikibot.Site(u'commons', u'commons'), workname)
+ workcat = pywikibot.Category(pywikibot.Site(u'commons', u'commons'),
workname)
if workcat.exists():
return workname
return u''
def getUsage(use):
- ''' Parse the Commonsense output to get the usage '''
+ """ Parse the Commonsense output to get the usage """
result = []
lang = ''
project = ''
@@ -265,21 +268,22 @@
'^(?P<lang>([\w-]+))\.(?P<project>([\w]+))\.org:(?P<articles>\s(.*))')
matches = usageRe.search(use)
if matches:
- if (matches.group('lang')):
+ if matches.group('lang'):
lang = matches.group('lang')
#pywikibot.output(lang)
- if (matches.group('project')):
+ if matches.group('project'):
project = matches.group('project')
#pywikibot.output(project)
- if (matches.group('articles')):
+ if matches.group('articles'):
articles = matches.group('articles')
#pywikibot.output(articles)
for article in articles.split():
result.append((lang, project, article))
return result
+
def applyAllFilters(categories):
- ''' Apply all filters on categories. '''
+ """ Apply all filters on categories. """
result = []
result = filterDisambiguation(categories)
result = followRedirects(result)
@@ -288,17 +292,19 @@
result = filterParents(result)
return result
+
def filterBlacklist(categories):
- ''' Filter out categories which are on the blacklist. '''
+ """ Filter out categories which are on the blacklist. """
result = []
for cat in categories:
cat = cat.replace('_', ' ')
- if (cat not in category_blacklist):
+ if not (cat in category_blacklist):
result.append(cat)
return list(set(result))
+
def filterDisambiguation(categories):
- ''' Filter out disambiguation categories. '''
+ """ Filter out disambiguation categories. """
result = []
for cat in categories:
if (not pywikibot.Page(pywikibot.Site(u'commons', u'commons'),
@@ -306,8 +312,9 @@
result.append(cat)
return result
+
def followRedirects(categories):
- ''' If a category is a redirect, replace the category with the target. '''
+ """ If a category is a redirect, replace the category with the target. """
result = []
for cat in categories:
categoryPage = pywikibot.Page(pywikibot.getSite(u'commons',
u'commons'),
@@ -320,19 +327,20 @@
result.append(cat)
return result
+
def filterCountries(categories):
- ''' Try to filter out ...by country categories.
+ """ Try to filter out ...by country categories.
First make a list of any ...by country categories and try to find some
countries. If a by country category has a subcategory containing one of the
countries found, add it. The ...by country categories remain in the set and
should be filtered out by filterParents.
- '''
+ """
result = categories
listByCountry = []
listCountries = []
for cat in categories:
- if (cat.endswith(u'by country')):
+ if cat.endswith(u'by country'):
listByCountry.append(cat)
#If cat contains 'by country' add it to the list
@@ -341,27 +349,26 @@
for country in countries:
if country in cat:
listCountries.append(country)
- if(len(listByCountry) > 0):
+ if len(listByCountry) > 0:
for bc in listByCountry:
category = pywikibot.Category(
pywikibot.Site(u'commons', u'commons'), u'Category:' + bc)
for subcategory in category.subcategories():
for country in listCountries:
- if
(subcategory.title(withNamespace=False).endswith(country)):
+ if
subcategory.title(withNamespace=False).endswith(country):
result.append(subcategory.title(withNamespace=False))
return list(set(result))
-def filterParents(categories):
- ''' Remove all parent categories from the set to prevent
overcategorization.
- '''
+def filterParents(categories):
+ """ Remove all parent categories from the set to prevent
overcategorization. """
result = []
toFilter = u''
for cat in categories:
cat = cat.replace('_', ' ')
toFilter = toFilter + "[[Category:" + cat + "]]\n"
- parameters = urllib.urlencode({'source' : toFilter.encode('utf-8'),
- 'bot' : '1'})
+ parameters = urllib.urlencode({'source': toFilter.encode('utf-8'),
+ 'bot': '1'})
filterCategoriesRe = re.compile('\[\[Category:([^\]]*)\]\]')
try:
filterCategoriesPage = urllib.urlopen(
@@ -377,17 +384,18 @@
return categories
return result
+
def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
- ''' Remove the old categories and add the new categories to the image. '''
+ """ Remove the old categories and add the new categories to the image. """
newtext = pywikibot.removeCategoryLinks(imagepage.get(), imagepage.site())
- if not(onlyFilter):
+ if not onlyFilter:
newtext = removeTemplates(newtext)
newtext = newtext + getCheckCategoriesTemplate(usage, galleries,
len(newcats))
- newtext = newtext + u'\n'
+ newtext += u'\n'
for category in newcats:
newtext = newtext + u'[[Category:' + category + u']]\n'
- if(onlyFilter):
+ if onlyFilter:
comment = u'Filtering categories'
else:
comment = u'Image is categorized by a bot using data from
[[Commons:Tools#CommonSense|CommonSense]]'
@@ -395,11 +403,11 @@
imagepage.put(newtext, comment)
return
-def removeTemplates(oldtext = u''):
- '''
+
+def removeTemplates(oldtext=u''):
+ """
Remove {{Uncategorized}} and {{Check categories}} templates
- '''
- result = u''
+ """
result = re.sub(
u'\{\{\s*([Uu]ncat(egori[sz]ed(
image)?)?|[Nn]ocat|[Nn]eedscategory)[^}]*\}\}', u'', oldtext)
result = re.sub(u'<!-- Remove this line once you have added categories
-->',
@@ -407,10 +415,11 @@
result = re.sub(u'\{\{\s*[Cc]heck categories[^}]*\}\}', u'', result)
return result
+
def getCheckCategoriesTemplate(usage, galleries, ncats):
- '''
+ """
Build the check categories template with all parameters
- '''
+ """
result = u'{{Check
categories|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}\n'
usageCounter = 1
for (lang, project, article) in usage:
@@ -418,19 +427,20 @@
result += u'|wiki%d=%s' % (usageCounter, project)
result += u'|article%d=%s' % (usageCounter, article)
result += u'\n'
- usageCounter = usageCounter + 1
+ usageCounter += 1
galleryCounter = 1
for gallery in galleries:
result += u'|gallery%d=%s' % (galleryCounter, gallery.replace('_', '
')) + u'\n'
- galleryCounter = galleryCounter + 1
+ galleryCounter += 1
result += u'|ncats=%d\n' % ncats
result += u'}}\n'
return result
+
def main(args):
- '''
+ """
Main loop. Get a generator and options. Work on all images in the
generator.
- '''
+ """
generator = None
onlyFilter = False
onlyUncat = False
@@ -448,7 +458,7 @@
elif arg.startswith('-hint:'):
hint_wiki = arg [len('-hint:'):]
elif arg.startswith('-onlyhint'):
- search_wikis = arg [len('-onlyhint:'):]
+ search_wikis = arg[len('-onlyhint:'):]
else:
genFactory.handleArg(arg)
--
To view, visit https://gerrit.wikimedia.org/r/86624
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I5a4a7dd85eadf7233fe7e388644dadd5ba1e5ffc
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Legoktm <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits