jenkins-bot has submitted this change and it was merged.

Change subject: [FEAT] Scripts: Use more intelligent generators
......................................................................


[FEAT] Scripts: Use more intelligent generators

Instead of using the pagegenerators as often as possible, this is only
using them when they provide additional functionality. Otherwise it's
using the normal methods of a page. This removes all usages of the
NamespaceFilterPageGenerator in the scripts, which only filters out
invalid pages but still requests them.

Change-Id: Iddc36b040ff010467559ea8fd7523056a511cb6f
---
M pywikibot/pagegenerators.py
M scripts/add_text.py
M scripts/checkimages.py
M scripts/commonscat.py
M scripts/disambredir.py
M scripts/fixing_redirects.py
M scripts/nowcommons.py
M scripts/solve_disambiguation.py
M scripts/spamremove.py
M scripts/templatecount.py
M scripts/unlink.py
M scripts/weblinkchecker.py
12 files changed, 33 insertions(+), 80 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 792e092..601f809 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -1010,7 +1010,8 @@
 
 
 def CategorizedPageGenerator(category, recurse=False, start=None,
-                             step=None, total=None, content=False):
+                             step=None, total=None, content=False,
+                             namespaces=None):
     """Yield all pages in a specific category.
 
     If recurse is True, pages in subcategories are included as well; if
@@ -1026,7 +1027,7 @@
 
     """
     kwargs = dict(recurse=recurse, step=step, total=total,
-                  content=content)
+                  content=content, namespaces=namespaces)
     if start:
         kwargs['sortby'] = 'sortkey'
         kwargs['startsort'] = start
@@ -1473,7 +1474,7 @@
 ImageGenerator = FileGenerator
 
 
-def PageWithTalkPageGenerator(generator):
+def PageWithTalkPageGenerator(generator, return_talk_only=False):
     """Yield pages and associated talk pages from another generator.
 
     Only yields talk pages if the original generator yields a non-talk page,
@@ -1481,7 +1482,8 @@
 
     """
     for page in generator:
-        yield page
+        if not return_talk_only or page.isTalkPage():
+            yield page
         if not page.isTalkPage():
             yield page.toggleTalkPage()
 
diff --git a/scripts/add_text.py b/scripts/add_text.py
index 3bfc2b9..ae21071 100644
--- a/scripts/add_text.py
+++ b/scripts/add_text.py
@@ -288,7 +288,6 @@
     textfile = None
     talkPage = False
     reorderEnabled = True
-    namespaces = []
 
     # Put the text above or below the text?
     up = False
@@ -346,14 +345,7 @@
         pywikibot.error("The text to add wasn't given.")
         return
     if talkPage:
-        generator = pagegenerators.PageWithTalkPageGenerator(generator)
-        site = pywikibot.Site()
-        for namespace in site.namespaces():
-            index = site.getNamespaceIndex(namespace)
-            if index % 2 == 1 and index > 0:
-                namespaces += [index]
-        generator = pagegenerators.NamespaceFilterPageGenerator(
-            generator, namespaces, site)
+        generator = pagegenerators.PageWithTalkPageGenerator(generator, True)
     for page in generator:
         (text, newtext, always) = add_text(page, addText, summary, regexSkip,
                                            regexSkipUrl, always, up, True,
diff --git a/scripts/checkimages.py b/scripts/checkimages.py
index 315c606..19f07e6 100644
--- a/scripts/checkimages.py
+++ b/scripts/checkimages.py
@@ -1770,7 +1770,7 @@
                 firstPageTitle = arg[7:]
             firstPageTitle = firstPageTitle.split(":")[1:]
             generator = pywikibot.Site().allpages(start=firstPageTitle,
-                                                     namespace=6)
+                                                  namespace=6)
             repeat = False
         elif arg.startswith('-page'):
             if len(arg) == 5:
@@ -1804,7 +1804,7 @@
                 catName = str(arg[5:])
             catSelected = pywikibot.Category(pywikibot.Site(),
                                              'Category:%s' % catName)
-            generator = pg.CategorizedPageGenerator(catSelected)
+            generator = catSelected.articles(namespaces=[6])
             repeat = False
         elif arg.startswith('-ref'):
             if len(arg) == 4:
@@ -1812,8 +1812,8 @@
                     u'The references of what page should I parse?'))
             elif len(arg) > 4:
                 refName = str(arg[5:])
-            generator = pg.ReferringPageGenerator(
-                pywikibot.Page(pywikibot.Site(), refName))
+            ref = pywikibot.Page(pywikibot.Site(), refName)
+            generator = ref.getReferences(namespaces=[6])
             repeat = False
 
     if not generator:
@@ -1862,7 +1862,6 @@
         Bot.takesettings()
         if waitTime:
             generator = Bot.wait(waitTime, generator, normal, limit)
-        generator = pg.NamespaceFilterPageGenerator(generator, 6, site)
         for image in generator:
             # Setting the image for the main class
             Bot.setParameters(image.title(withNamespace=False))
diff --git a/scripts/commonscat.py b/scripts/commonscat.py
index cc9fb19..6df29f7 100755
--- a/scripts/commonscat.py
+++ b/scripts/commonscat.py
@@ -500,10 +500,7 @@
     @type args: list of unicode
     """
     options = {}
-    generator = None
     checkcurrent = False
-    ns = []
-    ns.append(14)
 
     # Process global args and prepare generator args parser
     local_args = pywikibot.handle_args(args)
@@ -527,14 +524,10 @@
         primaryCommonscat, commonscatAlternatives = \
             CommonscatBot.getCommonscatTemplate(
                 site.code)
-        generator = pagegenerators.NamespaceFilterPageGenerator(
-            pagegenerators.ReferringPageGenerator(
-                pywikibot.Page(site, u'Template:' + primaryCommonscat),
-                onlyTemplateInclusion=True),
-            ns,
-            site)
-
-    if not generator:
+        template_page = pywikibot.Page(site, u'Template:' + primaryCommonscat)
+        generator = template_page.getReferences(namespaces=14,
+                                                onlyTemplateInclusion=True)
+    else:
         generator = genFactory.getCombinedGenerator()
 
     if generator:
diff --git a/scripts/disambredir.py b/scripts/disambredir.py
index 4f3cdcf..8739f3a 100644
--- a/scripts/disambredir.py
+++ b/scripts/disambredir.py
@@ -156,7 +156,6 @@
     """
     local_args = pywikibot.handle_args(args)
 
-    generator = None
     start = local_args[0] if local_args else '!'
 
     mysite = pywikibot.Site()
@@ -164,17 +163,13 @@
         mysite.disambcategory()
     except pywikibot.Error as e:
         pywikibot.output(e)
-    else:
-        generator = pagegenerators.CategorizedPageGenerator(
-            mysite.disambcategory(), start=start)
-
-    if not generator:
         pywikibot.showHelp()
         return
 
+    generator = pagegenerators.CategorizedPageGenerator(
+        mysite.disambcategory(), start=start, content=True, namespaces=[0])
+
     # only work on articles
-    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
-    generator = pagegenerators.PreloadingGenerator(generator)
     pagestodo = []
     pagestoload = []
     for page in generator:
diff --git a/scripts/fixing_redirects.py b/scripts/fixing_redirects.py
index 10de73d..180163e 100644
--- a/scripts/fixing_redirects.py
+++ b/scripts/fixing_redirects.py
@@ -217,8 +217,7 @@
     if featured:
         featuredList = i18n.translate(mysite, featured_articles)
         ref = pywikibot.Page(pywikibot.Site(), featuredList)
-        gen = pagegenerators.ReferringPageGenerator(ref)
-        gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
+        gen = ref.getReferences(namespaces=[0])
     if not gen:
         gen = genFactory.getCombinedGenerator()
     if gen:
diff --git a/scripts/nowcommons.py b/scripts/nowcommons.py
index ebdf184..3125fdb 100644
--- a/scripts/nowcommons.py
+++ b/scripts/nowcommons.py
@@ -271,11 +271,10 @@
             nowCommonsTemplates = [pywikibot.Page(self.site, title,
                                                   ns=10)
                                    for title in self.ncTemplates()]
-            gens = [pg.ReferringPageGenerator(t, followRedirects=True,
-                                              onlyTemplateInclusion=True)
+            gens = [t.getReferences(followRedirects=True, namespaces=[6],
+                                    onlyTemplateInclusion=True)
                     for t in nowCommonsTemplates]
             gen = pg.CombinedPageGenerator(gens)
-            gen = pg.NamespaceFilterPageGenerator(gen, [6])
             gen = pg.DuplicateFilterPageGenerator(gen)
             gen = pg.PreloadingGenerator(gen)
         return gen
diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py
index 83d1050..e5c07e2 100644
--- a/scripts/solve_disambiguation.py
+++ b/scripts/solve_disambiguation.py
@@ -1090,15 +1090,9 @@
             minimum = int(arg[5:])
         elif arg.startswith('-start'):
             try:
-                if len(arg) <= len('-start:'):
-                    generator = pagegenerators.CategorizedPageGenerator(
-                        pywikibot.Site().disambcategory())
-                else:
-                    generator = pagegenerators.CategorizedPageGenerator(
-                        pywikibot.Site().disambcategory(),
-                        start=arg[7:])
-                generator = pagegenerators.NamespaceFilterPageGenerator(
-                    generator, [0])
+                generator = pagegenerators.CategorizedPageGenerator(
+                    pywikibot.Site().disambcategory(),
+                    start=arg[7:], namespaces=[0])
             except pywikibot.NoPage:
                 pywikibot.output("Disambiguation category for your wiki is not 
known.")
                 raise
diff --git a/scripts/spamremove.py b/scripts/spamremove.py
index 92f70a2..0f8303e 100755
--- a/scripts/spamremove.py
+++ b/scripts/spamremove.py
@@ -34,7 +34,7 @@
 #
 
 import pywikibot
-from pywikibot import pagegenerators, i18n
+from pywikibot import i18n
 from pywikibot.editor import TextEditor
 
 
@@ -67,10 +67,7 @@
         return
 
     mysite = pywikibot.Site()
-    pages = mysite.exturlusage(spamSite)
-    if namespaces:
-        pages = pagegenerators.NamespaceFilterPageGenerator(pages, namespaces)
-    pages = pagegenerators.PreloadingGenerator(pages)
+    pages = mysite.exturlusage(spamSite, namespaces=namespaces, content=True)
 
     summary = i18n.twtranslate(mysite, 'spamremove-remove',
                                {'url': spamSite})
diff --git a/scripts/templatecount.py b/scripts/templatecount.py
index 036b012..8e36b94 100644
--- a/scripts/templatecount.py
+++ b/scripts/templatecount.py
@@ -40,7 +40,6 @@
 
 import datetime
 import pywikibot
-from pywikibot import pagegenerators
 
 templates = ['ref', 'note', 'ref label', 'note label', 'reflist']
 
@@ -100,12 +99,8 @@
         mytpl = mysite.ns_index(mysite.template_namespace())
         for template in templates:
             transcludingArray = []
-            gen = pagegenerators.ReferringPageGenerator(
-                pywikibot.Page(mysite, template, ns=mytpl),
-                onlyTemplateInclusion=True)
-            if namespaces:
-                gen = pagegenerators.NamespaceFilterPageGenerator(gen,
-                                                                  namespaces)
+            gen = pywikibot.Page(mysite, template, ns=mytpl).getReferences(
+                namespaces=namespaces, onlyTemplateInclusion=True)
             for page in gen:
                 transcludingArray.append(page)
             yield template, transcludingArray
diff --git a/scripts/unlink.py b/scripts/unlink.py
index ea24f7c..de05fdd 100755
--- a/scripts/unlink.py
+++ b/scripts/unlink.py
@@ -31,7 +31,7 @@
 import re
 import pywikibot
 from pywikibot.editor import TextEditor
-from pywikibot import pagegenerators, i18n, Bot
+from pywikibot import i18n, Bot
 
 
 class UnlinkBot(Bot):
@@ -49,10 +49,8 @@
         self.pageToUnlink = pageToUnlink
         linktrail = self.pageToUnlink.site.linktrail()
 
-        gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
-        if self.getOption('namespaces') != []:
-            gen = pagegenerators.NamespaceFilterPageGenerator(gen, 
self.getOption('namespaces'))
-        self.generator = pagegenerators.PreloadingGenerator(gen)
+        self.generator = pageToUnlink.getReferences(
+            namespaces=self.getOption('namespaces'), content=True)
         # The regular expression which finds links. Results consist of four
         # groups:
         #
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index a94bb79..ba33030 100644
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -214,7 +214,7 @@
                         continue
                     self.skipping = False
                 page = pywikibot.Page(self.site, entry.title)
-                if not self.namespaces == []:
+                if self.namespaces:
                     if page.namespace() not in self.namespaces:
                         continue
                 found = False
@@ -852,9 +852,6 @@
     """
     gen = None
     xmlFilename = None
-    # Which namespaces should be processed?
-    # default to [] which means all namespaces will be processed
-    namespaces = []
     HTTPignore = []
     day = 7
 
@@ -867,11 +864,6 @@
             config.report_dead_links_on_talk = True
         elif arg == '-notalk':
             config.report_dead_links_on_talk = False
-        elif arg.startswith('-namespace:'):
-            try:
-                namespaces.append(int(arg[11:]))
-            except ValueError:
-                namespaces.append(arg[11:])
         elif arg == '-repeat':
             gen = RepeatPageGenerator()
         elif arg.startswith('-ignore:'):
@@ -897,13 +889,11 @@
             xmlStart
         except NameError:
             xmlStart = None
-        gen = XmlDumpPageGenerator(xmlFilename, xmlStart, namespaces)
+        gen = XmlDumpPageGenerator(xmlFilename, xmlStart, 
genFactory.namespaces)
 
     if not gen:
         gen = genFactory.getCombinedGenerator()
     if gen:
-        if namespaces != []:
-            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
         # fetch at least 240 pages simultaneously from the wiki, but more if
         # a high thread number is set.
         pageNumber = max(240, config.max_external_links * 2)

-- 
To view, visit https://gerrit.wikimedia.org/r/185982
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iddc36b040ff010467559ea8fd7523056a511cb6f
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to