XZise has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/185982

Change subject: [FEAT] Scripts: Use more intelligent generators
......................................................................

[FEAT] Scripts: Use more intelligent generators

Instead of using the pagegenerators as often as possible, this is only
using them when they provide additional functionality. Otherwise it's
using the normal methods of a page. This removes almost all usages of
the NamespaceFilterPageGenerator, which only filters out invalid
pages but still requests them.

Change-Id: Iddc36b040ff010467559ea8fd7523056a511cb6f
---
M pywikibot/pagegenerators.py
M scripts/commonscat.py
M scripts/disambredir.py
M scripts/fixing_redirects.py
M scripts/nowcommons.py
M scripts/solve_disambiguation.py
M scripts/spamremove.py
M scripts/templatecount.py
M scripts/unlink.py
M scripts/weblinkchecker.py
10 files changed, 26 insertions(+), 58 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/82/185982/1

diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 6f01605..20cb8dd 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -995,7 +995,8 @@
 
 
 def CategorizedPageGenerator(category, recurse=False, start=None,
-                             step=None, total=None, content=False):
+                             step=None, total=None, content=False,
+                             namespaces=None):
     """Yield all pages in a specific category.
 
     If recurse is True, pages in subcategories are included as well; if
@@ -1011,7 +1012,7 @@
 
     """
     kwargs = dict(recurse=recurse, step=step, total=total,
-                  content=content)
+                  content=content, namespaces=namespaces)
     if start:
         kwargs['sortby'] = 'sortkey'
         kwargs['startsort'] = start
diff --git a/scripts/commonscat.py b/scripts/commonscat.py
index d276360..11cd9f4 100755
--- a/scripts/commonscat.py
+++ b/scripts/commonscat.py
@@ -547,12 +547,9 @@
         primaryCommonscat, commonscatAlternatives = \
             CommonscatBot.getCommonscatTemplate(
                 site.code)
-        generator = pagegenerators.NamespaceFilterPageGenerator(
-            pagegenerators.ReferringPageGenerator(
-                pywikibot.Page(site, u'Template:' + primaryCommonscat),
-                onlyTemplateInclusion=True),
-            ns,
-            site)
+        generator = (
+            pywikibot.Page(site, u'Template:' + primaryCommonscat).
+            getReferences(namespaces=ns, onlyTemplateInclusion=True))
 
     if not generator:
         generator = genFactory.getCombinedGenerator()
diff --git a/scripts/disambredir.py b/scripts/disambredir.py
index 4f3cdcf..36a8af5 100644
--- a/scripts/disambredir.py
+++ b/scripts/disambredir.py
@@ -164,17 +164,13 @@
         mysite.disambcategory()
     except pywikibot.Error as e:
         pywikibot.output(e)
-    else:
-        generator = pagegenerators.CategorizedPageGenerator(
-            mysite.disambcategory(), start=start)
-
-    if not generator:
         pywikibot.showHelp()
         return
+    else:
+        generator = pagegenerators.CategorizedPageGenerator(
+            mysite.disambcategory(), start=start, content=True, namespaces=[0])
 
     # only work on articles
-    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
-    generator = pagegenerators.PreloadingGenerator(generator)
     pagestodo = []
     pagestoload = []
     for page in generator:
diff --git a/scripts/fixing_redirects.py b/scripts/fixing_redirects.py
index 10de73d..894ade8 100644
--- a/scripts/fixing_redirects.py
+++ b/scripts/fixing_redirects.py
@@ -216,9 +216,8 @@
 
     if featured:
         featuredList = i18n.translate(mysite, featured_articles)
-        ref = pywikibot.Page(pywikibot.Site(), featuredList)
-        gen = pagegenerators.ReferringPageGenerator(ref)
-        gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
+        gen = pywikibot.Page(pywikibot.Site(), featuredList).getReferences(
+            namespaces=[0])
     if not gen:
         gen = genFactory.getCombinedGenerator()
     if gen:
diff --git a/scripts/nowcommons.py b/scripts/nowcommons.py
index ebdf184..3125fdb 100644
--- a/scripts/nowcommons.py
+++ b/scripts/nowcommons.py
@@ -271,11 +271,10 @@
             nowCommonsTemplates = [pywikibot.Page(self.site, title,
                                                   ns=10)
                                    for title in self.ncTemplates()]
-            gens = [pg.ReferringPageGenerator(t, followRedirects=True,
-                                              onlyTemplateInclusion=True)
+            gens = [t.getReferences(followRedirects=True, namespaces=[6],
+                                    onlyTemplateInclusion=True)
                     for t in nowCommonsTemplates]
             gen = pg.CombinedPageGenerator(gens)
-            gen = pg.NamespaceFilterPageGenerator(gen, [6])
             gen = pg.DuplicateFilterPageGenerator(gen)
             gen = pg.PreloadingGenerator(gen)
         return gen
diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py
index 83d1050..e5c07e2 100644
--- a/scripts/solve_disambiguation.py
+++ b/scripts/solve_disambiguation.py
@@ -1090,15 +1090,9 @@
             minimum = int(arg[5:])
         elif arg.startswith('-start'):
             try:
-                if len(arg) <= len('-start:'):
-                    generator = pagegenerators.CategorizedPageGenerator(
-                        pywikibot.Site().disambcategory())
-                else:
-                    generator = pagegenerators.CategorizedPageGenerator(
-                        pywikibot.Site().disambcategory(),
-                        start=arg[7:])
-                generator = pagegenerators.NamespaceFilterPageGenerator(
-                    generator, [0])
+                generator = pagegenerators.CategorizedPageGenerator(
+                    pywikibot.Site().disambcategory(),
+                    start=arg[7:], namespaces=[0])
             except pywikibot.NoPage:
                 pywikibot.output("Disambiguation category for your wiki is not 
known.")
                 raise
diff --git a/scripts/spamremove.py b/scripts/spamremove.py
index 92f70a2..405cb85 100755
--- a/scripts/spamremove.py
+++ b/scripts/spamremove.py
@@ -34,7 +34,7 @@
 #
 
 import pywikibot
-from pywikibot import pagegenerators, i18n
+from pywikibot import i18n
 from pywikibot.editor import TextEditor
 
 
@@ -67,10 +67,8 @@
         return
 
     mysite = pywikibot.Site()
-    pages = mysite.exturlusage(spamSite)
-    if namespaces:
-        pages = pagegenerators.NamespaceFilterPageGenerator(pages, namespaces)
-    pages = pagegenerators.PreloadingGenerator(pages)
+    pages = mysite.exturlusage(spamSite, namespaces=namespaces or None,
+                               content=True)
 
     summary = i18n.twtranslate(mysite, 'spamremove-remove',
                                {'url': spamSite})
diff --git a/scripts/templatecount.py b/scripts/templatecount.py
index ab70b97..0dd045c 100644
--- a/scripts/templatecount.py
+++ b/scripts/templatecount.py
@@ -40,7 +40,6 @@
 
 import datetime
 import pywikibot
-from pywikibot import pagegenerators
 
 templates = ['ref', 'note', 'ref label', 'note label', 'reflist']
 
@@ -100,11 +99,8 @@
         mytpl = mysite.getNamespaceIndex(mysite.template_namespace())
         for template in templates:
             transcludingArray = []
-            gen = pagegenerators.ReferringPageGenerator(
-                pywikibot.Page(mysite, template, ns=mytpl),
-                onlyTemplateInclusion=True)
-            if namespaces:
-                gen = pagegenerators.NamespaceFilterPageGenerator(gen, 
namespaces)
+            gen = pywikibot.Page(mysite, template, ns=mytpl).getReferences(
+                namespaces=namespaces or None, onlyTemplateInclusion=True)
             for page in gen:
                 transcludingArray.append(page)
             yield template, transcludingArray
diff --git a/scripts/unlink.py b/scripts/unlink.py
index ea24f7c..f26d81b 100755
--- a/scripts/unlink.py
+++ b/scripts/unlink.py
@@ -31,7 +31,7 @@
 import re
 import pywikibot
 from pywikibot.editor import TextEditor
-from pywikibot import pagegenerators, i18n, Bot
+from pywikibot import i18n, Bot
 
 
 class UnlinkBot(Bot):
@@ -49,10 +49,8 @@
         self.pageToUnlink = pageToUnlink
         linktrail = self.pageToUnlink.site.linktrail()
 
-        gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
-        if self.getOption('namespaces') != []:
-            gen = pagegenerators.NamespaceFilterPageGenerator(gen, 
self.getOption('namespaces'))
-        self.generator = pagegenerators.PreloadingGenerator(gen)
+        self.generator = pageToUnlink.getReferences(
+            namespaces=self.getOption('namespaces') or None, content=True)
         # The regular expression which finds links. Results consist of four
         # groups:
         #
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 4bc522f..1b3e248 100644
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -213,7 +213,7 @@
                         continue
                     self.skipping = False
                 page = pywikibot.Page(self.site, entry.title)
-                if not self.namespaces == []:
+                if self.namespaces:
                     if page.namespace() not in self.namespaces:
                         continue
                 found = False
@@ -851,9 +851,6 @@
     """
     gen = None
     xmlFilename = None
-    # Which namespaces should be processed?
-    # default to [] which means all namespaces will be processed
-    namespaces = []
     HTTPignore = []
     day = 7
 
@@ -866,11 +863,6 @@
             config.report_dead_links_on_talk = True
         elif arg == '-notalk':
             config.report_dead_links_on_talk = False
-        elif arg.startswith('-namespace:'):
-            try:
-                namespaces.append(int(arg[11:]))
-            except ValueError:
-                namespaces.append(arg[11:])
         elif arg == '-repeat':
             gen = RepeatPageGenerator()
         elif arg.startswith('-ignore:'):
@@ -896,13 +888,11 @@
             xmlStart
         except NameError:
             xmlStart = None
-        gen = XmlDumpPageGenerator(xmlFilename, xmlStart, namespaces)
+        gen = XmlDumpPageGenerator(xmlFilename, xmlStart, 
genFactory.namespaces)
 
     if not gen:
         gen = genFactory.getCombinedGenerator()
     if gen:
-        if namespaces != []:
-            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
         # fetch at least 240 pages simultaneously from the wiki, but more if
         # a high thread number is set.
         pageNumber = max(240, config.max_external_links * 2)

-- 
To view, visit https://gerrit.wikimedia.org/r/185982
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iddc36b040ff010467559ea8fd7523056a511cb6f
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to