Gallaecio has uploaded a new change for review. https://gerrit.wikimedia.org/r/182444
Change subject: replace.py: Added support for replacing in page titles ...................................................................... replace.py: Added support for replacing in page titles Change-Id: I27f929dbfd08efda559000f6d281b40f5d7e4098 --- M scripts/replace.py 1 file changed, 186 insertions(+), 89 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/44/182444/1 diff --git a/scripts/replace.py b/scripts/replace.py index a8fc8cd..0d527a6 100755 --- a/scripts/replace.py +++ b/scripts/replace.py @@ -70,6 +70,13 @@ (or no replacements are defined via -fix or the arguments) it'll ask for additional replacements at start. +-scope:XYZ Works only on the specified scope. XYZ may be any of the + following: + * content (default): Search and replace on page content. + * titles: Search and replace on page titles. + * all: Search and replace on both page content and page + titles. + &fixes-help; -always Don't prompt you for each replacement @@ -388,7 +395,8 @@ def __init__(self, generator, replacements, exceptions={}, acceptall=False, allowoverlap=False, recursive=False, - addedCat=None, sleep=None, summary='', site=None): + addedCat=None, sleep=None, summary='', site=None, + scopes=["content"]): """ Constructor. @@ -405,6 +413,9 @@ replaced. * addedCat - If set to a value, add this category to every page touched. + * scopes - Determines the scopes where the replace robot + works. Possible scopes are page content + ("content") and page titles ("titles"). 
Structure of the exceptions dictionary: This dictionary can have these keys: @@ -441,6 +452,8 @@ self.sleep = sleep self.summary = summary self.changed_pages = 0 + self.moved_pages = 0 + self.scopes = scopes def isTitleExcepted(self, title): """ @@ -528,6 +541,153 @@ )) return u'; '.join(summary_messages) + def work_on_page_content(self, page): + applied = set() + original_text = page.get(get_redirect=True) + new_text = original_text + while True: + if self.isTextExcepted(new_text): + pywikibot.output(u'Skipping content of %s because it contains ' + u'text that is on the exceptions list.' + % page.title(asLink=True)) + break + last_text = None + while new_text != last_text: + last_text = new_text + new_text = self.apply_replacements(last_text, applied) + if not self.recursive: + break + if new_text == original_text: + pywikibot.output(u'No changes were necessary in content of %s' + % page.title(asLink=True)) + break + if hasattr(self, "addedCat"): + cats = page.categories(nofollow_redirects=True) + if self.addedCat not in cats: + cats.append(self.addedCat) + new_text = textlib.replaceCategoryLinks(new_text, + cats, + site=page.site) + # Show the title of the page we're working on. + # Highlight the title in purple. 
+ pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % page.title()) + pywikibot.showDiff(original_text, new_text) + if self.acceptall: + break + choice = pywikibot.input_choice( + u'Do you want to accept these changes?', + [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'), + ('open in Browser', 'b'), ('all', 'a')], + default='N') + if choice == 'e': + editor = editarticle.TextEditor() + as_edited = editor.edit(original_text) + # if user didn't press Cancel + if as_edited and as_edited != new_text: + new_text = as_edited + continue + if choice == 'b': + webbrowser.open("http://%s%s" % ( + page.site.hostname(), + page.site.nice_get_address(page.title(asUrl=True)) + )) + i18n.input('pywikibot-enter-finished-browser') + try: + original_text = page.get(get_redirect=True, force=True) + except pywikibot.NoPage: + pywikibot.output(u'Page %s has been deleted.' + % page.title()) + break + new_text = original_text + continue + if choice == 'a': + self.acceptall = True + if choice == 'y': + page.put_async(new_text, self.generate_summary(applied), callback=self.count_changes) + # choice must be 'N' + break + if self.acceptall and new_text != original_text: + try: + page.put(new_text, self.generate_summary(applied), callback=self.count_changes) + except pywikibot.EditConflict: + pywikibot.output(u'Skipping %s because of edit conflict' + % (page.title(),)) + except pywikibot.SpamfilterError as e: + pywikibot.output( + u'Cannot change %s because of blacklist entry %s' + % (page.title(), e.url)) + except pywikibot.LockedPage: + pywikibot.output(u'Skipping %s (locked page)' + % (page.title(),)) + except pywikibot.PageNotSaved as error: + pywikibot.output(u'Error putting page: %s' + % (error.args,)) + + def work_on_page_title(self, page): + applied = set() + original_title = page.title(withNamespace=False) + new_title = original_title + while True: + if self.isTextExcepted(new_title): + pywikibot.output(u'Skipping title of %s because it contains ' + u'text that is on the 
exceptions list.' + % page.title(asLink=True)) + break + last_title = None + while new_title != last_title: + last_title = new_title + new_title = self.apply_replacements(last_title, applied) + if not self.recursive: + break + if new_title == original_title: + pywikibot.output(u'No changes were necessary in title of %s' + % page.title(asLink=True)) + break + if hasattr(self, "addedCat"): + cats = page.categories(nofollow_redirects=True) + if self.addedCat not in cats: + cats.append(self.addedCat) + new_title = textlib.replaceCategoryLinks(new_title, + cats, + site=page.site) + if page.namespace(): + original_title_with_namespace = \ + self.site.namespace(page.namespace()) + ":" + original_title + new_title_with_namespace = \ + self.site.namespace(page.namespace()) + ":" + new_title + else: + original_title_with_namespace = original_title + new_title_with_namespace = new_title + pywikibot.output( + u"\n\n>>> \03{lightpurple}%s\03{default} (title change) <<<" + % original_title_with_namespace) + pywikibot.showDiff(original_title, new_title) + if self.acceptall: + break + choice = pywikibot.input_choice( + u'Do you want to accept these changes?', + [('Yes', 'y'), ('No', 'n'), + ('open in Browser', 'b'), ('all', 'a')], + default='N') + if choice == 'b': + webbrowser.open("http://%s%s" % ( + page.site.hostname(), + page.site.nice_get_address(page.title(asUrl=True)) + )) + i18n.input('pywikibot-enter-finished-browser') + continue + if choice == 'a': + self.acceptall = True + if choice == 'y': + page.move(new_title_with_namespace, self.generate_summary(applied)) + self.moved_pages += 1 + # choice must be 'N' + break + if self.acceptall and new_title != original_title: + page.move(new_title_with_namespace, self.generate_summary(applied)) + self.moved_pages += 1 + def run(self): """Start the bot.""" # Run the generator which will yield Pages which might need to be @@ -539,95 +699,18 @@ % page.title(asLink=True)) continue try: - # Load the page's text from the wiki - 
original_text = page.get(get_redirect=True) - if not page.canBeEdited(): - pywikibot.output(u"You can't edit page %s" - % page.title(asLink=True)) - continue + page.get(get_redirect=True) except pywikibot.NoPage: pywikibot.output(u'Page %s not found' % page.title(asLink=True)) continue - applied = set() - new_text = original_text - while True: - if self.isTextExcepted(new_text): - pywikibot.output(u'Skipping %s because it contains text ' - u'that is on the exceptions list.' - % page.title(asLink=True)) - break - last_text = None - while new_text != last_text: - last_text = new_text - new_text = self.apply_replacements(last_text, applied) - if not self.recursive: - break - if new_text == original_text: - pywikibot.output(u'No changes were necessary in %s' - % page.title(asLink=True)) - break - if hasattr(self, "addedCat"): - cats = page.categories(nofollow_redirects=True) - if self.addedCat not in cats: - cats.append(self.addedCat) - new_text = textlib.replaceCategoryLinks(new_text, - cats, - site=page.site) - # Show the title of the page we're working on. - # Highlight the title in purple. - pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" - % page.title()) - pywikibot.showDiff(original_text, new_text) - if self.acceptall: - break - choice = pywikibot.input_choice( - u'Do you want to accept these changes?', - [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'), - ('open in Browser', 'b'), ('all', 'a')], - default='N') - if choice == 'e': - editor = editarticle.TextEditor() - as_edited = editor.edit(original_text) - # if user didn't press Cancel - if as_edited and as_edited != new_text: - new_text = as_edited - continue - if choice == 'b': - webbrowser.open("http://%s%s" % ( - page.site.hostname(), - page.site.nice_get_address(page.title(asUrl=True)) - )) - i18n.input('pywikibot-enter-finished-browser') - try: - original_text = page.get(get_redirect=True, force=True) - except pywikibot.NoPage: - pywikibot.output(u'Page %s has been deleted.' 
- % page.title()) - break - new_text = original_text - continue - if choice == 'a': - self.acceptall = True - if choice == 'y': - page.put_async(new_text, self.generate_summary(applied), callback=self.count_changes) - # choice must be 'N' - break - if self.acceptall and new_text != original_text: - try: - page.put(new_text, self.generate_summary(applied), callback=self.count_changes) - except pywikibot.EditConflict: - pywikibot.output(u'Skipping %s because of edit conflict' - % (page.title(),)) - except pywikibot.SpamfilterError as e: - pywikibot.output( - u'Cannot change %s because of blacklist entry %s' - % (page.title(), e.url)) - except pywikibot.LockedPage: - pywikibot.output(u'Skipping %s (locked page)' - % (page.title(),)) - except pywikibot.PageNotSaved as error: - pywikibot.output(u'Error putting page: %s' - % (error.args,)) + if not page.canBeEdited(): + pywikibot.output(u"You can't edit page %s" + % page.title(asLink=True)) + continue + if "content" in self.scopes: + self.work_on_page_content(page) + if "titles" in self.scopes: + self.work_on_page_title(page) def prepareRegexForMySQL(pattern): @@ -697,6 +780,9 @@ sleep = None # Request manual replacements even if replacements are already defined manual_input = False + # Scope of the search and replace: page content, page titles or both. + scopes = ["content"] + supported_scopes = ["content", "titles", "all"] # Read commandline parameters. 
@@ -753,6 +839,14 @@ allowoverlap = True elif arg.startswith('-manualinput'): manual_input = True + elif arg.startswith('-scope:'): + value = arg[7:] + if value not in supported_scopes: + raise pywikibot.Error('invalid scope specified.') + if value == "all": + scopes = ["content", "titles"] + else: + scopes = value else: commandline_replacements.append(arg) @@ -872,14 +966,17 @@ preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, add_cat, sleep, edit_summary, - site) + site, scopes) site.login() bot.run() # Explicitly call pywikibot.stopme(). # It will make sure the callback is triggered before replace.py is unloaded. pywikibot.stopme() - pywikibot.output(u'\n%s pages changed.' % bot.changed_pages) + if bot.changed_pages: + pywikibot.output(u'\n%s pages changed.' % bot.changed_pages) + if bot.moved_pages: + pywikibot.output(u'\n%s pages moved.' % bot.moved_pages) if __name__ == "__main__": -- To view, visit https://gerrit.wikimedia.org/r/182444 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I27f929dbfd08efda559000f6d281b40f5d7e4098 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Gallaecio <adriyeticha...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits