Gallaecio has uploaded a new change for review. https://gerrit.wikimedia.org/r/182444
Change subject: replace.py: Added support for replacing in page titles ...................................................................... replace.py: Added support for replacing in page titles Change-Id: I27f929dbfd08efda559000f6d281b40f5d7e4098 --- M scripts/replace.py 1 file changed, 186 insertions(+), 89 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/44/182444/1 diff --git a/scripts/replace.py b/scripts/replace.py index a8fc8cd..0d527a6 100755 --- a/scripts/replace.py +++ b/scripts/replace.py @@ -70,6 +70,13 @@ (or no replacements are defined via -fix or the arguments) it'll ask for additional replacements at start. +-scope:XYZ Works only on the specified scope. XYZ may be any of the + following: + * content (default): Search and replace on page content. + * titles: Search and replace on page titles. + * all: Search and replace on both page content and page + titles. + &fixes-help; -always Don't prompt you for each replacement @@ -388,7 +395,8 @@ def __init__(self, generator, replacements, exceptions={}, acceptall=False, allowoverlap=False, recursive=False, - addedCat=None, sleep=None, summary='', site=None): + addedCat=None, sleep=None, summary='', site=None, + scopes=["content"]): """ Constructor. @@ -405,6 +413,9 @@ replaced. * addedCat - If set to a value, add this category to every page touched. + * scopes - Determines the scopes where the replace robot + works. Possible scopes are page content + ("content") and page titles ("titles"). 
Structure of the exceptions dictionary: This dictionary can have these keys: @@ -441,6 +452,8 @@ self.sleep = sleep self.summary = summary self.changed_pages = 0 + self.moved_pages = 0 + self.scopes = scopes def isTitleExcepted(self, title): """ @@ -528,6 +541,153 @@ )) return u'; '.join(summary_messages) + def work_on_page_content(self, page): + applied = set() + original_text = page.get(get_redirect=True) + new_text = original_text + while True: + if self.isTextExcepted(new_text): + pywikibot.output(u'Skipping content of %s because it contains ' + u'text that is on the exceptions list.' + % page.title(asLink=True)) + break + last_text = None + while new_text != last_text: + last_text = new_text + new_text = self.apply_replacements(last_text, applied) + if not self.recursive: + break + if new_text == original_text: + pywikibot.output(u'No changes were necessary in content of %s' + % page.title(asLink=True)) + break + if hasattr(self, "addedCat"): + cats = page.categories(nofollow_redirects=True) + if self.addedCat not in cats: + cats.append(self.addedCat) + new_text = textlib.replaceCategoryLinks(new_text, + cats, + site=page.site) + # Show the title of the page we're working on. + # Highlight the title in purple. 
+ pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % page.title()) + pywikibot.showDiff(original_text, new_text) + if self.acceptall: + break + choice = pywikibot.input_choice( + u'Do you want to accept these changes?', + [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'), + ('open in Browser', 'b'), ('all', 'a')], + default='N') + if choice == 'e': + editor = editarticle.TextEditor() + as_edited = editor.edit(original_text) + # if user didn't press Cancel + if as_edited and as_edited != new_text: + new_text = as_edited + continue + if choice == 'b': + webbrowser.open("http://%s%s" % ( + page.site.hostname(), + page.site.nice_get_address(page.title(asUrl=True)) + )) + i18n.input('pywikibot-enter-finished-browser') + try: + original_text = page.get(get_redirect=True, force=True) + except pywikibot.NoPage: + pywikibot.output(u'Page %s has been deleted.' + % page.title()) + break + new_text = original_text + continue + if choice == 'a': + self.acceptall = True + if choice == 'y': + page.put_async(new_text, self.generate_summary(applied), callback=self.count_changes) + # choice must be 'N' + break + if self.acceptall and new_text != original_text: + try: + page.put(new_text, self.generate_summary(applied), callback=self.count_changes) + except pywikibot.EditConflict: + pywikibot.output(u'Skipping %s because of edit conflict' + % (page.title(),)) + except pywikibot.SpamfilterError as e: + pywikibot.output( + u'Cannot change %s because of blacklist entry %s' + % (page.title(), e.url)) + except pywikibot.LockedPage: + pywikibot.output(u'Skipping %s (locked page)' + % (page.title(),)) + except pywikibot.PageNotSaved as error: + pywikibot.output(u'Error putting page: %s' + % (error.args,)) + + def work_on_page_title(self, page): + applied = set() + original_title = page.title(withNamespace=False) + new_title = original_title + while True: + if self.isTextExcepted(new_title): + pywikibot.output(u'Skipping title of %s because it contains ' + u'text that is on the 
exceptions list.' + % page.title(asLink=True)) + break + last_title = None + while new_title != last_title: + last_title = new_title + new_title = self.apply_replacements(last_title, applied) + if not self.recursive: + break + if new_title == original_title: + pywikibot.output(u'No changes were necessary in title of %s' + % page.title(asLink=True)) + break + if hasattr(self, "addedCat"): + cats = page.categories(nofollow_redirects=True) + if self.addedCat not in cats: + cats.append(self.addedCat) + new_title = textlib.replaceCategoryLinks(new_title, + cats, + site=page.site) + if page.namespace(): + original_title_with_namespace = \ + self.site.namespace(page.namespace()) + ":" + original_title + new_title_with_namespace = \ + self.site.namespace(page.namespace()) + ":" + new_title + else: + original_title_with_namespace = original_title + new_title_with_namespace = new_title + pywikibot.output( + u"\n\n>>> \03{lightpurple}%s\03{default} (title change) <<<" + % original_title_with_namespace) + pywikibot.showDiff(original_title, new_title) + if self.acceptall: + break + choice = pywikibot.input_choice( + u'Do you want to accept these changes?', + [('Yes', 'y'), ('No', 'n'), + ('open in Browser', 'b'), ('all', 'a')], + default='N') + if choice == 'b': + webbrowser.open("http://%s%s" % ( + page.site.hostname(), + page.site.nice_get_address(page.title(asUrl=True)) + )) + i18n.input('pywikibot-enter-finished-browser') + continue + if choice == 'a': + self.acceptall = True + if choice == 'y': + page.move(new_title_with_namespace, self.generate_summary(applied)) + self.moved_pages += 1 + # choice must be 'N' + break + if self.acceptall and new_title != original_title: + page.move(new_title_with_namespace, self.generate_summary(applied)) + self.moved_pages += 1 + def run(self): """Start the bot.""" # Run the generator which will yield Pages which might need to be @@ -539,95 +699,18 @@ % page.title(asLink=True)) continue try: - # Load the page's text from the wiki - 
original_text = page.get(get_redirect=True) - if not page.canBeEdited(): - pywikibot.output(u"You can't edit page %s" - % page.title(asLink=True)) - continue + page.get(get_redirect=True) except pywikibot.NoPage: pywikibot.output(u'Page %s not found' % page.title(asLink=True)) continue - applied = set() - new_text = original_text - while True: - if self.isTextExcepted(new_text): - pywikibot.output(u'Skipping %s because it contains text ' - u'that is on the exceptions list.' - % page.title(asLink=True)) - break - last_text = None - while new_text != last_text: - last_text = new_text - new_text = self.apply_replacements(last_text, applied) - if not self.recursive: - break - if new_text == original_text: - pywikibot.output(u'No changes were necessary in %s' - % page.title(asLink=True)) - break - if hasattr(self, "addedCat"): - cats = page.categories(nofollow_redirects=True) - if self.addedCat not in cats: - cats.append(self.addedCat) - new_text = textlib.replaceCategoryLinks(new_text, - cats, - site=page.site) - # Show the title of the page we're working on. - # Highlight the title in purple. - pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" - % page.title()) - pywikibot.showDiff(original_text, new_text) - if self.acceptall: - break - choice = pywikibot.input_choice( - u'Do you want to accept these changes?', - [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'), - ('open in Browser', 'b'), ('all', 'a')], - default='N') - if choice == 'e': - editor = editarticle.TextEditor() - as_edited = editor.edit(original_text) - # if user didn't press Cancel - if as_edited and as_edited != new_text: - new_text = as_edited - continue - if choice == 'b': - webbrowser.open("http://%s%s" % ( - page.site.hostname(), - page.site.nice_get_address(page.title(asUrl=True)) - )) - i18n.input('pywikibot-enter-finished-browser') - try: - original_text = page.get(get_redirect=True, force=True) - except pywikibot.NoPage: - pywikibot.output(u'Page %s has been deleted.' 
- % page.title()) - break - new_text = original_text - continue - if choice == 'a': - self.acceptall = True - if choice == 'y': - page.put_async(new_text, self.generate_summary(applied), callback=self.count_changes) - # choice must be 'N' - break - if self.acceptall and new_text != original_text: - try: - page.put(new_text, self.generate_summary(applied), callback=self.count_changes) - except pywikibot.EditConflict: - pywikibot.output(u'Skipping %s because of edit conflict' - % (page.title(),)) - except pywikibot.SpamfilterError as e: - pywikibot.output( - u'Cannot change %s because of blacklist entry %s' - % (page.title(), e.url)) - except pywikibot.LockedPage: - pywikibot.output(u'Skipping %s (locked page)' - % (page.title(),)) - except pywikibot.PageNotSaved as error: - pywikibot.output(u'Error putting page: %s' - % (error.args,)) + if not page.canBeEdited(): + pywikibot.output(u"You can't edit page %s" + % page.title(asLink=True)) + continue + if "content" in self.scopes: + self.work_on_page_content(page) + if "titles" in self.scopes: + self.work_on_page_title(page) def prepareRegexForMySQL(pattern): @@ -697,6 +780,9 @@ sleep = None # Request manual replacements even if replacements are already defined manual_input = False + # Scope of the search and replace: page content, page titles or both. + scopes = ["content"] + supported_scopes = ["content", "titles", "all"] # Read commandline parameters. 
@@ -753,6 +839,14 @@ allowoverlap = True elif arg.startswith('-manualinput'): manual_input = True + elif arg.startswith('-scope:'): + value = arg[7:] + if value not in supported_scopes: + raise pywikibot.Error('invalid scope specified.') + if value == "all": + scopes = ["content", "titles"] + else: + scopes = value else: commandline_replacements.append(arg) @@ -872,14 +966,17 @@ preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, add_cat, sleep, edit_summary, - site) + site, scopes) site.login() bot.run() # Explicitly call pywikibot.stopme(). # It will make sure the callback is triggered before replace.py is unloaded. pywikibot.stopme() - pywikibot.output(u'\n%s pages changed.' % bot.changed_pages) + if bot.changed_pages: + pywikibot.output(u'\n%s pages changed.' % bot.changed_pages) + if bot.moved_pages: + pywikibot.output(u'\n%s pages moved.' % bot.moved_pages) if __name__ == "__main__": -- To view, visit https://gerrit.wikimedia.org/r/182444 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I27f929dbfd08efda559000f6d281b40f5d7e4098 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Gallaecio <adriyeticha...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits