Xqt has uploaded a new change for review. https://gerrit.wikimedia.org/r/255933
Change subject: [bugfix] Revert "New textlib.replace_links() link replacements" ...................................................................... [bugfix] Revert "New textlib.replace_links() link replacements" - unbreak the bot which creates misleading links when the source or target is a section link. - ignore target section if it does not exist - restore the same script behaviour as in compat This reverts commit 18e6c9b1e655a807ce0ab732880c9874093cef1b for fixing_redirects.py Bug: T68403 Change-Id: If9226cf514dff2b6e26f4227f537dc7690d1aec2 --- M scripts/fixing_redirects.py 1 file changed, 89 insertions(+), 1 deletion(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/33/255933/1 diff --git a/scripts/fixing_redirects.py b/scripts/fixing_redirects.py index 9273772..391eb37 100755 --- a/scripts/fixing_redirects.py +++ b/scripts/fixing_redirects.py @@ -21,11 +21,15 @@ __version__ = '$Id$' # +import re + import pywikibot from pywikibot import pagegenerators from pywikibot.bot import (SingleSiteBot, ExistingPageBot, NoRedirectPageBot, AutomaticTWSummaryBot, suggest_help) +from pywikibot.textlib import does_text_contain_section from pywikibot.tools.formatter import color_format +from pywikibot.tools import first_lower, first_upper as firstcap # This is required for the text that is shown when you run this script # with the parameter -help. @@ -46,6 +50,81 @@ ignore_server_errors = True summary_key = 'fixing_redirects-fixing' + def replace_links(self, text, linkedPage, targetPage): + """Replace all source links by target.""" + mysite = pywikibot.Site() + linktrail = mysite.linktrail() + + # make a backup of the original text so we can show the changes later + linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?' 
+ r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')') + curpos = 0 + # This loop will run until we have finished the current page + while True: + m = linkR.search(text, pos=curpos) + if not m: + break + # Make sure that next time around we will not find this same hit. + curpos = m.start() + 1 + # ignore interwiki links and links to sections of the same page + if m.group('title').strip() == '' or \ + mysite.isInterwikiLink(m.group('title')): + continue + else: + actualLinkPage = pywikibot.Page(targetPage.site, m.group('title')) + # Check whether the link found is to page. + if actualLinkPage != linkedPage: + continue + + # The link looks like this: + # [[page_title|link_text]]trailing_chars + page_title = m.group('title') + link_text = m.group('label') + + if not link_text: + # or like this: [[page_title]]trailing_chars + link_text = page_title + if m.group('section') is None: + section = '' + else: + section = m.group('section') + if section and targetPage.section(): + pywikibot.warning( + 'Source section {0} and target section {1} found. 
' + 'Skipping.'.format(section, targetPage)) + continue + trailing_chars = m.group('linktrail') + if trailing_chars: + link_text += trailing_chars + + # remove preleading ":" + if link_text[0] == ':': + link_text = link_text[1:] + if link_text[0].isupper() or link_text[0].isdigit(): + new_page_title = targetPage.title() + else: + new_page_title = first_lower(targetPage.title()) + + # remove preleading ":" + if new_page_title[0] == ':': + new_page_title = new_page_title[1:] + + if (new_page_title == link_text and not section): + newlink = "[[%s]]" % new_page_title + # check if we can create a link with trailing characters instead of a + # pipelink + elif len(new_page_title) <= len(link_text) and \ + firstcap(link_text[:len(new_page_title)]) == \ + firstcap(new_page_title) and \ + re.sub(re.compile(linktrail), '', link_text[len(new_page_title):]) == '' and not section: + newlink = "[[%s]]%s" % (link_text[:len(new_page_title)], + link_text[len(new_page_title):]) + else: + newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text) + text = text[:m.start()] + newlink + text[m.end():] + continue + return text + def treat_page(self): """Change all redirects from the current page to actual links.""" links = self.current_page.linkedPages() @@ -65,12 +144,21 @@ except (pywikibot.CircularRedirect, pywikibot.InvalidTitle): continue + else: + section = target.section() + if section and not does_text_contain_section(target.text, + section): + pywikibot.warning( + 'Section #{0} not found on page {1}'.format( + section, target.title(asLink=True, + withSection=False))) + continue else: continue # no fix to user namespaces if target.namespace() in [2, 3] and page.namespace() not in [2, 3]: continue - newtext = pywikibot.textlib.replace_links(newtext, [page, target]) + newtext = self.replace_links(newtext, page, target) if i is None: pywikibot.output('Nothing left to do.') -- To view, visit https://gerrit.wikimedia.org/r/255933 To unsubscribe, visit 
https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If9226cf514dff2b6e26f4227f537dc7690d1aec2 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Xqt <i...@gno.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits