Xqt has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/255933

Change subject: [bugfix] Revert "New textlib.replace_links() link replacements"
......................................................................

[bugfix] Revert "New textlib.replace_links() link replacements"

- unbreak the bot which creates misleading links when the source or
  target is a section link.
- ignore target section if it does not exist
- restore the same script behaviour as in compat

This reverts commit 18e6c9b1e655a807ce0ab732880c9874093cef1b
for fixing_redirects.py

Bug: T68403
Change-Id: If9226cf514dff2b6e26f4227f537dc7690d1aec2
---
M scripts/fixing_redirects.py
1 file changed, 89 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/33/255933/1

diff --git a/scripts/fixing_redirects.py b/scripts/fixing_redirects.py
index 9273772..391eb37 100755
--- a/scripts/fixing_redirects.py
+++ b/scripts/fixing_redirects.py
@@ -21,11 +21,15 @@
 
 __version__ = '$Id$'
 #
+import re
+
 import pywikibot
 from pywikibot import pagegenerators
 from pywikibot.bot import (SingleSiteBot, ExistingPageBot, NoRedirectPageBot,
                            AutomaticTWSummaryBot, suggest_help)
+from pywikibot.textlib import does_text_contain_section
 from pywikibot.tools.formatter import color_format
+from pywikibot.tools import first_lower, first_upper as firstcap
 
 # This is required for the text that is shown when you run this script
 # with the parameter -help.
@@ -46,6 +50,81 @@
     ignore_server_errors = True
     summary_key = 'fixing_redirects-fixing'
 
+    def replace_links(self, text, linkedPage, targetPage):
+        """Replace all source links by target."""
+        mysite = pywikibot.Site()
+        linktrail = mysite.linktrail()
+
+        # make a backup of the original text so we can show the changes later
+        linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
+                           r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + 
linktrail + ')')
+        curpos = 0
+        # This loop will run until we have finished the current page
+        while True:
+            m = linkR.search(text, pos=curpos)
+            if not m:
+                break
+            # Make sure that next time around we will not find this same hit.
+            curpos = m.start() + 1
+            # ignore interwiki links and links to sections of the same page
+            if m.group('title').strip() == '' or \
+               mysite.isInterwikiLink(m.group('title')):
+                continue
+            else:
+                actualLinkPage = pywikibot.Page(targetPage.site, 
m.group('title'))
+                # Check whether the link found is to page.
+                if actualLinkPage != linkedPage:
+                    continue
+
+            # The link looks like this:
+            # [[page_title|link_text]]trailing_chars
+            page_title = m.group('title')
+            link_text = m.group('label')
+
+            if not link_text:
+                # or like this: [[page_title]]trailing_chars
+                link_text = page_title
+            if m.group('section') is None:
+                section = ''
+            else:
+                section = m.group('section')
+            if section and targetPage.section():
+                pywikibot.warning(
+                    'Source section {0} and target section {1} found. '
+                    'Skipping.'.format(section, targetPage))
+                continue
+            trailing_chars = m.group('linktrail')
+            if trailing_chars:
+                link_text += trailing_chars
+
+            # remove preleading ":"
+            if link_text[0] == ':':
+                link_text = link_text[1:]
+            if link_text[0].isupper() or link_text[0].isdigit():
+                new_page_title = targetPage.title()
+            else:
+                new_page_title = first_lower(targetPage.title())
+
+            # remove preleading ":"
+            if new_page_title[0] == ':':
+                new_page_title = new_page_title[1:]
+
+            if (new_page_title == link_text and not section):
+                newlink = "[[%s]]" % new_page_title
+            # check if we can create a link with trailing characters instead 
of a
+            # pipelink
+            elif len(new_page_title) <= len(link_text) and \
+                 firstcap(link_text[:len(new_page_title)]) == \
+                 firstcap(new_page_title) and \
+                 re.sub(re.compile(linktrail), '', 
link_text[len(new_page_title):]) == '' and not section:
+                newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
+                                        link_text[len(new_page_title):])
+            else:
+                newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
+            text = text[:m.start()] + newlink + text[m.end():]
+            continue
+        return text
+
     def treat_page(self):
         """Change all redirects from the current page to actual links."""
         links = self.current_page.linkedPages()
@@ -65,12 +144,21 @@
                 except (pywikibot.CircularRedirect,
                         pywikibot.InvalidTitle):
                     continue
+                else:
+                    section = target.section()
+                    if section and not does_text_contain_section(target.text,
+                                                                 section):
+                        pywikibot.warning(
+                            'Section #{0} not found on page {1}'.format(
+                                section, target.title(asLink=True,
+                                                      withSection=False)))
+                        continue
             else:
                 continue
             # no fix to user namespaces
             if target.namespace() in [2, 3] and page.namespace() not in [2, 3]:
                 continue
-            newtext = pywikibot.textlib.replace_links(newtext, [page, target])
+            newtext = self.replace_links(newtext, page, target)
 
         if i is None:
             pywikibot.output('Nothing left to do.')

-- 
To view, visit https://gerrit.wikimedia.org/r/255933
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If9226cf514dff2b6e26f4227f537dc7690d1aec2
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <i...@gno.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to