Xqt has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/226500

Change subject: [WIP] Bugfix for T105621
......................................................................

[WIP] Bugfix for T105621

DO NOT MERGE

Change-Id: Id2e73afff7b85d8d1b229d27fff837cfe11a253a
---
M pywikibot/textlib.py
1 file changed, 62 insertions(+), 11 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/00/226500/1

diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index a0b4d99..6740b97 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -54,16 +54,8 @@
 # that allows system variables, but does not match nested templates.
 # It exists for backwards compatibility to the old 'TEMP_REGEX'
 # which was the _ETP_REGEX.
-TEMP_REGEX = DeprecatedRegex(r"""
-{{\s*(?:msg:)?\s*
-  (?P<name>[^{\|]+?)\s*
-  (?:\|(?P<params>[^{]*
-        (?:(?:{}|{{[A-Z]+(?:\:[^}])?}}|{{{[^}]+}}}) [^{]*)*
-       )?
-  )?
-}}
-""", re.VERBOSE, 'textlib.TEMP_REGEX', 'textlib.NESTED_TEMPLATE_REGEX')
-
+TEMP_REGEX = re.compile(
+    r'{{(?:msg:)?(?P<name>[^{\|]+?)(?:\|(?P<params>[^{]+?(?:{[^{]+?}[^{]*?)?))?}}')
 # The regex below collects nested templates, providing simpler
 # identification of templates used at the top-level of wikitext.
 # It doesnt match {{{1|...}}}, however it also does not match templates
@@ -141,7 +133,6 @@
         'source':       re.compile(r'(?is)<source .*?</source>'),
         # inline references
         'ref':          re.compile(r'(?ism)<ref[ >].*?</ref>'),
-        'template':     NESTED_TEMPLATE_REGEX,
         # lines that start with a space are shown in a monospace font and
         # have whitespace preserved.
         'startspace':   re.compile(r'(?m)^ (.*?)$'),
@@ -198,6 +189,10 @@
                     result.append(_regex_cache[(exc, site)])
                 else:
                     result.append(_regex_cache[exc])
+            elif exc == 'template':
+                # template is not supported by this method.
+                print 'pass template'
+                pass
             else:
                 # nowiki, noinclude, includeonly, timeline, math ond other
                 # extensions
@@ -251,7 +246,57 @@
         return text + marker
 
     dontTouchRegexes = _get_regexes(exceptions, site)
+    print exceptions
+    except_templates = 'template' in exceptions
 
+    # mark templates
+    # don't care about mw variables and parser functions
+    if except_templates:
+        print '#######'
+        marker1 = findmarker(text)
+        marker2 = findmarker(text, u'##', u'#')
+        Rvalue = re.compile('{{{.+?}}}')
+        Rmarker1 = re.compile(r'%(mark)s(\d+)%(mark)s' % {'mark': marker1})
+        Rmarker2 = re.compile(r'%(mark)s(\d+)%(mark)s' % {'mark': marker2})
+        # hide the flat template marker
+        dontTouchRegexes.append(Rmarker1)
+        origin = text
+        values = {}
+        count = 0
+        for m in Rvalue.finditer(text):
+            count += 1
+            # If we have digits between brackets, restoring from dict may fail.
+            # So we need to change the index. We have to search in the origin.
+            while u'}}}%d{{{' % count in origin:
+                count += 1
+            item = m.group()
+            text = text.replace(item, '%s%d%s' % (marker2, count, marker2))
+            values[count] = item
+        inside = {}
+        seen = set()
+        count = 0
+        while TEMP_REGEX.search(text) is not None:
+            for m in TEMP_REGEX.finditer(text):
+                item = m.group()
+                if item in seen:
+                    continue  # speed up
+                seen.add(item)
+                count += 1
+                while u'}}%d{{' % count in origin:
+                    count += 1
+                text = text.replace(item, '%s%d%s' % (marker1, count, marker1))
+
+                # Make sure stored templates don't contain markers
+                for m2 in Rmarker1.finditer(item):
+                    item = item.replace(m2.group(), inside[int(m2.group(1))])
+                for m2 in Rmarker2.finditer(item):
+                    item = item.replace(m2.group(), values[int(m2.group(1))])
+                inside[count] = item
+    for i in range(1, count+1):
+        try:
+            print bytes(inside[i])
+        except:
+            print i, '???'
     index = 0
     markerpos = len(text)
     while True:
@@ -330,6 +375,12 @@
                 index += 1
             markerpos = match.start() + len(replacement)
     text = text[:markerpos] + marker + text[markerpos:]
+
+    if except_templates:  # restore templates from dict
+        for m2 in Rmarker1.finditer(text):
+            text = text.replace(m2.group(), inside[int(m2.group(1))])
+        for m2 in Rmarker2.finditer(text):
+            text = text.replace(m2.group(), values[int(m2.group(1))])
     return text
 
 

-- 
To view, visit https://gerrit.wikimedia.org/r/226500
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id2e73afff7b85d8d1b229d27fff837cfe11a253a
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <i...@gno.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to