Xqt has uploaded a new change for review. https://gerrit.wikimedia.org/r/226500
Change subject: [WIP] Bugfix for T105621 ...................................................................... [WIP] Bugfix for T105621 DO NOT MERGE Change-Id: Id2e73afff7b85d8d1b229d27fff837cfe11a253a --- M pywikibot/textlib.py 1 file changed, 62 insertions(+), 11 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/00/226500/1 diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index a0b4d99..6740b97 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -54,16 +54,8 @@ # that allows system variables, but does not match nested templates. # It exists for backwards compatibility to the old 'TEMP_REGEX' # which was the _ETP_REGEX. -TEMP_REGEX = DeprecatedRegex(r""" -{{\s*(?:msg:)?\s* - (?P<name>[^{\|]+?)\s* - (?:\|(?P<params>[^{]* - (?:(?:{}|{{[A-Z]+(?:\:[^}])?}}|{{{[^}]+}}}) [^{]*)* - )? - )? -}} -""", re.VERBOSE, 'textlib.TEMP_REGEX', 'textlib.NESTED_TEMPLATE_REGEX') - +TEMP_REGEX = re.compile( + r'{{(?:msg:)?(?P<name>[^{\|]+?)(?:\|(?P<params>[^{]+?(?:{[^{]+?}[^{]*?)?))?}}') # The regex below collects nested templates, providing simpler # identification of templates used at the top-level of wikitext. # It doesnt match {{{1|...}}}, however it also does not match templates @@ -141,7 +133,6 @@ 'source': re.compile(r'(?is)<source .*?</source>'), # inline references 'ref': re.compile(r'(?ism)<ref[ >].*?</ref>'), - 'template': NESTED_TEMPLATE_REGEX, # lines that start with a space are shown in a monospace font and # have whitespace preserved. 'startspace': re.compile(r'(?m)^ (.*?)$'), @@ -198,6 +189,10 @@ result.append(_regex_cache[(exc, site)]) else: result.append(_regex_cache[exc]) + elif exc == 'template': + # template is not supported by this method. + print 'pass template' + pass else: # nowiki, noinclude, includeonly, timeline, math ond other # extensions @@ -251,7 +246,57 @@ return text + marker dontTouchRegexes = _get_regexes(exceptions, site) + print exceptions + except_templates = 'template' in exceptions + # mark templates + # don't care about mw variables and parser functions + if except_templates: + print '#######' + marker1 = findmarker(text) + marker2 = findmarker(text, u'##', u'#') + Rvalue = re.compile('{{{.+?}}}') + Rmarker1 = re.compile(r'%(mark)s(\d+)%(mark)s' % {'mark': marker1}) + Rmarker2 = re.compile(r'%(mark)s(\d+)%(mark)s' % {'mark': marker2}) + # hide the flat template marker + dontTouchRegexes.append(Rmarker1) + origin = text + values = {} + count = 0 + for m in Rvalue.finditer(text): + count += 1 + # If we have digits between brackets, restoring from dict may fail. + # So we need to change the index. We have to search in the origin. + while u'}}}%d{{{' % count in origin: + count += 1 + item = m.group() + text = text.replace(item, '%s%d%s' % (marker2, count, marker2)) + values[count] = item + inside = {} + seen = set() + count = 0 + while TEMP_REGEX.search(text) is not None: + for m in TEMP_REGEX.finditer(text): + item = m.group() + if item in seen: + continue # speed up + seen.add(item) + count += 1 + while u'}}%d{{' % count in origin: + count += 1 + text = text.replace(item, '%s%d%s' % (marker1, count, marker1)) + + # Make sure stored templates don't contain markers + for m2 in Rmarker1.finditer(item): + item = item.replace(m2.group(), inside[int(m2.group(1))]) + for m2 in Rmarker2.finditer(item): + item = item.replace(m2.group(), values[int(m2.group(1))]) + inside[count] = item + for i in range(1, count+1): + try: + print bytes(inside[i]) + except: + print i, '???' index = 0 markerpos = len(text) while True: @@ -330,6 +375,12 @@ index += 1 markerpos = match.start() + len(replacement) text = text[:markerpos] + marker + text[markerpos:] + + if except_templates: # restore templates from dict + for m2 in Rmarker1.finditer(text): + text = text.replace(m2.group(), inside[int(m2.group(1))]) + for m2 in Rmarker2.finditer(text): + text = text.replace(m2.group(), values[int(m2.group(1))]) return text -- To view, visit https://gerrit.wikimedia.org/r/226500 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id2e73afff7b85d8d1b229d27fff837cfe11a253a Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Xqt <i...@gno.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits