jenkins-bot has submitted this change and it was merged.

Change subject: Cherrypick: select and apply diffs between texts

Cherrypick: select and apply diffs between texts

Display diffs, allow their selection and apply approved diffs.
Return text obtained by applying only approved diffs.

Change-Id: I11b5f36b2416f3f9a9209141d9df3fab483004b8
A pywikibot/
1 file changed, 337 insertions(+), 0 deletions(-)

  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified

diff --git a/pywikibot/ b/pywikibot/
new file mode 100644
index 0000000..490b0df
--- /dev/null
+++ b/pywikibot/
@@ -0,0 +1,337 @@
+# -*- coding: utf-8  -*-
+User-interface related functions
+# (C) Pywikibot team, 2014
+# Distributed under the terms of the MIT license.
+__version__ = '$Id$'
+import difflib
+import itertools
+import pywikibot
+class Hunk(object):
+    """One change hunk between a and b.
+    a and b: two sequences of lines.
+    grouped_opcode: list of 5-tuples describing how to turn a into b.
+        it has the same format as returned by difflib.get_opcodes().
+    Note: parts of this code are taken from difflib.get_grouped_opcodes().
+    """
+    APPR = 1
+    NOT_APPR = -1
+    PENDING = 0
+    def __init__(self, a, b, grouped_opcode):
+        self.a = a
+        self.b = b
+        self.group = grouped_opcode
+        self.header = u''
+        self.colors = {
+            '+': 'lightgreen',
+            '-': 'lightred',
+        }
+        self.diff = list(self.create_diff())
+        self.diff_plain_text = u''.join(self.diff)
+        self.diff_text = u''.join(self.format_diff())
+        first, last = self.group[0], self.group[-1]
+        self.a_rng = (first[1], last[2])
+        self.b_rng = (first[3], last[4])
+        self.header = self.get_header()
+        self.diff_plain_text = u'%s\n%s' % (self.header, self.diff_plain_text)
+        self.diff_text = u'%s' % self.diff_text
+        self.reviewed = self.PENDING
+    def get_header(self):
+        """Provide header of unified diff."""
+        a_rng = difflib._format_range_unified(*self.a_rng)
+        b_rng = difflib._format_range_unified(*self.b_rng)
+        return '@@ -{} +{} @@{}'.format(a_rng, b_rng, '\n')
+    def create_diff(self):
+        """Generator of diff text for this hunk, without formatting."""
+        # make sure each line ends with '\n' to prevent
+        # behaviour like
+        def check_line(l):
+            if not l.endswith('\n'):
+                return l + '\n'
+            return l
+        for tag, i1, i2, j1, j2 in self.group:
+            if tag == 'equal':
+                for line in self.a[i1:i2]:
+                    yield ' ' + check_line(line)
+            if tag in ('delete'):
+                for line in self.a[i1:i2]:
+                    yield '-' + check_line(line)
+            if tag in ('insert'):
+                for line in self.b[j1:j2]:
+                    yield '+' + check_line(line)
+            if tag in ('replace'):
+                for line in difflib.ndiff(self.a[i1:i2], self.b[j1:j2]):
+                    yield check_line(line)
+    def format_diff(self):
+        """Color diff lines."""
+        diff = iter(self.diff)
+        l1, l2 = '', next(diff)
+        for line in diff:
+            l1, l2 = l2, line
+            # do not show lines starting with '?'.
+            if l1.startswith('?'):
+                continue
+            if l2.startswith('?'):
+                yield self.color_line(l1, l2)
+            else:
+                yield self.color_line(l1)
+        # handle last line
+        if not l2.startswith('?'):
+            yield self.color_line(l2)
+    def color_line(self, line, line_ref=None):
+        """Color line characters.
+        If line_ref is None, the whole line is colored.
+        If line_ref[i] is not blank, line[i] is colored.
+        Color depends if line starts with +/-.
+        line: string
+        line_ref: string.
+        """
+        color = line[0]
+        if line_ref is None:
+            if color in self.colors:
+                colored_line = '\03{%s}%s\03{default}' % (self.colors[color], 
+                return colored_line
+            else:
+                return line
+        colored_line = u''
+        state = 'Close'
+        for char, char_ref in itertools.izip_longest(line, line_ref.strip(), 
fillvalue=' '):
+            char_tagged = char
+            if state == 'Close':
+                if char_ref != ' ':
+                    char_tagged = '\03{%s}%s' % (self.colors[color], char)
+                    state = 'Open'
+            elif state == 'Open':
+                if char_ref == ' ':
+                    char_tagged = '\03{default}%s' % char
+                    state = 'Close'
+            colored_line += char_tagged
+        if state == 'Open':
+            colored_line += '\03{default}'
+        return colored_line
+    def apply(self):
+        """Turn a into b for this hunk."""
+        return self.b[self.b_rng[0]:self.b_rng[1]]
+    def __str__(self):
+        return u''.join(self.diff_plain_text)
+    def __repr__(self):
+        """Return a reconstructable representation."""
+        # TODO
+        return '%s(a, b, %s)' \
+               % (self.__class__.__name__,
+class PatchManager(object):
+    """Apply patches to text_a to obtain a new text.
+    If all hunks are approved, text_b will be obtained.
+        letter by letter.
+    """
+    def __init__(self, text_a, text_b, n=0, by_letter=False):
+        """Constructor.
+           text_a: string
+           text_b: string
+           n: int, line of context as defined in difflib.get_grouped_opcodes().
+           by_letter: if text_a and text_b are single lines, comparison can be 
+        """
+        if '\n' in text_a or '\n' in text_b:
+            self.a = text_a.splitlines(1)
+            self.b = text_b.splitlines(1)
+        else:
+            if by_letter:
+                self.a = text_a
+                self.b = text_b
+            else:
+                self.a = text_a.splitlines(1)
+                self.b = text_b.splitlines(1)
+        # groups and hunks have the same order (one hunk corresponds to one group).
+        s = difflib.SequenceMatcher(None, self.a, self.b)
+        self.groups = list(s.get_grouped_opcodes(n))
+        self.hunks = [Hunk(self.a, self.b, group) for group in self.groups]
+        # blocks are a superset of hunks, as they also include parts not
+        # included in any hunk.
+        self.blocks = self.get_blocks()
+    def get_blocks(self):
+        """Return list with blocks of indexes which compose a and, where 
applicable, b.
+        Format of each block:
+            [-1, (i1, i2), (-1, -1)] -> block a[i1:i2] does not change from a 
to b
+                then there is no corresponding hunk.
+            [hunk index, (i1, i2), (j1, j2)] -> block a[i1:i2] becomes b[j1:j2]
+        """
+        blocks = []
+        i2 = 0
+        for hunk_idx, group in enumerate(self.groups):
+            first, last = group[0], group[-1]
+            i1, prev_i2, i2 = first[1], i2, last[2]
+            # there is a section of unchanged text before this hunk.
+            if prev_i2 < i1:
+                rng = (-1, (prev_i2, i1), (-1, -1))
+                blocks.append(rng)
+            rng = (hunk_idx, (first[1], last[2]), (first[3], last[4]))
+            blocks.append(rng)
+        # there is a section of unchanged text at the end of a, b.
+        if i2 < len(self.a):
+            rng = (-1, (last[2], len(self.a)), (-1, -1))
+            blocks.append(rng)
+        return blocks
+    def review_hunks(self):
+        "Review hunks."
+        help_msg = ['y -> accept this hunk',
+                    'n -> do not accept this hunk',
+                    's -> do not accept this hunk and stop reviewing',
+                    'a -> accept this hunk and all other pending',
+                    'r -> review later',
+                    'h -> help',
+                    ]
+        question = 'Accept this hunk?'
+        answers = ['yes', 'no', 'stop', 'all', 'review', 'help']
+        hotkeys = ['y', 'n', 's', 'a', 'r', 'h']
+        actions = {'y': Hunk.APPR,
+                   'n': Hunk.NOT_APPR,
+                   's': Hunk.NOT_APPR,
+                   'a': Hunk.APPR,
+                   'r': Hunk.PENDING,
+                   }
+        pending = [h for h in self.hunks if h.reviewed == h.PENDING]
+        while pending:
+            hunk = pending.pop(0)
+            pywikibot.output(hunk.header + hunk.diff_text)
+            choice = pywikibot.inputChoice(question, answers, hotkeys, 
+            if choice in actions.keys():
+                hunk.reviewed = actions[choice]
+            if choice == 's':
+                while pending:
+                    hunk = pending.pop(0)
+                    hunk.reviewed = hunk.NOT_APPR
+                break
+            elif choice == 'a':
+                while pending:
+                    hunk = pending.pop(0)
+                    hunk.reviewed = hunk.APPR
+                break
+            elif choice == 'h':
+                pywikibot.output(u'\03{purple}%s\03{default}' % 
+                pending.insert(0, hunk)
+            elif choice == 'r':
+                pending.append(hunk)
+        return
+    def apply(self):
+        """Apply changes. If there are undecided changes, ask to review."""
+        if any(h.reviewed == h.PENDING for h in self.hunks):
+            pywikibot.output("There are unreviewed hunks.\n"
+                             "Please review them before proceeding.\n")
+            self.review_hunks()
+        l_text = []
+        for hunk_idx, (i1, i2), (j1, j2) in self.blocks:
+            # unchanged text.
+            if hunk_idx < 0:
+                l_text.extend(self.a[i1:i2])
+            # changed text; check if hunk is approved.
+            else:
+                hunk = self.hunks[hunk_idx]
+                if hunk.reviewed == hunk.APPR:
+                    l_text.extend(self.b[j1:j2])
+                else:
+                    l_text.extend(self.a[i1:i2])
+        # Make a sanity check in case all are approved.
+        if all(h.reviewed == h.APPR for h in self.hunks):
+            assert u''.join(l_text) == u''.join(self.b)
+        return l_text
+def cherry_pick(oldtext, newtext, n=0, by_letter=False):
+    """Propose a list of changes for approval.
+    Text with approved changes will be returned.
+    n: int, line of context as defined in difflib.get_grouped_opcodes().
+    by_letter: if text_a and text_b are single lines, comparison can be done
+        letter by letter.
+    """
+    patch = PatchManager(oldtext, newtext, n=n, by_letter=by_letter)
+    pywikibot.output('\03{{lightpurple}}\n{:*^50}\03{{default}}\n'.format('  
+    for hunk in patch.hunks:
+        pywikibot.output(hunk.diff_text)
+    pywikibot.output('\03{{lightpurple}}\n{:*^50}\03{{default}}\n'.format('  
+    text_list = patch.apply()
+    pywikibot.output('\03{{lightpurple}}\n{:*^50}\03{{default}}\n'.format('  
+    if any(hunk.reviewed == hunk.APPR for hunk in patch.hunks):
+        for hunk in patch.hunks:
+            if hunk.reviewed == hunk.APPR:
+                pywikibot.output(hunk.diff_text)
+    else:
+    text = ''.join(text_list)
+    return text

To view, visit
To unsubscribe, visit

Gerrit-MessageType: merged
Gerrit-Change-Id: I11b5f36b2416f3f9a9209141d9df3fab483004b8
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <>
Gerrit-Reviewer: John Vandenberg <>
Gerrit-Reviewer: Ladsgroup <>
Gerrit-Reviewer: Merlijn van Deen <>
Gerrit-Reviewer: Mpaa <>
Gerrit-Reviewer: Ricordisamoa <>
Gerrit-Reviewer: jenkins-bot <>

MediaWiki-commits mailing list

Reply via email to