On Mon, 27 Nov 2017 21:38:48 +0900, matthieu.laneuvi...@octobus.net wrote: > # HG changeset patch > # User Matthieu Laneuville <matthieu.laneuvi...@octobus.net> > # Date 1508944418 -32400 > # Thu Oct 26 00:13:38 2017 +0900 > # Node ID 9a6865e011743fa354dc1ae27d66600e022cddad > # Parent 32bb27dd52825236ba1b6c06fe60e140d6b5ea45 > # EXP-Topic inline-diff > cmdutil: add within-line color diff capacity
Sorry for late review. I played with this patch and I like the feature, but the output seems not so well, e.g. highlighted parts are too broad and too few. A couple of comments follow: > --- a/mercurial/cmdutil.py Tue Nov 21 00:24:09 2017 -0500 > +++ b/mercurial/cmdutil.py Thu Oct 26 00:13:38 2017 +0900 > @@ -7,6 +7,7 @@ > > from __future__ import absolute_import > > +import difflib > import errno > import itertools > import os > @@ -1513,6 +1514,11 @@ def diffordiffstat(ui, repo, diffopts, n > ui.warn(_('warning: %s not inside relative root %s\n') % ( > match.uipath(matchroot), uirelroot)) > > + store = { > + 'diff.inserted': [], > + 'diff.deleted': [] > + } > + status = False Nit: 'status = None' is preferred as it isn't a boolean type. > if stat: > diffopts = diffopts.copy(context=0) > width = 80 > @@ -1529,7 +1535,31 @@ def diffordiffstat(ui, repo, diffopts, n > changes, diffopts, prefix=prefix, > relroot=relroot, > hunksfilterfn=hunksfilterfn): > - write(chunk, label=label) > + > + if not ui.configbool("experimental", "inline-color-diff"): > + write(chunk, label=label) > + continue > + > + # Each deleted/inserted chunk is followed by an EOL chunk with '' > + # label. The 'status' flag helps us grab that second line. > + if label in ['diff.deleted', 'diff.inserted'] or status: > + if status: > + store[status].append(chunk) > + status = False > + else: > + store[label].append(chunk) > + status = label > + continue > + > + if store['diff.inserted'] or store['diff.deleted']: > + for line, l in _chunkdiff(store): > + write(line, label=l) > + > + store['diff.inserted'] = [] > + store['diff.deleted'] = [] > + > + if chunk: > + write(chunk, label=label) Can't we add a word-diff feature by hooking/replacing patch.difflabel() function? I think this "store" hack is needed because we're post-processing a labeled output instead of doing that at the right layer. > +def _chunkdiff(store): > + '''Returns a (line, label) iterator over a corresponding deletion and > + insertion set. The set has to be considered as a whole in order to > match > + lines and perform inline coloring. > + ''' > + def chunkiterator(list1, list2, direction): > + '''For each string in list1, finds matching string in list2 and > returns > + an iterator over their differences. > + ''' > + used = [] > + for a in list1: > + done = False > + for i, b in enumerate(list2): > + if done or i in used: > + continue > + if difflib.SequenceMatcher(None, a, b).ratio() > 0.7: > + buff = _inlinediff(a, b, direction=direction) > + for line in buff: > + yield (line[1], line[0]) > + done = True > + used.append(i) # insure lines in b can be matched only > once It seems wrong to allow crossed matches between list1 and list2. For example, if list1[0] matched list2[5], list1[1] shouldn't match list2[4]. > + if not done: > + yield (a, 'diff.' + direction) > + > + insert = store['diff.inserted'] > + delete = store['diff.deleted'] > + return itertools.chain(chunkiterator(delete, insert, 'deleted'), > + chunkiterator(insert, delete, 'inserted')) > + > +def _inlinediff(from_string, to_string, direction): Nit: name_with_underscore is banned. https://www.mercurial-scm.org/wiki/CodingStyle#Naming_conventions > + '''Perform string diff to highlight specific changes.''' > + direction_skip = '+?' if direction == 'deleted' else '-?' > + if direction == 'deleted': > + to_string, from_string = from_string, to_string > + > + # buffer required to remove last space, there may be smarter ways to do > this > + buff = [] > + > + # we never want to higlight the leading +- > + if direction == 'deleted' and to_string.startswith('-'): > + buff.append(('diff.deleted', '-')) > + to_string = to_string[1:] > + from_string = from_string[1:] > + elif direction == 'inserted' and from_string.startswith('+'): > + buff.append(('diff.inserted', '+')) > + to_string = to_string[1:] > + from_string = from_string[1:] > + > + s = difflib.ndiff(to_string.split(' '), from_string.split(' ')) re.split(r'(\W)', s) will produce a better result. https://stackoverflow.com/a/2136580 FWIW, I tried the following change on top of this patch. The output was too verbose, but seemed more correct. diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py --- a/mercurial/cmdutil.py +++ b/mercurial/cmdutil.py @@ -1587,6 +1587,11 @@ def _chunkdiff(store): '''For each string in list1, finds matching string in list2 and returns an iterator over their differences. ''' + buff = _inlinediff(''.join(list1), ''.join(list2), direction=direction) + for line in buff: + yield (line[1], line[0]) + return + used = [] for a in list1: done = False @@ -1626,14 +1631,14 @@ def _inlinediff(from_string, to_string, to_string = to_string[1:] from_string = from_string[1:] - s = difflib.ndiff(to_string.split(' '), from_string.split(' ')) + s = difflib.ndiff(re.split(r'(\W)', to_string), re.split(r'(\W)', from_string)) for line in s: if line[0] in direction_skip: continue l = 'diff.' + direction + '.highlight' if line[0] in ' ': # unchanged parts l = 'diff.' + direction - buff.append((l, line[2:] + ' ')) + buff.append((l, line[2:])) buff[-1] = (buff[-1][0], buff[-1][1].strip(' ')) return buff _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel