diff mercurial/patch.py @ 35277:6ba79cf34f5e

patch: add within-line color diff capability The `diff' command usually writes deletions in red and insertions in green. This patch adds within-line colors, to highlight which parts of the lines differ. Lines to compare are chosen based on their similarity ratio, as computed by difflib's SequenceMatcher, with an arbitrary threshold (0.7): two lines whose ratio is not above it are considered entirely different, and no inline diff is computed for them. The current implementation is kept behind an experimental flag in order to test the effect on performance. To activate it, set worddiff to true in [experimental] (this is the option the patch reads).
author Matthieu Laneuville <matthieu.laneuville@octobus.net>
date Thu, 26 Oct 2017 00:13:38 +0900
parents a1d2fc32bb99
children 10cce12fdcd3
line wrap: on
line diff
--- a/mercurial/patch.py	Thu Sep 22 18:23:58 2016 +0900
+++ b/mercurial/patch.py	Thu Oct 26 00:13:38 2017 +0900
@@ -10,6 +10,7 @@
 
 import collections
 import copy
+import difflib
 import email
 import errno
 import hashlib
@@ -2252,6 +2253,7 @@
         'showfunc': get('show_function', 'showfunc'),
         'context': get('unified', getter=ui.config),
     }
+    buildopts['worddiff'] = ui.configbool('experimental', 'worddiff')
 
     if git:
         buildopts['git'] = get('git')
@@ -2463,6 +2465,9 @@
 
 def difflabel(func, *args, **kw):
     '''yields 2-tuples of (output, label) based on the output of func()'''
+    inlinecolor = False
+    if kw.get('opts'):
+        inlinecolor = kw['opts'].worddiff
     headprefixes = [('diff', 'diff.diffline'),
                     ('copy', 'diff.extended'),
                     ('rename', 'diff.extended'),
@@ -2479,6 +2484,9 @@
     head = False
     for chunk in func(*args, **kw):
         lines = chunk.split('\n')
+        matches = {}
+        if inlinecolor:
+            matches = _findmatches(lines)
         for i, line in enumerate(lines):
             if i != 0:
                 yield ('\n', '')
@@ -2506,7 +2514,14 @@
                             if '\t' == token[0]:
                                 yield (token, 'diff.tab')
                             else:
-                                yield (token, label)
+                                if i in matches:
+                                    for l, t in _inlinediff(
+                                                  lines[i].rstrip(),
+                                                  lines[matches[i]].rstrip(),
+                                                  label):
+                                        yield (t, l)
+                                else:
+                                    yield (token, label)
                     else:
                         yield (stripline, label)
                     break
@@ -2515,6 +2530,70 @@
             if line != stripline:
                 yield (line[len(stripline):], 'diff.trailingwhitespace')
 
+def _findmatches(slist, threshold=0.7):
+    '''Pair deleted lines with the similar inserted lines that follow them.
+
+    ``slist`` is a list of diff lines. For every line starting with '-',
+    the lines after it (or after the last paired insertion, whichever
+    comes later) are scanned: further '-' lines are skipped until the
+    first '+' line is seen, every '+' line is a candidate, and the scan
+    gives up at the first '-' line met after a candidate. The first
+    candidate whose difflib similarity ratio with the deleted line is
+    strictly greater than ``threshold`` is paired with it.
+
+    Returns a dict mapping the index of each paired line to the index of
+    its counterpart (both directions are stored). Unpaired lines are
+    absent from the dict.
+    '''
+    total = len(slist)
+    lastmatch = 0
+    matches = {}
+    for i, line in enumerate(slist):
+        if not line.startswith('-'):
+            continue
+        # never look at or before an insertion that is already paired
+        lastmatch = max(lastmatch, i)
+        newgroup = False
+        # walk by index so the tail of the list is not copied for every
+        # deleted line
+        j = lastmatch
+        while j + 1 < total:
+            j += 1
+            newline = slist[j]
+            if newline.startswith('-'):
+                if newgroup: # too far, no match
+                    break
+                continue
+            if not newline.startswith('+'):
+                continue
+            newgroup = True # potential match
+            sm = difflib.SequenceMatcher(None, line, newline)
+            # the two quick ratios are cheap upper bounds on ratio(), so
+            # they can only reject pairs that ratio() would reject too
+            if (sm.real_quick_ratio() > threshold
+                and sm.quick_ratio() > threshold
+                and sm.ratio() > threshold):
+                lastmatch = j
+                matches[i] = j
+                matches[j] = i
+                break
+    return matches
+
+def _inlinediff(s1, s2, operation):
+    '''Perform string diff to highlight specific changes.
+
+    ``s1`` is the line being rendered, ``s2`` the line it is compared
+    with and ``operation`` the label of ``s1`` ('diff.deleted' for a
+    removed line; anything else is handled like 'diff.inserted'). Both
+    lines are split on non-word characters and the resulting tokens are
+    compared with difflib.ndiff.
+
+    Returns a list of (label, text) pairs whose texts, concatenated,
+    give ``s1`` back. Text shared with ``s2`` is labelled ``operation``
+    and text that differs is labelled ``operation + '.highlight'``.
+    Adjacent pieces with the same label are merged and pieces with empty
+    text are left out.
+    '''
+    deleted = operation == 'diff.deleted'
+    # ndiff tags to drop: tokens found only in the other line, plus the
+    # '?' hint lines ndiff may emit
+    operation_skip = '+?' if deleted else '-?'
+    marker = '-' if deleted else '+'
+
+    # start from a defined state so a line without the expected marker
+    # is still processed instead of hitting unbound locals
+    label = operation
+    token = ''
+    # we never want to highlight the leading +-
+    if s1.startswith(marker):
+        token = marker
+        s1 = s1[1:]
+        s2 = s2[1:]
+
+    # ndiff always gets the removed text first and the added text second
+    if deleted:
+        old, new = s1, s2
+    else:
+        old, new = s2, s1
+
+    buff = []
+    for part in difflib.ndiff(re.split(br'(\W)', old),
+                              re.split(br'(\W)', new)):
+        if part[0] in operation_skip:
+            continue
+        if part[0] == ' ':
+            l = operation
+        else:
+            l = operation + '.highlight'
+        if l != label: # label changes: flush what was accumulated
+            if token:
+                buff.append((label, token))
+            label = l
+            token = ''
+        # contiguous tokens with the same label are glued together
+        token += part[2:]
+    if token:
+        buff.append((label, token))
+
+    return buff
+
 def diffui(*args, **kw):
     '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''
     # difflabel() calls diff(*args, **kw) itself and attaches a label to
     # each piece of its output; every argument is forwarded unchanged.
     return difflabel(diff, *args, **kw)