comparison mercurial/patch.py @ 35277:6ba79cf34f5e

patch: add within-line color diff capacity. The `diff` command usually writes deletions in red and insertions in green. This patch adds within-line colors to highlight which parts of the lines differ. Lines to compare are chosen based on their similarity ratio, as computed by difflib's SequenceMatcher, with an arbitrary threshold (0.7): lines whose ratio does not exceed it are considered entirely different, so no inline diff is computed for them. The current implementation is kept behind an experimental flag in order to test the effect on performance. To activate it, set worddiff to true in [experimental] (this description originally named the option inline-color-diff, but the code in this change reads experimental.worddiff).
author Matthieu Laneuville <matthieu.laneuville@octobus.net>
date Thu, 26 Oct 2017 00:13:38 +0900
parents a1d2fc32bb99
children 10cce12fdcd3
comparison
equal deleted inserted replaced
35276:205c3c6c1a51 35277:6ba79cf34f5e
8 8
9 from __future__ import absolute_import, print_function 9 from __future__ import absolute_import, print_function
10 10
11 import collections 11 import collections
12 import copy 12 import copy
13 import difflib
13 import email 14 import email
14 import errno 15 import errno
15 import hashlib 16 import hashlib
16 import os 17 import os
17 import posixpath 18 import posixpath
2250 buildopts = { 2251 buildopts = {
2251 'nodates': get('nodates'), 2252 'nodates': get('nodates'),
2252 'showfunc': get('show_function', 'showfunc'), 2253 'showfunc': get('show_function', 'showfunc'),
2253 'context': get('unified', getter=ui.config), 2254 'context': get('unified', getter=ui.config),
2254 } 2255 }
2256 buildopts['worddiff'] = ui.configbool('experimental', 'worddiff')
2255 2257
2256 if git: 2258 if git:
2257 buildopts['git'] = get('git') 2259 buildopts['git'] = get('git')
2258 2260
2259 # since this is in the experimental section, we need to call 2261 # since this is in the experimental section, we need to call
2461 else: 2463 else:
2462 return difffn(opts, None) 2464 return difffn(opts, None)
2463 2465
2464 def difflabel(func, *args, **kw): 2466 def difflabel(func, *args, **kw):
2465 '''yields 2-tuples of (output, label) based on the output of func()''' 2467 '''yields 2-tuples of (output, label) based on the output of func()'''
2468 inlinecolor = False
2469 if kw.get('opts'):
2470 inlinecolor = kw['opts'].worddiff
2466 headprefixes = [('diff', 'diff.diffline'), 2471 headprefixes = [('diff', 'diff.diffline'),
2467 ('copy', 'diff.extended'), 2472 ('copy', 'diff.extended'),
2468 ('rename', 'diff.extended'), 2473 ('rename', 'diff.extended'),
2469 ('old', 'diff.extended'), 2474 ('old', 'diff.extended'),
2470 ('new', 'diff.extended'), 2475 ('new', 'diff.extended'),
2477 ('-', 'diff.deleted'), 2482 ('-', 'diff.deleted'),
2478 ('+', 'diff.inserted')] 2483 ('+', 'diff.inserted')]
2479 head = False 2484 head = False
2480 for chunk in func(*args, **kw): 2485 for chunk in func(*args, **kw):
2481 lines = chunk.split('\n') 2486 lines = chunk.split('\n')
2487 matches = {}
2488 if inlinecolor:
2489 matches = _findmatches(lines)
2482 for i, line in enumerate(lines): 2490 for i, line in enumerate(lines):
2483 if i != 0: 2491 if i != 0:
2484 yield ('\n', '') 2492 yield ('\n', '')
2485 if head: 2493 if head:
2486 if line.startswith('@'): 2494 if line.startswith('@'):
2504 if diffline: 2512 if diffline:
2505 for token in tabsplitter.findall(stripline): 2513 for token in tabsplitter.findall(stripline):
2506 if '\t' == token[0]: 2514 if '\t' == token[0]:
2507 yield (token, 'diff.tab') 2515 yield (token, 'diff.tab')
2508 else: 2516 else:
2509 yield (token, label) 2517 if i in matches:
2518 for l, t in _inlinediff(
2519 lines[i].rstrip(),
2520 lines[matches[i]].rstrip(),
2521 label):
2522 yield (t, l)
2523 else:
2524 yield (token, label)
2510 else: 2525 else:
2511 yield (stripline, label) 2526 yield (stripline, label)
2512 break 2527 break
2513 else: 2528 else:
2514 yield (line, '') 2529 yield (line, '')
2515 if line != stripline: 2530 if line != stripline:
2516 yield (line[len(stripline):], 'diff.trailingwhitespace') 2531 yield (line[len(stripline):], 'diff.trailingwhitespace')
2532
2533 def _findmatches(slist):
2534 '''Look for insertion matches to deletion and returns a dict of
2535 correspondences.
2536 '''
2537 lastmatch = 0
2538 matches = {}
2539 for i, line in enumerate(slist):
2540 if line == '':
2541 continue
2542 if line[0] == '-':
2543 lastmatch = max(lastmatch, i)
2544 newgroup = False
2545 for j, newline in enumerate(slist[lastmatch + 1:]):
2546 if newline == '':
2547 continue
2548 if newline[0] == '-' and newgroup: # too far, no match
2549 break
2550 if newline[0] == '+': # potential match
2551 newgroup = True
2552 sim = difflib.SequenceMatcher(None, line, newline).ratio()
2553 if sim > 0.7:
2554 lastmatch = lastmatch + 1 + j
2555 matches[i] = lastmatch
2556 matches[lastmatch] = i
2557 break
2558 return matches
2559
2560 def _inlinediff(s1, s2, operation):
2561 '''Perform string diff to highlight specific changes.'''
2562 operation_skip = '+?' if operation == 'diff.deleted' else '-?'
2563 if operation == 'diff.deleted':
2564 s2, s1 = s1, s2
2565
2566 buff = []
2567 # we never want to higlight the leading +-
2568 if operation == 'diff.deleted' and s2.startswith('-'):
2569 label = operation
2570 token = '-'
2571 s2 = s2[1:]
2572 s1 = s1[1:]
2573 elif operation == 'diff.inserted' and s1.startswith('+'):
2574 label = operation
2575 token = '+'
2576 s2 = s2[1:]
2577 s1 = s1[1:]
2578
2579 s = difflib.ndiff(re.split(br'(\W)', s2), re.split(br'(\W)', s1))
2580 for part in s:
2581 if part[0] in operation_skip:
2582 continue
2583 l = operation + '.highlight'
2584 if part[0] in ' ':
2585 l = operation
2586 if l == label: # contiguous token with same label
2587 token += part[2:]
2588 continue
2589 else:
2590 buff.append((label, token))
2591 label = l
2592 token = part[2:]
2593 buff.append((label, token))
2594
2595 return buff
2517 2596
def diffui(*args, **kw):
    '''Labeled counterpart of diff(), meant for ui.write().

    Every positional and keyword argument is forwarded unchanged to diff()
    through difflabel(); the result is difflabel()'s iterator of
    (output, label) 2-tuples.
    '''
    labeled = difflabel(diff, *args, **kw)
    return labeled
2521 2600