comparison mercurial/patch.py @ 37731:5471348921c1

patch: buffer lines for the same hunk. Instead of yielding tokens directly, buffer them if they belong to the same hunk. This makes it easier for the upcoming new worddiff algorithm to focus only on the diff hunk, instead of having to worry about other contents. This breaks how the existing experimental worddiff algorithm works, so the algorithm was removed, and related tests are disabled for now. The next patch will add a new worddiff algorithm. Differential Revision: https://phab.mercurial-scm.org/D3211
author Jun Wu <quark@fb.com>
date Mon, 19 Mar 2018 04:28:30 -0700
parents 8d730f96e792
children 35632d392279
comparison
equal deleted inserted replaced
37730:8d730f96e792 37731:5471348921c1
9 from __future__ import absolute_import, print_function 9 from __future__ import absolute_import, print_function
10 10
11 import collections 11 import collections
12 import contextlib 12 import contextlib
13 import copy 13 import copy
14 import difflib
15 import email 14 import email
16 import errno 15 import errno
17 import hashlib 16 import hashlib
18 import os 17 import os
19 import posixpath 18 import posixpath
2479 except GitDiffRequired: 2478 except GitDiffRequired:
2480 return difffn(opts.copy(git=True), None) 2479 return difffn(opts.copy(git=True), None)
2481 else: 2480 else:
2482 return difffn(opts, None) 2481 return difffn(opts, None)
2483 2482
def diffsinglehunk(hunklines):
    """yield tokens for a list of lines in a single hunk

    ``hunklines`` is an iterable of changed lines. Every line must start
    with "-" or "+" and may end with a newline.

    Yields ``(text, label)`` 2-tuples. Runs of tabs are labelled
    ``diff.tab``, other text is labelled ``diff.deleted`` or
    ``diff.inserted`` according to the line prefix, trailing whitespace is
    labelled ``diff.trailingwhitespace`` and the terminating newline, if
    any, is yielded with an empty label.

    Raises error.ProgrammingError for a line that starts with neither "-"
    nor "+" (including an empty line).
    """
    for line in hunklines:
        # chomp
        chompline = line.rstrip('\n')
        # highlight tabs and trailing whitespace
        stripline = chompline.rstrip()
        # Use startswith() rather than indexing: indexing a bytes object
        # yields an int on Python 3, which never equals a one-character
        # string, and indexing an empty line raises IndexError instead of
        # the ProgrammingError below.
        if line.startswith('-'):
            label = 'diff.deleted'
        elif line.startswith('+'):
            label = 'diff.inserted'
        else:
            raise error.ProgrammingError('unexpected hunk line: %s' % line)
        for token in tabsplitter.findall(stripline):
            if token.startswith('\t'):
                yield (token, 'diff.tab')
            else:
                yield (token, label)

        # whatever rstrip() removed besides the newline is trailing
        # whitespace
        if chompline != stripline:
            yield (chompline[len(stripline):], 'diff.trailingwhitespace')
        # emit the newline separately so it carries no label
        if chompline != line:
            yield (line[len(chompline):], '')
2506
2484 def difflabel(func, *args, **kw): 2507 def difflabel(func, *args, **kw):
2485 '''yields 2-tuples of (output, label) based on the output of func()''' 2508 '''yields 2-tuples of (output, label) based on the output of func()'''
2486 inlinecolor = False
2487 if kw.get(r'opts'):
2488 inlinecolor = kw[r'opts'].worddiff
2489 headprefixes = [('diff', 'diff.diffline'), 2509 headprefixes = [('diff', 'diff.diffline'),
2490 ('copy', 'diff.extended'), 2510 ('copy', 'diff.extended'),
2491 ('rename', 'diff.extended'), 2511 ('rename', 'diff.extended'),
2492 ('old', 'diff.extended'), 2512 ('old', 'diff.extended'),
2493 ('new', 'diff.extended'), 2513 ('new', 'diff.extended'),
2495 ('index', 'diff.extended'), 2515 ('index', 'diff.extended'),
2496 ('similarity', 'diff.extended'), 2516 ('similarity', 'diff.extended'),
2497 ('---', 'diff.file_a'), 2517 ('---', 'diff.file_a'),
2498 ('+++', 'diff.file_b')] 2518 ('+++', 'diff.file_b')]
2499 textprefixes = [('@', 'diff.hunk'), 2519 textprefixes = [('@', 'diff.hunk'),
2500 ('-', 'diff.deleted'), 2520 # - and + are handled by diffsinglehunk
2501 ('+', 'diff.inserted')] 2521 ]
2502 head = False 2522 head = False
2523
2524 # buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
2525 hunkbuffer = []
2526 def consumehunkbuffer():
2527 if hunkbuffer:
2528 for token in diffsinglehunk(hunkbuffer):
2529 yield token
2530 hunkbuffer[:] = []
2531
2503 for chunk in func(*args, **kw): 2532 for chunk in func(*args, **kw):
2504 lines = chunk.split('\n') 2533 lines = chunk.split('\n')
2505 matches = {}
2506 if inlinecolor:
2507 matches = _findmatches(lines)
2508 linecount = len(lines) 2534 linecount = len(lines)
2509 for i, line in enumerate(lines): 2535 for i, line in enumerate(lines):
2510 if head: 2536 if head:
2511 if line.startswith('@'): 2537 if line.startswith('@'):
2512 head = False 2538 head = False
2513 else: 2539 else:
2514 if line and not line.startswith((' ', '+', '-', '@', '\\')): 2540 if line and not line.startswith((' ', '+', '-', '@', '\\')):
2515 head = True 2541 head = True
2516 stripline = line
2517 diffline = False 2542 diffline = False
2518 if not head and line and line.startswith(('+', '-')): 2543 if not head and line and line.startswith(('+', '-')):
2519 # highlight tabs and trailing whitespace, but only in
2520 # changed lines
2521 stripline = line.rstrip()
2522 diffline = True 2544 diffline = True
2523 2545
2524 prefixes = textprefixes 2546 prefixes = textprefixes
2525 if head: 2547 if head:
2526 prefixes = headprefixes 2548 prefixes = headprefixes
2527 for prefix, label in prefixes: 2549 if diffline:
2528 if stripline.startswith(prefix): 2550 # buffered
2529 if diffline: 2551 bufferedline = line
2530 if i in matches: 2552 if i + 1 < linecount:
2531 for t, l in _inlinediff(lines[i].rstrip(), 2553 bufferedline += "\n"
2532 lines[matches[i]].rstrip(), 2554 hunkbuffer.append(bufferedline)
2533 label): 2555 else:
2534 yield (t, l) 2556 # unbuffered
2535 else: 2557 for token in consumehunkbuffer():
2536 for token in tabsplitter.findall(stripline): 2558 yield token
2537 if token.startswith('\t'): 2559 stripline = line.rstrip()
2538 yield (token, 'diff.tab') 2560 for prefix, label in prefixes:
2539 else: 2561 if stripline.startswith(prefix):
2540 yield (token, label)
2541 else:
2542 yield (stripline, label) 2562 yield (stripline, label)
2543 break 2563 if line != stripline:
2544 else: 2564 yield (line[len(stripline):],
2545 yield (line, '') 2565 'diff.trailingwhitespace')
2546 if line != stripline:
2547 yield (line[len(stripline):], 'diff.trailingwhitespace')
2548 if i + 1 < linecount:
2549 yield ('\n', '')
2550
2551 def _findmatches(slist):
2552 '''Look for insertion matches to deletion and returns a dict of
2553 correspondences.
2554 '''
2555 lastmatch = 0
2556 matches = {}
2557 for i, line in enumerate(slist):
2558 if line == '':
2559 continue
2560 if line.startswith('-'):
2561 lastmatch = max(lastmatch, i)
2562 newgroup = False
2563 for j, newline in enumerate(slist[lastmatch + 1:]):
2564 if newline == '':
2565 continue
2566 if newline.startswith('-') and newgroup: # too far, no match
2567 break
2568 if newline.startswith('+'): # potential match
2569 newgroup = True
2570 sim = difflib.SequenceMatcher(None, line, newline).ratio()
2571 if sim > 0.7:
2572 lastmatch = lastmatch + 1 + j
2573 matches[i] = lastmatch
2574 matches[lastmatch] = i
2575 break 2566 break
2576 return matches 2567 else:
2577 2568 yield (line, '')
2578 def _inlinediff(s1, s2, operation): 2569 if i + 1 < linecount:
2579 '''Perform string diff to highlight specific changes.''' 2570 yield ('\n', '')
2580 operation_skip = ('+', '?') if operation == 'diff.deleted' else ('-', '?') 2571 for token in consumehunkbuffer():
2581 if operation == 'diff.deleted': 2572 yield token
2582 s2, s1 = s1, s2
2583
2584 buff = []
2585 # we never want to higlight the leading +-
2586 if operation == 'diff.deleted' and s2.startswith('-'):
2587 label = operation
2588 token = '-'
2589 s2 = s2[1:]
2590 s1 = s1[1:]
2591 elif operation == 'diff.inserted' and s1.startswith('+'):
2592 label = operation
2593 token = '+'
2594 s2 = s2[1:]
2595 s1 = s1[1:]
2596 else:
2597 raise error.ProgrammingError("Case not expected, operation = %s" %
2598 operation)
2599
2600 s = difflib.ndiff(_nonwordre.split(s2), _nonwordre.split(s1))
2601 for part in s:
2602 if part.startswith(operation_skip) or len(part) == 2:
2603 continue
2604 l = operation + '.highlight'
2605 if part.startswith(' '):
2606 l = operation
2607 if part[2:] == '\t':
2608 l = 'diff.tab'
2609 if l == label: # contiguous token with same label
2610 token += part[2:]
2611 continue
2612 else:
2613 buff.append((token, label))
2614 label = l
2615 token = part[2:]
2616 buff.append((token, label))
2617
2618 return buff
2619 2573
2620 def diffui(*args, **kw): 2574 def diffui(*args, **kw):
2621 '''like diff(), but yields 2-tuples of (output, label) for ui.write()''' 2575 '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''
2622 return difflabel(diff, *args, **kw) 2576 return difflabel(diff, *args, **kw)
2623 2577