annotate mercurial/pure/bdiff.py @ 51479:5bd31e68c7a3

stream-clone-test: add title to various test cases These case are fine as is, but as we are adding title to all the other as we simplify them, lets add title for all cases.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Fri, 08 Mar 2024 10:59:51 +0100
parents 594fc56c0af7
children f4733654f144
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
1 # bdiff.py - Python implementation of bdiff.c
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
2 #
46819
d4ba4d51f85f contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents: 45863
diff changeset
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
4 #
8225
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 7944
diff changeset
5 # This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 8225
diff changeset
6 # GNU General Public License version 2 or any later version.
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
7
27335
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
8
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
9 import difflib
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
10 import re
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
11 import struct
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
12
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
13 from typing import (
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
14 List,
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
15 Tuple,
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
16 )
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
17
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
18
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
19 def splitnewlines(text: bytes) -> List[bytes]:
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
20 '''like str.splitlines, but only split on newlines.'''
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
21 lines = [l + b'\n' for l in text.split(b'\n')]
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
22 if lines:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
23 if lines[-1] == b'\n':
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
24 lines.pop()
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
25 else:
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
26 lines[-1] = lines[-1][:-1]
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
27 return lines
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
28
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
29
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
30 def _normalizeblocks(
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
31 a: List[bytes], b: List[bytes], blocks
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
32 ) -> List[Tuple[int, int, int]]:
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
33 prev = None
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
34 r = []
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
35 for curr in blocks:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
36 if prev is None:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
37 prev = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
38 continue
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
39 shift = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
40
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
41 a1, b1, l1 = prev
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
42 a1end = a1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
43 b1end = b1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
44
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
45 a2, b2, l2 = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
46 a2end = a2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
47 b2end = b2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
48 if a1end == a2:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
49 while (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
50 a1end + shift < a2end and a[a1end + shift] == b[b1end + shift]
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
51 ):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
52 shift += 1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
53 elif b1end == b2:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
54 while (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
55 b1end + shift < b2end and a[a1end + shift] == b[b1end + shift]
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
56 ):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
57 shift += 1
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
58 r.append((a1, b1, l1 + shift))
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
59 prev = a2 + shift, b2 + shift, l2 - shift
45863
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
60
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
61 if prev is not None:
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
62 r.append(prev)
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
63
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
64 return r
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
65
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
66
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
67 def bdiff(a: bytes, b: bytes) -> bytes:
31641
f2b334e6c7e0 py3: use bytes() to cast to immutable bytes in pure.bdiff.bdiff()
Yuya Nishihara <yuya@tcha.org>
parents: 31640
diff changeset
68 a = bytes(a).splitlines(True)
f2b334e6c7e0 py3: use bytes() to cast to immutable bytes in pure.bdiff.bdiff()
Yuya Nishihara <yuya@tcha.org>
parents: 31640
diff changeset
69 b = bytes(b).splitlines(True)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
70
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
71 if not a:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
72 s = b"".join(b)
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
73 return s and (struct.pack(b">lll", 0, 0, len(s)) + s)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
74
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
75 bin = []
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
76 p = [0]
34435
5326e4ef1dab style: never put multiple statements on one line
Alex Gaynor <agaynor@mozilla.com>
parents: 32512
diff changeset
77 for i in a:
5326e4ef1dab style: never put multiple statements on one line
Alex Gaynor <agaynor@mozilla.com>
parents: 32512
diff changeset
78 p.append(p[-1] + len(i))
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
79
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
80 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
81 d = _normalizeblocks(a, b, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
82 la = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
83 lb = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
84 for am, bm, size in d:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
85 s = b"".join(b[lb:bm])
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
86 if am > la or s:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
87 bin.append(struct.pack(b">lll", p[la], p[am], len(s)) + s)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
88 la = am + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
89 lb = bm + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
90
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
91 return b"".join(bin)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
92
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
93
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
94 def blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int, int]]:
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
95 an = splitnewlines(a)
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
96 bn = splitnewlines(b)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
97 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
98 d = _normalizeblocks(an, bn, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
99 return [(i, i + n, j, j + n) for (i, j, n) in d]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
100
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
101
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
102 def fixws(text: bytes, allws: bool) -> bytes:
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
103 if allws:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
104 text = re.sub(b'[ \t\r]+', b'', text)
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
105 else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
106 text = re.sub(b'[ \t\r]+', b' ', text)
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
107 text = text.replace(b' \n', b'\n')
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
108 return text