annotate mercurial/pure/bdiff.py @ 51461:47752632b4fc

branchcache: rename `load` to `_load_heads` We are about to have more similar function, we rename the existing one to a more meaningful name and mark it private in the process.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 26 Feb 2024 15:12:20 +0100
parents 594fc56c0af7
children f4733654f144
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
1 # bdiff.py - Python implementation of bdiff.c
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
2 #
46819
d4ba4d51f85f contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents: 45863
diff changeset
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
4 #
8225
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 7944
diff changeset
5 # This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 8225
diff changeset
6 # GNU General Public License version 2 or any later version.
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
7
27335
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
8
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
9 import difflib
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
10 import re
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
11 import struct
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
12
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
13 from typing import (
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
14 List,
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
15 Tuple,
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
16 )
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
17
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
18
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
19 def splitnewlines(text: bytes) -> List[bytes]:
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
20 '''like str.splitlines, but only split on newlines.'''
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
21 lines = [l + b'\n' for l in text.split(b'\n')]
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
22 if lines:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
23 if lines[-1] == b'\n':
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
24 lines.pop()
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
25 else:
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
26 lines[-1] = lines[-1][:-1]
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
27 return lines
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
28
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
29
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
30 def _normalizeblocks(
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
31 a: List[bytes], b: List[bytes], blocks
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
32 ) -> List[Tuple[int, int, int]]:
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
33 prev = None
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
34 r = []
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
35 for curr in blocks:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
36 if prev is None:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
37 prev = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
38 continue
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
39 shift = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
40
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
41 a1, b1, l1 = prev
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
42 a1end = a1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
43 b1end = b1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
44
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
45 a2, b2, l2 = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
46 a2end = a2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
47 b2end = b2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
48 if a1end == a2:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
49 while (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
50 a1end + shift < a2end and a[a1end + shift] == b[b1end + shift]
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
51 ):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
52 shift += 1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
53 elif b1end == b2:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
54 while (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
55 b1end + shift < b2end and a[a1end + shift] == b[b1end + shift]
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
56 ):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
57 shift += 1
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
58 r.append((a1, b1, l1 + shift))
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
59 prev = a2 + shift, b2 + shift, l2 - shift
45863
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
60
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
61 if prev is not None:
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
62 r.append(prev)
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
63
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
64 return r
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
65
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
66
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
67 def bdiff(a: bytes, b: bytes) -> bytes:
31641
f2b334e6c7e0 py3: use bytes() to cast to immutable bytes in pure.bdiff.bdiff()
Yuya Nishihara <yuya@tcha.org>
parents: 31640
diff changeset
68 a = bytes(a).splitlines(True)
f2b334e6c7e0 py3: use bytes() to cast to immutable bytes in pure.bdiff.bdiff()
Yuya Nishihara <yuya@tcha.org>
parents: 31640
diff changeset
69 b = bytes(b).splitlines(True)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
70
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
71 if not a:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
72 s = b"".join(b)
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
73 return s and (struct.pack(b">lll", 0, 0, len(s)) + s)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
74
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
75 bin = []
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
76 p = [0]
34435
5326e4ef1dab style: never put multiple statements on one line
Alex Gaynor <agaynor@mozilla.com>
parents: 32512
diff changeset
77 for i in a:
5326e4ef1dab style: never put multiple statements on one line
Alex Gaynor <agaynor@mozilla.com>
parents: 32512
diff changeset
78 p.append(p[-1] + len(i))
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
79
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
80 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
81 d = _normalizeblocks(a, b, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
82 la = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
83 lb = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
84 for am, bm, size in d:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
85 s = b"".join(b[lb:bm])
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
86 if am > la or s:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
87 bin.append(struct.pack(b">lll", p[la], p[am], len(s)) + s)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
88 la = am + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
89 lb = bm + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
90
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
91 return b"".join(bin)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
92
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
93
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
94 def blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int, int]]:
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
95 an = splitnewlines(a)
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
96 bn = splitnewlines(b)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
97 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
98 d = _normalizeblocks(an, bn, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
99 return [(i, i + n, j, j + n) for (i, j, n) in d]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
100
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
101
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
102 def fixws(text: bytes, allws: bool) -> bytes:
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
103 if allws:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
104 text = re.sub(b'[ \t\r]+', b'', text)
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
105 else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
106 text = re.sub(b'[ \t\r]+', b' ', text)
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
107 text = text.replace(b' \n', b'\n')
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
108 return text