Mercurial > hg
annotate mercurial/pure/bdiff.py @ 52282:51a350a22d0c
branching: merge stable into default
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 20 Nov 2024 15:53:19 +0100 |
parents | 09f3a6790e56 |
children |
rev | line source |
---|---|
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
1 # bdiff.py - Python implementation of bdiff.c |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
2 # |
46819
d4ba4d51f85f
contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents:
45863
diff
changeset
|
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
4 # |
8225
46293a0c7e9f
updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents:
7944
diff
changeset
|
5 # This software may be used and distributed according to the terms of the |
10263 | 6 # GNU General Public License version 2 or any later version. |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
7 |
51863
f4733654f144
typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents:
49598
diff
changeset
|
8 from __future__ import annotations |
27335
c4e3ff497f89
bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents:
15530
diff
changeset
|
9 |
c4e3ff497f89
bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents:
15530
diff
changeset
|
10 import difflib |
c4e3ff497f89
bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents:
15530
diff
changeset
|
11 import re |
c4e3ff497f89
bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents:
15530
diff
changeset
|
12 import struct |
51934
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
13 import typing |
7944
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
14 |
49598
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
15 from typing import ( |
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
16 List, |
51934
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
17 Optional, |
49598
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
18 Tuple, |
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
19 ) |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
20 |
51934
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
21 from ..interfaces import ( |
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
22 modules as intmod, |
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
23 ) |
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
24 |
49598
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
25 |
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
26 def splitnewlines(text: bytes) -> List[bytes]: |
7944
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
27 '''like str.splitlines, but only split on newlines.''' |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
28 lines = [l + b'\n' for l in text.split(b'\n')] |
7944
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
29 if lines: |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
30 if lines[-1] == b'\n': |
7944
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
31 lines.pop() |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
32 else: |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
33 lines[-1] = lines[-1][:-1] |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
34 return lines |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
35 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
36 |
49598
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
37 def _normalizeblocks( |
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
38 a: List[bytes], b: List[bytes], blocks |
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
39 ) -> List[Tuple[int, int, int]]: |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
40 prev = None |
14066
14fac6c0536a
pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents:
10282
diff
changeset
|
41 r = [] |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
42 for curr in blocks: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
43 if prev is None: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
44 prev = curr |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
45 continue |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
46 shift = 0 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
47 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
48 a1, b1, l1 = prev |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
49 a1end = a1 + l1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
50 b1end = b1 + l1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
51 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
52 a2, b2, l2 = curr |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
53 a2end = a2 + l2 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
54 b2end = b2 + l2 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
55 if a1end == a2: |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
56 while ( |
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
57 a1end + shift < a2end and a[a1end + shift] == b[b1end + shift] |
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
58 ): |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
59 shift += 1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
60 elif b1end == b2: |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
61 while ( |
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
62 b1end + shift < b2end and a[a1end + shift] == b[b1end + shift] |
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
63 ): |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
64 shift += 1 |
14066
14fac6c0536a
pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents:
10282
diff
changeset
|
65 r.append((a1, b1, l1 + shift)) |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
10263
diff
changeset
|
66 prev = a2 + shift, b2 + shift, l2 - shift |
45863
68aedad4c11c
pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents:
43077
diff
changeset
|
67 |
68aedad4c11c
pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents:
43077
diff
changeset
|
68 if prev is not None: |
68aedad4c11c
pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents:
43077
diff
changeset
|
69 r.append(prev) |
68aedad4c11c
pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents:
43077
diff
changeset
|
70 |
14066
14fac6c0536a
pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents:
10282
diff
changeset
|
71 return r |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
72 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
73 |
49598
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
74 def bdiff(a: bytes, b: bytes) -> bytes: |
31641
f2b334e6c7e0
py3: use bytes() to cast to immutable bytes in pure.bdiff.bdiff()
Yuya Nishihara <yuya@tcha.org>
parents:
31640
diff
changeset
|
75 a = bytes(a).splitlines(True) |
f2b334e6c7e0
py3: use bytes() to cast to immutable bytes in pure.bdiff.bdiff()
Yuya Nishihara <yuya@tcha.org>
parents:
31640
diff
changeset
|
76 b = bytes(b).splitlines(True) |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
77 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
78 if not a: |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
79 s = b"".join(b) |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
80 return s and (struct.pack(b">lll", 0, 0, len(s)) + s) |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
81 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
82 bin = [] |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
83 p = [0] |
34435
5326e4ef1dab
style: never put multiple statements on one line
Alex Gaynor <agaynor@mozilla.com>
parents:
32512
diff
changeset
|
84 for i in a: |
5326e4ef1dab
style: never put multiple statements on one line
Alex Gaynor <agaynor@mozilla.com>
parents:
32512
diff
changeset
|
85 p.append(p[-1] + len(i)) |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
86 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
87 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks() |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
88 d = _normalizeblocks(a, b, d) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
89 la = 0 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
90 lb = 0 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
91 for am, bm, size in d: |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
92 s = b"".join(b[lb:bm]) |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
93 if am > la or s: |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
94 bin.append(struct.pack(b">lll", p[la], p[am], len(s)) + s) |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
95 la = am + size |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
96 lb = bm + size |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
97 |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
98 return b"".join(bin) |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
99 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
100 |
49598
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
101 def blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int, int]]: |
7944
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
102 an = splitnewlines(a) |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
103 bn = splitnewlines(b) |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
104 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks() |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
105 d = _normalizeblocks(an, bn, d) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
106 return [(i, i + n, j, j + n) for (i, j, n) in d] |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
107 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
41269
diff
changeset
|
108 |
49598
594fc56c0af7
typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
109 def fixws(text: bytes, allws: bool) -> bytes: |
15530
eeac5e179243
mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents:
14066
diff
changeset
|
110 if allws: |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
111 text = re.sub(b'[ \t\r]+', b'', text) |
15530
eeac5e179243
mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents:
14066
diff
changeset
|
112 else: |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
113 text = re.sub(b'[ \t\r]+', b' ', text) |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
114 text = text.replace(b' \n', b'\n') |
15530
eeac5e179243
mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents:
14066
diff
changeset
|
115 return text |
51934
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
116 |
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
117 |
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
118 # In order to adhere to the module protocol, these functions must be visible to |
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
119 # the type checker, though they aren't actually implemented by this |
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
120 # implementation of the module protocol. Callers are responsible for |
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
121 # checking that the implementation is available before using them. |
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
122 if typing.TYPE_CHECKING: |
09f3a6790e56
interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents:
51863
diff
changeset
|
123 xdiffblocks: Optional[intmod.BDiffBlocksFnc] = None |