annotate mercurial/pure/bdiff.py @ 29095:3f33831a9202

develwarn: move config gating inside the develwarn function The config gating is almost always the same and contributor tend to forget it. We move the logic inside the function. Call site will be updated in later changeset. We might make the sub config mandatory in the future (once all old call sites are gone).
author Pierre-Yves David <pierre-yves.david@ens-lyon.org>
date Sun, 08 May 2016 10:43:41 +0200
parents 9ab45fbe045e
children a8933d992a71
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
1 # bdiff.py - Python implementation of bdiff.c
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
2 #
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
4 #
8225
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 7944
diff changeset
5 # This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 8225
diff changeset
6 # GNU General Public License version 2 or any later version.
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
7
27335
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
8 from __future__ import absolute_import
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
9
28389
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
10 import array
27335
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
11 import difflib
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
12 import re
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
13 import struct
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
14
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
15 def splitnewlines(text):
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
16 '''like str.splitlines, but only split on newlines.'''
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
17 lines = [l + '\n' for l in text.split('\n')]
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
18 if lines:
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
19 if lines[-1] == '\n':
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
20 lines.pop()
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
21 else:
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
22 lines[-1] = lines[-1][:-1]
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
23 return lines
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
24
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
25 def _normalizeblocks(a, b, blocks):
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
26 prev = None
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
27 r = []
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
28 for curr in blocks:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
29 if prev is None:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
30 prev = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
31 continue
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
32 shift = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
33
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
34 a1, b1, l1 = prev
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
35 a1end = a1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
36 b1end = b1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
37
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
38 a2, b2, l2 = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
39 a2end = a2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
40 b2end = b2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
41 if a1end == a2:
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
42 while (a1end + shift < a2end and
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
43 a[a1end + shift] == b[b1end + shift]):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
44 shift += 1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
45 elif b1end == b2:
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
46 while (b1end + shift < b2end and
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
47 a[a1end + shift] == b[b1end + shift]):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
48 shift += 1
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
49 r.append((a1, b1, l1 + shift))
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
50 prev = a2 + shift, b2 + shift, l2 - shift
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
51 r.append(prev)
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
52 return r
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
53
28389
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
54 def _tostring(c):
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
55 if type(c) is array.array:
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
56 # this copy overhead isn't ideal
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
57 return c.tostring()
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
58 return str(c)
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
59
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
60 def bdiff(a, b):
28389
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
61 a = _tostring(a).splitlines(True)
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
62 b = _tostring(b).splitlines(True)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
63
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
64 if not a:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
65 s = "".join(b)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
66 return s and (struct.pack(">lll", 0, 0, len(s)) + s)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
67
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
68 bin = []
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
69 p = [0]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
70 for i in a: p.append(p[-1] + len(i))
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
71
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
72 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
73 d = _normalizeblocks(a, b, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
74 la = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
75 lb = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
76 for am, bm, size in d:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
77 s = "".join(b[lb:bm])
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
78 if am > la or s:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
79 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
80 la = am + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
81 lb = bm + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
82
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
83 return "".join(bin)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
84
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
85 def blocks(a, b):
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
86 an = splitnewlines(a)
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
87 bn = splitnewlines(b)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
88 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
89 d = _normalizeblocks(an, bn, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
90 return [(i, i + n, j, j + n) for (i, j, n) in d]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
91
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
92 def fixws(text, allws):
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
93 if allws:
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
94 text = re.sub('[ \t\r]+', '', text)
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
95 else:
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
96 text = re.sub('[ \t\r]+', ' ', text)
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
97 text = text.replace(' \n', '\n')
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
98 return text