Mercurial > hg
comparison contrib/simplemerge @ 4362:465b9ea02868
Import 3-way merge code from bzr
merge3.py is imported as contrib/simplemerge
test_merge3.py is imported as tests/test-simplemerge.py
author | Alexis S. L. Carvalho <alexis@cecm.usp.br> |
---|---|
date | Mon, 16 Apr 2007 20:17:39 -0300 |
parents | |
children | 2e3c54fb79a3 |
comparison
equal
deleted
inserted
replaced
4361:99c853a1408c | 4362:465b9ea02868 |
---|---|
1 # Copyright (C) 2004, 2005 Canonical Ltd | |
2 # | |
3 # This program is free software; you can redistribute it and/or modify | |
4 # it under the terms of the GNU General Public License as published by | |
5 # the Free Software Foundation; either version 2 of the License, or | |
6 # (at your option) any later version. | |
7 # | |
8 # This program is distributed in the hope that it will be useful, | |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 # GNU General Public License for more details. | |
12 # | |
13 # You should have received a copy of the GNU General Public License | |
14 # along with this program; if not, write to the Free Software | |
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
16 | |
17 | |
18 # mbp: "you know that thing where cvs gives you conflict markers?" | |
19 # s: "i hate that." | |
20 | |
21 | |
22 from bzrlib.errors import CantReprocessAndShowBase | |
23 import bzrlib.patiencediff | |
24 from bzrlib.textfile import check_text_lines | |
25 | |
26 | |
27 def intersect(ra, rb): | |
28 """Given two ranges return the range where they intersect or None. | |
29 | |
30 >>> intersect((0, 10), (0, 6)) | |
31 (0, 6) | |
32 >>> intersect((0, 10), (5, 15)) | |
33 (5, 10) | |
34 >>> intersect((0, 10), (10, 15)) | |
35 >>> intersect((0, 9), (10, 15)) | |
36 >>> intersect((0, 9), (7, 15)) | |
37 (7, 9) | |
38 """ | |
39 assert ra[0] <= ra[1] | |
40 assert rb[0] <= rb[1] | |
41 | |
42 sa = max(ra[0], rb[0]) | |
43 sb = min(ra[1], rb[1]) | |
44 if sa < sb: | |
45 return sa, sb | |
46 else: | |
47 return None | |
48 | |
49 | |
50 def compare_range(a, astart, aend, b, bstart, bend): | |
51 """Compare a[astart:aend] == b[bstart:bend], without slicing. | |
52 """ | |
53 if (aend-astart) != (bend-bstart): | |
54 return False | |
55 for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)): | |
56 if a[ia] != b[ib]: | |
57 return False | |
58 else: | |
59 return True | |
60 | |
61 | |
62 | |
63 | |
64 class Merge3(object): | |
65 """3-way merge of texts. | |
66 | |
67 Given BASE, OTHER, THIS, tries to produce a combined text | |
68 incorporating the changes from both BASE->OTHER and BASE->THIS. | |
69 All three will typically be sequences of lines.""" | |
70 def __init__(self, base, a, b): | |
71 check_text_lines(base) | |
72 check_text_lines(a) | |
73 check_text_lines(b) | |
74 self.base = base | |
75 self.a = a | |
76 self.b = b | |
77 | |
78 | |
79 | |
80 def merge_lines(self, | |
81 name_a=None, | |
82 name_b=None, | |
83 name_base=None, | |
84 start_marker='<<<<<<<', | |
85 mid_marker='=======', | |
86 end_marker='>>>>>>>', | |
87 base_marker=None, | |
88 reprocess=False): | |
89 """Return merge in cvs-like form. | |
90 """ | |
91 newline = '\n' | |
92 if len(self.a) > 0: | |
93 if self.a[0].endswith('\r\n'): | |
94 newline = '\r\n' | |
95 elif self.a[0].endswith('\r'): | |
96 newline = '\r' | |
97 if base_marker and reprocess: | |
98 raise CantReprocessAndShowBase() | |
99 if name_a: | |
100 start_marker = start_marker + ' ' + name_a | |
101 if name_b: | |
102 end_marker = end_marker + ' ' + name_b | |
103 if name_base and base_marker: | |
104 base_marker = base_marker + ' ' + name_base | |
105 merge_regions = self.merge_regions() | |
106 if reprocess is True: | |
107 merge_regions = self.reprocess_merge_regions(merge_regions) | |
108 for t in merge_regions: | |
109 what = t[0] | |
110 if what == 'unchanged': | |
111 for i in range(t[1], t[2]): | |
112 yield self.base[i] | |
113 elif what == 'a' or what == 'same': | |
114 for i in range(t[1], t[2]): | |
115 yield self.a[i] | |
116 elif what == 'b': | |
117 for i in range(t[1], t[2]): | |
118 yield self.b[i] | |
119 elif what == 'conflict': | |
120 yield start_marker + newline | |
121 for i in range(t[3], t[4]): | |
122 yield self.a[i] | |
123 if base_marker is not None: | |
124 yield base_marker + newline | |
125 for i in range(t[1], t[2]): | |
126 yield self.base[i] | |
127 yield mid_marker + newline | |
128 for i in range(t[5], t[6]): | |
129 yield self.b[i] | |
130 yield end_marker + newline | |
131 else: | |
132 raise ValueError(what) | |
133 | |
134 | |
135 | |
136 | |
137 | |
138 def merge_annotated(self): | |
139 """Return merge with conflicts, showing origin of lines. | |
140 | |
141 Most useful for debugging merge. | |
142 """ | |
143 for t in self.merge_regions(): | |
144 what = t[0] | |
145 if what == 'unchanged': | |
146 for i in range(t[1], t[2]): | |
147 yield 'u | ' + self.base[i] | |
148 elif what == 'a' or what == 'same': | |
149 for i in range(t[1], t[2]): | |
150 yield what[0] + ' | ' + self.a[i] | |
151 elif what == 'b': | |
152 for i in range(t[1], t[2]): | |
153 yield 'b | ' + self.b[i] | |
154 elif what == 'conflict': | |
155 yield '<<<<\n' | |
156 for i in range(t[3], t[4]): | |
157 yield 'A | ' + self.a[i] | |
158 yield '----\n' | |
159 for i in range(t[5], t[6]): | |
160 yield 'B | ' + self.b[i] | |
161 yield '>>>>\n' | |
162 else: | |
163 raise ValueError(what) | |
164 | |
165 | |
166 | |
167 | |
168 | |
169 def merge_groups(self): | |
170 """Yield sequence of line groups. Each one is a tuple: | |
171 | |
172 'unchanged', lines | |
173 Lines unchanged from base | |
174 | |
175 'a', lines | |
176 Lines taken from a | |
177 | |
178 'same', lines | |
179 Lines taken from a (and equal to b) | |
180 | |
181 'b', lines | |
182 Lines taken from b | |
183 | |
184 'conflict', base_lines, a_lines, b_lines | |
185 Lines from base were changed to either a or b and conflict. | |
186 """ | |
187 for t in self.merge_regions(): | |
188 what = t[0] | |
189 if what == 'unchanged': | |
190 yield what, self.base[t[1]:t[2]] | |
191 elif what == 'a' or what == 'same': | |
192 yield what, self.a[t[1]:t[2]] | |
193 elif what == 'b': | |
194 yield what, self.b[t[1]:t[2]] | |
195 elif what == 'conflict': | |
196 yield (what, | |
197 self.base[t[1]:t[2]], | |
198 self.a[t[3]:t[4]], | |
199 self.b[t[5]:t[6]]) | |
200 else: | |
201 raise ValueError(what) | |
202 | |
203 | |
204 def merge_regions(self): | |
205 """Return sequences of matching and conflicting regions. | |
206 | |
207 This returns tuples, where the first value says what kind we | |
208 have: | |
209 | |
210 'unchanged', start, end | |
211 Take a region of base[start:end] | |
212 | |
213 'same', astart, aend | |
214 b and a are different from base but give the same result | |
215 | |
216 'a', start, end | |
217 Non-clashing insertion from a[start:end] | |
218 | |
219 Method is as follows: | |
220 | |
221 The two sequences align only on regions which match the base | |
222 and both descendents. These are found by doing a two-way diff | |
223 of each one against the base, and then finding the | |
224 intersections between those regions. These "sync regions" | |
225 are by definition unchanged in both and easily dealt with. | |
226 | |
227 The regions in between can be in any of three cases: | |
228 conflicted, or changed on only one side. | |
229 """ | |
230 | |
231 # section a[0:ia] has been disposed of, etc | |
232 iz = ia = ib = 0 | |
233 | |
234 for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions(): | |
235 #print 'match base [%d:%d]' % (zmatch, zend) | |
236 | |
237 matchlen = zend - zmatch | |
238 assert matchlen >= 0 | |
239 assert matchlen == (aend - amatch) | |
240 assert matchlen == (bend - bmatch) | |
241 | |
242 len_a = amatch - ia | |
243 len_b = bmatch - ib | |
244 len_base = zmatch - iz | |
245 assert len_a >= 0 | |
246 assert len_b >= 0 | |
247 assert len_base >= 0 | |
248 | |
249 #print 'unmatched a=%d, b=%d' % (len_a, len_b) | |
250 | |
251 if len_a or len_b: | |
252 # try to avoid actually slicing the lists | |
253 equal_a = compare_range(self.a, ia, amatch, | |
254 self.base, iz, zmatch) | |
255 equal_b = compare_range(self.b, ib, bmatch, | |
256 self.base, iz, zmatch) | |
257 same = compare_range(self.a, ia, amatch, | |
258 self.b, ib, bmatch) | |
259 | |
260 if same: | |
261 yield 'same', ia, amatch | |
262 elif equal_a and not equal_b: | |
263 yield 'b', ib, bmatch | |
264 elif equal_b and not equal_a: | |
265 yield 'a', ia, amatch | |
266 elif not equal_a and not equal_b: | |
267 yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch | |
268 else: | |
269 raise AssertionError("can't handle a=b=base but unmatched") | |
270 | |
271 ia = amatch | |
272 ib = bmatch | |
273 iz = zmatch | |
274 | |
275 # if the same part of the base was deleted on both sides | |
276 # that's OK, we can just skip it. | |
277 | |
278 | |
279 if matchlen > 0: | |
280 assert ia == amatch | |
281 assert ib == bmatch | |
282 assert iz == zmatch | |
283 | |
284 yield 'unchanged', zmatch, zend | |
285 iz = zend | |
286 ia = aend | |
287 ib = bend | |
288 | |
289 | |
290 def reprocess_merge_regions(self, merge_regions): | |
291 """Where there are conflict regions, remove the agreed lines. | |
292 | |
293 Lines where both A and B have made the same changes are | |
294 eliminated. | |
295 """ | |
296 for region in merge_regions: | |
297 if region[0] != "conflict": | |
298 yield region | |
299 continue | |
300 type, iz, zmatch, ia, amatch, ib, bmatch = region | |
301 a_region = self.a[ia:amatch] | |
302 b_region = self.b[ib:bmatch] | |
303 matches = bzrlib.patiencediff.PatienceSequenceMatcher( | |
304 None, a_region, b_region).get_matching_blocks() | |
305 next_a = ia | |
306 next_b = ib | |
307 for region_ia, region_ib, region_len in matches[:-1]: | |
308 region_ia += ia | |
309 region_ib += ib | |
310 reg = self.mismatch_region(next_a, region_ia, next_b, | |
311 region_ib) | |
312 if reg is not None: | |
313 yield reg | |
314 yield 'same', region_ia, region_len+region_ia | |
315 next_a = region_ia + region_len | |
316 next_b = region_ib + region_len | |
317 reg = self.mismatch_region(next_a, amatch, next_b, bmatch) | |
318 if reg is not None: | |
319 yield reg | |
320 | |
321 | |
322 @staticmethod | |
323 def mismatch_region(next_a, region_ia, next_b, region_ib): | |
324 if next_a < region_ia or next_b < region_ib: | |
325 return 'conflict', None, None, next_a, region_ia, next_b, region_ib | |
326 | |
327 | |
328 def find_sync_regions(self): | |
329 """Return a list of sync regions, where both descendents match the base. | |
330 | |
331 Generates a list of (base1, base2, a1, a2, b1, b2). There is | |
332 always a zero-length sync region at the end of all the files. | |
333 """ | |
334 | |
335 ia = ib = 0 | |
336 amatches = bzrlib.patiencediff.PatienceSequenceMatcher( | |
337 None, self.base, self.a).get_matching_blocks() | |
338 bmatches = bzrlib.patiencediff.PatienceSequenceMatcher( | |
339 None, self.base, self.b).get_matching_blocks() | |
340 len_a = len(amatches) | |
341 len_b = len(bmatches) | |
342 | |
343 sl = [] | |
344 | |
345 while ia < len_a and ib < len_b: | |
346 abase, amatch, alen = amatches[ia] | |
347 bbase, bmatch, blen = bmatches[ib] | |
348 | |
349 # there is an unconflicted block at i; how long does it | |
350 # extend? until whichever one ends earlier. | |
351 i = intersect((abase, abase+alen), (bbase, bbase+blen)) | |
352 if i: | |
353 intbase = i[0] | |
354 intend = i[1] | |
355 intlen = intend - intbase | |
356 | |
357 # found a match of base[i[0], i[1]]; this may be less than | |
358 # the region that matches in either one | |
359 assert intlen <= alen | |
360 assert intlen <= blen | |
361 assert abase <= intbase | |
362 assert bbase <= intbase | |
363 | |
364 asub = amatch + (intbase - abase) | |
365 bsub = bmatch + (intbase - bbase) | |
366 aend = asub + intlen | |
367 bend = bsub + intlen | |
368 | |
369 assert self.base[intbase:intend] == self.a[asub:aend], \ | |
370 (self.base[intbase:intend], self.a[asub:aend]) | |
371 | |
372 assert self.base[intbase:intend] == self.b[bsub:bend] | |
373 | |
374 sl.append((intbase, intend, | |
375 asub, aend, | |
376 bsub, bend)) | |
377 | |
378 # advance whichever one ends first in the base text | |
379 if (abase + alen) < (bbase + blen): | |
380 ia += 1 | |
381 else: | |
382 ib += 1 | |
383 | |
384 intbase = len(self.base) | |
385 abase = len(self.a) | |
386 bbase = len(self.b) | |
387 sl.append((intbase, intbase, abase, abase, bbase, bbase)) | |
388 | |
389 return sl | |
390 | |
391 | |
392 | |
393 def find_unconflicted(self): | |
394 """Return a list of ranges in base that are not conflicted.""" | |
395 am = bzrlib.patiencediff.PatienceSequenceMatcher( | |
396 None, self.base, self.a).get_matching_blocks() | |
397 bm = bzrlib.patiencediff.PatienceSequenceMatcher( | |
398 None, self.base, self.b).get_matching_blocks() | |
399 | |
400 unc = [] | |
401 | |
402 while am and bm: | |
403 # there is an unconflicted block at i; how long does it | |
404 # extend? until whichever one ends earlier. | |
405 a1 = am[0][0] | |
406 a2 = a1 + am[0][2] | |
407 b1 = bm[0][0] | |
408 b2 = b1 + bm[0][2] | |
409 i = intersect((a1, a2), (b1, b2)) | |
410 if i: | |
411 unc.append(i) | |
412 | |
413 if a2 < b2: | |
414 del am[0] | |
415 else: | |
416 del bm[0] | |
417 | |
418 return unc | |
419 | |
420 | |
421 def main(argv): | |
422 # as for diff3 and meld the syntax is "MINE BASE OTHER" | |
423 a = file(argv[1], 'rt').readlines() | |
424 base = file(argv[2], 'rt').readlines() | |
425 b = file(argv[3], 'rt').readlines() | |
426 | |
427 m3 = Merge3(base, a, b) | |
428 | |
429 #for sr in m3.find_sync_regions(): | |
430 # print sr | |
431 | |
432 # sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3])) | |
433 sys.stdout.writelines(m3.merge_annotated()) | |
434 | |
435 | |
436 if __name__ == '__main__': | |
437 import sys | |
438 sys.exit(main(sys.argv)) |