Mercurial > hg
comparison mercurial/copies.py @ 42373:f3d06d37e194
copies: split up _chain() in naive chaining and filtering steps
The function now has two clearly defined steps. The first step is the
actual chaining. This step is very cheap. The second step is filtering
out invalid copies. This step is expensive. For changeset-centric copy
tracing, I want to do the filtering step only at the end. This patch
prepares for that.
Differential Revision: https://phab.mercurial-scm.org/D6418
author | Martin von Zweigbergk <martinvonz@google.com> |
---|---|
date | Thu, 09 May 2019 15:09:07 -0700 |
parents | c361db7a5f14 |
children | 4c39c99d9492 |
comparison
equal
deleted
inserted
replaced
42372:ba6ca4e80607 | 42373:f3d06d37e194 |
---|---|
105 # yet the filelog has the copy information in rev 1 and we will not look | 105 # yet the filelog has the copy information in rev 1 and we will not look |
106 # back far enough unless we also look at the a and b as candidates. | 106 # back far enough unless we also look at the a and b as candidates. |
107 # This only occurs when a is a descendent of b or visa-versa. | 107 # This only occurs when a is a descendent of b or visa-versa. |
108 return min(limit, a, b) | 108 return min(limit, a, b) |
109 | 109 |
110 def _chain(src, dst, a, b): | 110 def _chainandfilter(src, dst, a, b): |
111 """chain two sets of copies 'a' and 'b'""" | 111 """chain two sets of copies 'a' and 'b' and filter result""" |
112 | 112 |
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with | 113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with |
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the | 114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the |
115 # following table (not including trivial cases). For example, case 2 is | 115 # following table (not including trivial cases). For example, case 2 is |
116 # where a file existed in 'src' and remained under that name in 'mid' and | 116 # where a file existed in 'src' and remained under that name in 'mid' and |
121 # 2 x y y x->y | 121 # 2 x y y x->y |
122 # 3 x y x - | 122 # 3 x y x - |
123 # 4 x y z x->z | 123 # 4 x y z x->z |
124 # 5 - x y - | 124 # 5 - x y - |
125 # 6 x x y x->y | 125 # 6 x x y x->y |
126 | 126 # |
127 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove | 127 # _chain() takes care of chaining the copies in 'a' and 'b', but it |
128 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be | 128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or |
129 # overwritten later, and case 3 will be removed later. | 129 # between 5 and 6, so it includes all cases in its result. |
130 # Cases 1, 3, and 5 are then removed by _filter(). | |
131 | |
132 t = _chain(a, b) | |
133 _filter(src, dst, t) | |
134 return t | |
135 | |
136 def _filter(src, dst, t): | |
137 """filters out invalid copies after chaining""" | |
138 for k, v in list(t.items()): | |
139 # remove copies from files that didn't exist | |
140 if v not in src: | |
141 del t[k] | |
142 # remove criss-crossed copies | |
143 elif k in src and v in dst: | |
144 del t[k] | |
145 # remove copies to files that were then removed | |
146 elif k not in dst: | |
147 del t[k] | |
148 | |
149 def _chain(a, b): | |
150 """chain two sets of copies 'a' and 'b'""" | |
130 t = a.copy() | 151 t = a.copy() |
131 for k, v in b.iteritems(): | 152 for k, v in b.iteritems(): |
132 if v in t: | 153 if v in t: |
133 # Found a chain, i.e. cases 3 & 4. We'll remove case 3 later. | |
134 t[k] = t[v] | 154 t[k] = t[v] |
135 else: | 155 else: |
136 # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later. | |
137 t[k] = v | 156 t[k] = v |
138 | |
139 for k, v in list(t.items()): | |
140 # remove copies from files that didn't exist, i.e. case 5 | |
141 if v not in src: | |
142 del t[k] | |
143 # remove criss-crossed copies, i.e. case 3 | |
144 elif k in src and v in dst: | |
145 del t[k] | |
146 # remove copies to files that were then removed, i.e. case 1 | |
147 # and file 'y' in cases 3 & 4 (in case of rename) | |
148 elif k not in dst: | |
149 del t[k] | |
150 | |
151 return t | 157 return t |
152 | 158 |
153 def _tracefile(fctx, am, limit): | 159 def _tracefile(fctx, am, limit): |
154 """return file context that is the ancestor of fctx present in ancestor | 160 """return file context that is the ancestor of fctx present in ancestor |
155 manifest am, stopping after the first ancestor lower than limit""" | 161 manifest am, stopping after the first ancestor lower than limit""" |
305 parent = 2 | 311 parent = 2 |
306 childcopies = childctx.p2copies() | 312 childcopies = childctx.p2copies() |
307 if not match.always(): | 313 if not match.always(): |
308 childcopies = {dst: src for dst, src in childcopies.items() | 314 childcopies = {dst: src for dst, src in childcopies.items() |
309 if match(dst)} | 315 if match(dst)} |
310 childcopies = _chain(a, childctx, copies, childcopies) | 316 childcopies = _chainandfilter(a, childctx, copies, childcopies) |
311 heapq.heappush(work, (c, parent, childcopies)) | 317 heapq.heappush(work, (c, parent, childcopies)) |
312 assert False | 318 assert False |
313 | 319 |
314 def _forwardcopies(a, b, match=None): | 320 def _forwardcopies(a, b, match=None): |
315 """find {dst@b: src@a} copy mapping where a is an ancestor of b""" | 321 """find {dst@b: src@a} copy mapping where a is an ancestor of b""" |
321 # short-circuit to avoid issues with merge states | 327 # short-circuit to avoid issues with merge states |
322 return _dirstatecopies(b._repo, match) | 328 return _dirstatecopies(b._repo, match) |
323 | 329 |
324 cm = _committedforwardcopies(a, b.p1(), match) | 330 cm = _committedforwardcopies(a, b.p1(), match) |
325 # combine copies from dirstate if necessary | 331 # combine copies from dirstate if necessary |
326 return _chain(a, b, cm, _dirstatecopies(b._repo, match)) | 332 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match)) |
327 return _committedforwardcopies(a, b, match) | 333 return _committedforwardcopies(a, b, match) |
328 | 334 |
329 def _backwardrenames(a, b, match): | 335 def _backwardrenames(a, b, match): |
330 if a._repo.ui.config('experimental', 'copytrace') == 'off': | 336 if a._repo.ui.config('experimental', 'copytrace') == 'off': |
331 return {} | 337 return {} |
365 if debug: | 371 if debug: |
366 repo.ui.debug('debug.copies: search mode: backward\n') | 372 repo.ui.debug('debug.copies: search mode: backward\n') |
367 return _backwardrenames(x, y, match=match) | 373 return _backwardrenames(x, y, match=match) |
368 if debug: | 374 if debug: |
369 repo.ui.debug('debug.copies: search mode: combined\n') | 375 repo.ui.debug('debug.copies: search mode: combined\n') |
370 return _chain(x, y, _backwardrenames(x, a, match=match), | 376 return _chainandfilter(x, y, _backwardrenames(x, a, match=match), |
371 _forwardcopies(a, y, match=match)) | 377 _forwardcopies(a, y, match=match)) |
372 | 378 |
373 def mergecopies(repo, c1, c2, base): | 379 def mergecopies(repo, c1, c2, base): |
374 """ | 380 """ |
375 Finds moves and copies between context c1 and c2 that are relevant for | 381 Finds moves and copies between context c1 and c2 that are relevant for |
376 merging. 'base' will be used as the merge base. | 382 merging. 'base' will be used as the merge base. |