comparison mercurial/copies.py @ 42373:f3d06d37e194

copies: split up _chain() in naive chaining and filtering steps The function now has two clearly defined steps. The first step is the actual chaining. This step is very cheap. The second step is filtering out invalid copies. This step is expensive. For changeset-centric copy tracing, I want to do the filtering step only at the end. This patch prepares for that. Differential Revision: https://phab.mercurial-scm.org/D6418
author Martin von Zweigbergk <martinvonz@google.com>
date Thu, 09 May 2019 15:09:07 -0700
parents c361db7a5f14
children 4c39c99d9492
comparison
equal deleted inserted replaced
42372:ba6ca4e80607 42373:f3d06d37e194
105 # yet the filelog has the copy information in rev 1 and we will not look 105 # yet the filelog has the copy information in rev 1 and we will not look
106 # back far enough unless we also look at the a and b as candidates. 106 # back far enough unless we also look at the a and b as candidates.
107 # This only occurs when a is a descendent of b or visa-versa. 107 # This only occurs when a is a descendent of b or visa-versa.
108 return min(limit, a, b) 108 return min(limit, a, b)
109 109
110 def _chain(src, dst, a, b): 110 def _chainandfilter(src, dst, a, b):
111 """chain two sets of copies 'a' and 'b'""" 111 """chain two sets of copies 'a' and 'b' and filter result"""
112 112
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with 113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the 114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 # following table (not including trivial cases). For example, case 2 is 115 # following table (not including trivial cases). For example, case 2 is
116 # where a file existed in 'src' and remained under that name in 'mid' and 116 # where a file existed in 'src' and remained under that name in 'mid' and
121 # 2 x y y x->y 121 # 2 x y y x->y
122 # 3 x y x - 122 # 3 x y x -
123 # 4 x y z x->z 123 # 4 x y z x->z
124 # 5 - x y - 124 # 5 - x y -
125 # 6 x x y x->y 125 # 6 x x y x->y
126 126 #
127 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove 127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be 128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 # overwritten later, and case 3 will be removed later. 129 # between 5 and 6, so it includes all cases in its result.
130 # Cases 1, 3, and 5 are then removed by _filter().
131
132 t = _chain(a, b)
133 _filter(src, dst, t)
134 return t
135
136 def _filter(src, dst, t):
137 """filters out invalid copies after chaining"""
138 for k, v in list(t.items()):
139 # remove copies from files that didn't exist
140 if v not in src:
141 del t[k]
142 # remove criss-crossed copies
143 elif k in src and v in dst:
144 del t[k]
145 # remove copies to files that were then removed
146 elif k not in dst:
147 del t[k]
148
149 def _chain(a, b):
150 """chain two sets of copies 'a' and 'b'"""
130 t = a.copy() 151 t = a.copy()
131 for k, v in b.iteritems(): 152 for k, v in b.iteritems():
132 if v in t: 153 if v in t:
133 # Found a chain, i.e. cases 3 & 4. We'll remove case 3 later.
134 t[k] = t[v] 154 t[k] = t[v]
135 else: 155 else:
136 # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.
137 t[k] = v 156 t[k] = v
138
139 for k, v in list(t.items()):
140 # remove copies from files that didn't exist, i.e. case 5
141 if v not in src:
142 del t[k]
143 # remove criss-crossed copies, i.e. case 3
144 elif k in src and v in dst:
145 del t[k]
146 # remove copies to files that were then removed, i.e. case 1
147 # and file 'y' in cases 3 & 4 (in case of rename)
148 elif k not in dst:
149 del t[k]
150
151 return t 157 return t
152 158
153 def _tracefile(fctx, am, limit): 159 def _tracefile(fctx, am, limit):
154 """return file context that is the ancestor of fctx present in ancestor 160 """return file context that is the ancestor of fctx present in ancestor
155 manifest am, stopping after the first ancestor lower than limit""" 161 manifest am, stopping after the first ancestor lower than limit"""
305 parent = 2 311 parent = 2
306 childcopies = childctx.p2copies() 312 childcopies = childctx.p2copies()
307 if not match.always(): 313 if not match.always():
308 childcopies = {dst: src for dst, src in childcopies.items() 314 childcopies = {dst: src for dst, src in childcopies.items()
309 if match(dst)} 315 if match(dst)}
310 childcopies = _chain(a, childctx, copies, childcopies) 316 childcopies = _chainandfilter(a, childctx, copies, childcopies)
311 heapq.heappush(work, (c, parent, childcopies)) 317 heapq.heappush(work, (c, parent, childcopies))
312 assert False 318 assert False
313 319
314 def _forwardcopies(a, b, match=None): 320 def _forwardcopies(a, b, match=None):
315 """find {dst@b: src@a} copy mapping where a is an ancestor of b""" 321 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
321 # short-circuit to avoid issues with merge states 327 # short-circuit to avoid issues with merge states
322 return _dirstatecopies(b._repo, match) 328 return _dirstatecopies(b._repo, match)
323 329
324 cm = _committedforwardcopies(a, b.p1(), match) 330 cm = _committedforwardcopies(a, b.p1(), match)
325 # combine copies from dirstate if necessary 331 # combine copies from dirstate if necessary
326 return _chain(a, b, cm, _dirstatecopies(b._repo, match)) 332 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
327 return _committedforwardcopies(a, b, match) 333 return _committedforwardcopies(a, b, match)
328 334
329 def _backwardrenames(a, b, match): 335 def _backwardrenames(a, b, match):
330 if a._repo.ui.config('experimental', 'copytrace') == 'off': 336 if a._repo.ui.config('experimental', 'copytrace') == 'off':
331 return {} 337 return {}
365 if debug: 371 if debug:
366 repo.ui.debug('debug.copies: search mode: backward\n') 372 repo.ui.debug('debug.copies: search mode: backward\n')
367 return _backwardrenames(x, y, match=match) 373 return _backwardrenames(x, y, match=match)
368 if debug: 374 if debug:
369 repo.ui.debug('debug.copies: search mode: combined\n') 375 repo.ui.debug('debug.copies: search mode: combined\n')
370 return _chain(x, y, _backwardrenames(x, a, match=match), 376 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
371 _forwardcopies(a, y, match=match)) 377 _forwardcopies(a, y, match=match))
372 378
373 def mergecopies(repo, c1, c2, base): 379 def mergecopies(repo, c1, c2, base):
374 """ 380 """
375 Finds moves and copies between context c1 and c2 that are relevant for 381 Finds moves and copies between context c1 and c2 that are relevant for
376 merging. 'base' will be used as the merge base. 382 merging. 'base' will be used as the merge base.