Mercurial > hg
comparison mercurial/copies.py @ 44758:45f3f35cefe7
copies: fix the changeset based algorithm regarding merge
In 99ebde4fec99, we changed the list of files stored into the `files` field.
This led the changeset-centric copy algorithm to break in various
situations involving merges. Older information could reach the merge through
`p1`, and while information from `p2` was strictly fresher, it would get
overwritten anyway.
We update the situation with more details about which revision introduces rename
information. This helps us make the right decision in case of a merge.
We are now running a more comprehensive suite of tests which includes this kind of
situation. The behavior differs slightly from the filelog-based one in a couple of
instances. There are mostly two distinct cases:
1) there is conflicting rename information in a merge (different rename histories
on each side). In this case the filelog-based implementation arbitrarily picks a
side based on the file-revision-number. So it depends on a local factor. The
changeset centric algorithm will use a deterministic approach, by picking the
information coming from the first parent of the merge. This is stable across
different clones.
2) rename information related to files that exist in both source and destination.
The filelog-based implementation does not even try to detect these; however, the
changeset-centric one gets them for "free" (it is simpler to detect them than
not).
The new implementation focuses on correctness. Performance improvements will come
later.
Differential Revision: https://phab.mercurial-scm.org/D8244
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Thu, 05 Mar 2020 17:55:05 +0100 |
parents | 30862e226339 |
children | 61719b9658b1 |
comparison
equal
deleted
inserted
replaced
44757:f365dfede78f | 44758:45f3f35cefe7 |
---|---|
181 * p1: revision number of first parent | 181 * p1: revision number of first parent |
182 * p2: revision number of first parent | 182 * p2: revision number of first parent |
183 * p1copies: mapping of copies from p1 | 183 * p1copies: mapping of copies from p1 |
184 * p2copies: mapping of copies from p2 | 184 * p2copies: mapping of copies from p2 |
185 * removed: a list of removed files | 185 * removed: a list of removed files |
186 * ismerged: a callback to know if file was merged in that revision | |
186 """ | 187 """ |
187 cl = repo.changelog | 188 cl = repo.changelog |
188 parents = cl.parentrevs | 189 parents = cl.parentrevs |
190 | |
191 def get_ismerged(rev): | |
192 ctx = repo[rev] | |
193 | |
194 def ismerged(path): | |
195 if path not in ctx.files(): | |
196 return False | |
197 fctx = ctx[path] | |
198 parents = fctx._filelog.parents(fctx._filenode) | |
199 nb_parents = 0 | |
200 for n in parents: | |
201 if n != node.nullid: | |
202 nb_parents += 1 | |
203 return nb_parents >= 2 | |
204 | |
205 return ismerged | |
189 | 206 |
190 if repo.filecopiesmode == b'changeset-sidedata': | 207 if repo.filecopiesmode == b'changeset-sidedata': |
191 changelogrevision = cl.changelogrevision | 208 changelogrevision = cl.changelogrevision |
192 flags = cl.flags | 209 flags = cl.flags |
193 | 210 |
216 # time to save memory. | 233 # time to save memory. |
217 merge_caches = {} | 234 merge_caches = {} |
218 | 235 |
219 def revinfo(rev): | 236 def revinfo(rev): |
220 p1, p2 = parents(rev) | 237 p1, p2 = parents(rev) |
238 value = None | |
221 if flags(rev) & REVIDX_SIDEDATA: | 239 if flags(rev) & REVIDX_SIDEDATA: |
222 e = merge_caches.pop(rev, None) | 240 e = merge_caches.pop(rev, None) |
223 if e is not None: | 241 if e is not None: |
224 return e | 242 return e |
225 c = changelogrevision(rev) | 243 c = changelogrevision(rev) |
226 p1copies = c.p1copies | 244 p1copies = c.p1copies |
227 p2copies = c.p2copies | 245 p2copies = c.p2copies |
228 removed = c.filesremoved | 246 removed = c.filesremoved |
229 if p1 != node.nullrev and p2 != node.nullrev: | 247 if p1 != node.nullrev and p2 != node.nullrev: |
230 # XXX some case we over cache, IGNORE | 248 # XXX some case we over cache, IGNORE |
231 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed) | 249 value = merge_caches[rev] = ( |
250 p1, | |
251 p2, | |
252 p1copies, | |
253 p2copies, | |
254 removed, | |
255 get_ismerged(rev), | |
256 ) | |
232 else: | 257 else: |
233 p1copies = {} | 258 p1copies = {} |
234 p2copies = {} | 259 p2copies = {} |
235 removed = [] | 260 removed = [] |
236 return p1, p2, p1copies, p2copies, removed | 261 |
262 if value is None: | |
263 value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev)) | |
264 return value | |
237 | 265 |
238 else: | 266 else: |
239 | 267 |
240 def revinfo(rev): | 268 def revinfo(rev): |
241 p1, p2 = parents(rev) | 269 p1, p2 = parents(rev) |
242 ctx = repo[rev] | 270 ctx = repo[rev] |
243 p1copies, p2copies = ctx._copies | 271 p1copies, p2copies = ctx._copies |
244 removed = ctx.filesremoved() | 272 removed = ctx.filesremoved() |
245 return p1, p2, p1copies, p2copies, removed | 273 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev) |
246 | 274 |
247 return revinfo | 275 return revinfo |
248 | 276 |
249 | 277 |
250 def _changesetforwardcopies(a, b, match): | 278 def _changesetforwardcopies(a, b, match): |
254 repo = a.repo().unfiltered() | 282 repo = a.repo().unfiltered() |
255 children = {} | 283 children = {} |
256 revinfo = _revinfogetter(repo) | 284 revinfo = _revinfogetter(repo) |
257 | 285 |
258 cl = repo.changelog | 286 cl = repo.changelog |
287 isancestor = cl.isancestorrev # XXX we should had chaching to this. | |
259 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()]) | 288 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()]) |
260 mrset = set(missingrevs) | 289 mrset = set(missingrevs) |
261 roots = set() | 290 roots = set() |
262 for r in missingrevs: | 291 for r in missingrevs: |
263 for p in cl.parentrevs(r): | 292 for p in cl.parentrevs(r): |
281 iterrevs = set(from_head) | 310 iterrevs = set(from_head) |
282 iterrevs &= mrset | 311 iterrevs &= mrset |
283 iterrevs.update(roots) | 312 iterrevs.update(roots) |
284 iterrevs.remove(b.rev()) | 313 iterrevs.remove(b.rev()) |
285 revs = sorted(iterrevs) | 314 revs = sorted(iterrevs) |
286 return _combinechangesetcopies(revs, children, b.rev(), revinfo, match) | 315 return _combinechangesetcopies( |
287 | 316 revs, children, b.rev(), revinfo, match, isancestor |
288 | 317 ) |
289 def _combinechangesetcopies(revs, children, targetrev, revinfo, match): | 318 |
319 | |
320 def _combinechangesetcopies( | |
321 revs, children, targetrev, revinfo, match, isancestor | |
322 ): | |
290 """combine the copies information for each item of iterrevs | 323 """combine the copies information for each item of iterrevs |
291 | 324 |
292 revs: sorted iterable of revision to visit | 325 revs: sorted iterable of revision to visit |
293 children: a {parent: [children]} mapping. | 326 children: a {parent: [children]} mapping. |
294 targetrev: the final copies destination revision (not in iterrevs) | 327 targetrev: the final copies destination revision (not in iterrevs) |
303 copies = all_copies.pop(r, None) | 336 copies = all_copies.pop(r, None) |
304 if copies is None: | 337 if copies is None: |
305 # this is a root | 338 # this is a root |
306 copies = {} | 339 copies = {} |
307 for i, c in enumerate(children[r]): | 340 for i, c in enumerate(children[r]): |
308 p1, p2, p1copies, p2copies, removed = revinfo(c) | 341 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c) |
309 if r == p1: | 342 if r == p1: |
310 parent = 1 | 343 parent = 1 |
311 childcopies = p1copies | 344 childcopies = p1copies |
312 else: | 345 else: |
313 assert r == p2 | 346 assert r == p2 |
317 childcopies = { | 350 childcopies = { |
318 dst: src for dst, src in childcopies.items() if match(dst) | 351 dst: src for dst, src in childcopies.items() if match(dst) |
319 } | 352 } |
320 newcopies = copies | 353 newcopies = copies |
321 if childcopies: | 354 if childcopies: |
322 newcopies = _chain(newcopies, childcopies) | 355 newcopies = copies.copy() |
323 # _chain makes a copies, we can avoid doing so in some | 356 for dest, source in pycompat.iteritems(childcopies): |
324 # simple/linear cases. | 357 prev = copies.get(source) |
358 if prev is not None and prev[1] is not None: | |
359 source = prev[1] | |
360 newcopies[dest] = (c, source) | |
325 assert newcopies is not copies | 361 assert newcopies is not copies |
326 for f in removed: | 362 for f in removed: |
327 if f in newcopies: | 363 if f in newcopies: |
328 if newcopies is copies: | 364 if newcopies is copies: |
329 # copy on write to avoid affecting potential other | 365 # copy on write to avoid affecting potential other |
330 # branches. when there are no other branches, this | 366 # branches. when there are no other branches, this |
331 # could be avoided. | 367 # could be avoided. |
332 newcopies = copies.copy() | 368 newcopies = copies.copy() |
333 del newcopies[f] | 369 newcopies[f] = (c, None) |
334 othercopies = all_copies.get(c) | 370 othercopies = all_copies.get(c) |
335 if othercopies is None: | 371 if othercopies is None: |
336 all_copies[c] = newcopies | 372 all_copies[c] = newcopies |
337 else: | 373 else: |
338 # we are the second parent to work on c, we need to merge our | 374 # we are the second parent to work on c, we need to merge our |
339 # work with the other. | 375 # work with the other. |
340 # | 376 # |
341 # Unlike when copies are stored in the filelog, we consider | |
342 # it a copy even if the destination already existed on the | |
343 # other branch. It's simply too expensive to check if the | |
344 # file existed in the manifest. | |
345 # | |
346 # In case of conflict, parent 1 take precedence over parent 2. | 377 # In case of conflict, parent 1 take precedence over parent 2. |
347 # This is an arbitrary choice made anew when implementing | 378 # This is an arbitrary choice made anew when implementing |
348 # changeset based copies. It was made without regards with | 379 # changeset based copies. It was made without regards with |
349 # potential filelog related behavior. | 380 # potential filelog related behavior. |
350 if parent == 1: | 381 if parent == 1: |
351 othercopies.update(newcopies) | 382 _merge_copies_dict( |
383 othercopies, newcopies, isancestor, ismerged | |
384 ) | |
352 else: | 385 else: |
353 newcopies.update(othercopies) | 386 _merge_copies_dict( |
387 newcopies, othercopies, isancestor, ismerged | |
388 ) | |
354 all_copies[c] = newcopies | 389 all_copies[c] = newcopies |
355 return all_copies[targetrev] | 390 |
391 final_copies = {} | |
392 for dest, (tt, source) in all_copies[targetrev].items(): | |
393 if source is not None: | |
394 final_copies[dest] = source | |
395 return final_copies | |
396 | |
397 | |
398 def _merge_copies_dict(minor, major, isancestor, ismerged): | |
399 """merge two copies-mapping together, minor and major | |
400 | |
401 In case of conflict, value from "major" will be picked. | |
402 | |
403 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an | |
404 ancestors of `high_rev`, | |
405 | |
406 - `ismerged(path)`: callable return True if `path` have been merged in the | |
407 current revision, | |
408 """ | |
409 for dest, value in major.items(): | |
410 other = minor.get(dest) | |
411 if other is None: | |
412 minor[dest] = value | |
413 else: | |
414 new_tt = value[0] | |
415 other_tt = other[0] | |
416 if value[1] == other[1]: | |
417 continue | |
418 # content from "major" wins, unless it is older | |
419 # than the branch point or there is a merge | |
420 if ( | |
421 new_tt == other_tt | |
422 or not isancestor(new_tt, other_tt) | |
423 or ismerged(dest) | |
424 ): | |
425 minor[dest] = value | |
356 | 426 |
357 | 427 |
358 def _forwardcopies(a, b, base=None, match=None): | 428 def _forwardcopies(a, b, base=None, match=None): |
359 """find {dst@b: src@a} copy mapping where a is an ancestor of b""" | 429 """find {dst@b: src@a} copy mapping where a is an ancestor of b""" |
360 | 430 |