comparison mercurial/copies.py @ 45637:ad6ebb6f0dfe

copies: make two versions of the changeset centric algorithm There are two main ways to run the changeset-centric copy-tracing algorithm. One is fed from data stored in side-data and still in development, and one is based on data stored in extra (with a "compatibility" mode). The `extra`-based one is used in production at Google, but is still experimental in code. It is mostly unsuitable for other users because it affects the hash. The side-data based storage and algorithm have been evolving to store more data, cover more cases (mostly around merges, which Google does not really care about) and use lower-level storage for efficiency. All these changes make it increasingly hard to maintain the common code base without impacting code complexity and performance. For example, the compatibility mode requires keeping things at a different level than what we need for side-data. So, I am duplicating the involved functions. The newly added `_extra` variants will be kept as they are today, while I will do some deeper rework of the side-data versions. Long term, the side-data version should be more featureful and performant than the extra-based version, so I expect the duplicated `_extra` functions to eventually get dropped. Differential Revision: https://phab.mercurial-scm.org/D9114
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Fri, 25 Sep 2020 14:39:04 +0200
parents fb000408bca5
children 4f876e6b30fa
comparison
equal deleted inserted replaced
45636:053c9014fd39 45637:ad6ebb6f0dfe
307 iterrevs = set(from_head) 307 iterrevs = set(from_head)
308 iterrevs &= mrset 308 iterrevs &= mrset
309 iterrevs.update(roots) 309 iterrevs.update(roots)
310 iterrevs.remove(b.rev()) 310 iterrevs.remove(b.rev())
311 revs = sorted(iterrevs) 311 revs = sorted(iterrevs)
312 return _combine_changeset_copies( 312
313 revs, children, b.rev(), revinfo, match, isancestor 313 if repo.filecopiesmode == b'changeset-sidedata':
314 ) 314 return _combine_changeset_copies(
315 revs, children, b.rev(), revinfo, match, isancestor
316 )
317 else:
318 return _combine_changeset_copies_extra(
319 revs, children, b.rev(), revinfo, match, isancestor
320 )
315 321
316 322
317 def _combine_changeset_copies( 323 def _combine_changeset_copies(
318 revs, children, targetrev, revinfo, match, isancestor 324 revs, children, targetrev, revinfo, match, isancestor
319 ): 325 ):
420 or ismerged(dest) 426 or ismerged(dest)
421 ): 427 ):
422 minor[dest] = value 428 minor[dest] = value
423 429
424 430
431 def _combine_changeset_copies_extra(
432 revs, children, targetrev, revinfo, match, isancestor
433 ):
434 """version of `_combine_changeset_copies` that works with the Google
435 specific "extra" based storage for copy information"""
436 all_copies = {}
437 alwaysmatch = match.always()
438 for r in revs:
439 copies = all_copies.pop(r, None)
440 if copies is None:
441 # this is a root
442 copies = {}
443 for i, c in enumerate(children[r]):
444 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
445 if r == p1:
446 parent = 1
447 childcopies = p1copies
448 else:
449 assert r == p2
450 parent = 2
451 childcopies = p2copies
452 if not alwaysmatch:
453 childcopies = {
454 dst: src for dst, src in childcopies.items() if match(dst)
455 }
456 newcopies = copies
457 if childcopies:
458 newcopies = copies.copy()
459 for dest, source in pycompat.iteritems(childcopies):
460 prev = copies.get(source)
461 if prev is not None and prev[1] is not None:
462 source = prev[1]
463 newcopies[dest] = (c, source)
464 assert newcopies is not copies
465 for f in removed:
466 if f in newcopies:
467 if newcopies is copies:
468 # copy on write to avoid affecting potential other
469 # branches. when there are no other branches, this
470 # could be avoided.
471 newcopies = copies.copy()
472 newcopies[f] = (c, None)
473 othercopies = all_copies.get(c)
474 if othercopies is None:
475 all_copies[c] = newcopies
476 else:
477 # we are the second parent to work on c, we need to merge our
478 # work with the other.
479 #
480 # In case of conflict, parent 1 take precedence over parent 2.
481 # This is an arbitrary choice made anew when implementing
482 # changeset based copies. It was made without regards with
483 # potential filelog related behavior.
484 if parent == 1:
485 _merge_copies_dict_extra(
486 othercopies, newcopies, isancestor, ismerged
487 )
488 else:
489 _merge_copies_dict_extra(
490 newcopies, othercopies, isancestor, ismerged
491 )
492 all_copies[c] = newcopies
493
494 final_copies = {}
495 for dest, (tt, source) in all_copies[targetrev].items():
496 if source is not None:
497 final_copies[dest] = source
498 return final_copies
499
500
501 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
502 """version of `_merge_copies_dict` that works with the Google
503 specific "extra" based storage for copy information"""
504 for dest, value in major.items():
505 other = minor.get(dest)
506 if other is None:
507 minor[dest] = value
508 else:
509 new_tt = value[0]
510 other_tt = other[0]
511 if value[1] == other[1]:
512 continue
513 # content from "major" wins, unless it is older
514 # than the branch point or there is a merge
515 if (
516 new_tt == other_tt
517 or not isancestor(new_tt, other_tt)
518 or ismerged(dest)
519 ):
520 minor[dest] = value
521
522
425 def _forwardcopies(a, b, base=None, match=None): 523 def _forwardcopies(a, b, base=None, match=None):
426 """find {dst@b: src@a} copy mapping where a is an ancestor of b""" 524 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
427 525
428 if base is None: 526 if base is None:
429 base = a 527 base = a