comparison mercurial/merge.py @ 35171:b85962350bb3 stable

merge: cache unknown dir checks (issue5716) As mentioned in D1222, the recent pathconflicts change regresses update performance in large repositories when many files are being updated. To mitigate this, we introduce two caches of directories that have already been found to be either: - unknown directories which are not aliased by files, and so don't need to be checked again to see whether they are files; and - missing directories, which cannot cause path conflicts, and cannot contain a file that causes a path conflict. When checking the paths of a file, testing against these caches means we can skip tests that involve touching the filesystem. Differential Revision: https://phab.mercurial-scm.org/D1224
author Mark Thomas <mbthomas@fb.com>
date Fri, 24 Nov 2017 12:53:58 -0800
parents 37450a122128
children a92b9f8e11ba
comparison
equal deleted inserted replaced
35170:c9740b69b9b7 35171:b85962350bb3
651 return (repo.wvfs.audit.check(f) 651 return (repo.wvfs.audit.check(f)
652 and repo.wvfs.isfileorlink(f) 652 and repo.wvfs.isfileorlink(f)
653 and repo.dirstate.normalize(f) not in repo.dirstate 653 and repo.dirstate.normalize(f) not in repo.dirstate
654 and mctx[f2].cmp(wctx[f])) 654 and mctx[f2].cmp(wctx[f]))
655 655
656 def _checkunknowndirs(repo, f): 656 class _unknowndirschecker(object):
657 """ 657 """
658 Look for any unknown files or directories that may have a path conflict 658 Look for any unknown files or directories that may have a path conflict
659 with a file. If any path prefix of the file exists as a file or link, 659 with a file. If any path prefix of the file exists as a file or link,
660 then it conflicts. If the file itself is a directory that contains any 660 then it conflicts. If the file itself is a directory that contains any
661 file that is not tracked, then it conflicts. 661 file that is not tracked, then it conflicts.
662 662
663 Returns the shortest path at which a conflict occurs, or None if there is 663 Returns the shortest path at which a conflict occurs, or None if there is
664 no conflict. 664 no conflict.
665 """ 665 """
666 666 def __init__(self):
667 # Check for path prefixes that exist as unknown files. 667 # A set of paths known to be good. This prevents repeated checking of
668 for p in reversed(list(util.finddirs(f))): 668 # dirs. It will be updated with any new dirs that are checked and found
669 if (repo.wvfs.audit.check(p) 669 # to be safe.
670 and repo.wvfs.isfileorlink(p) 670 self._unknowndircache = set()
671 and repo.dirstate.normalize(p) not in repo.dirstate): 671
672 return p 672 # A set of paths that are known to be absent. This prevents repeated
673 673 # checking of subdirectories that are known not to exist. It will be
674 # Check if the file conflicts with a directory containing unknown files. 674 # updated with any new dirs that are checked and found to be absent.
675 if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f): 675 self._missingdircache = set()
676 # Does the directory contain any files that are not in the dirstate? 676
677 for p, dirs, files in repo.wvfs.walk(f): 677 def __call__(self, repo, f):
678 for fn in files: 678 # Check for path prefixes that exist as unknown files.
679 relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn)) 679 for p in reversed(list(util.finddirs(f))):
680 if relf not in repo.dirstate: 680 if p in self._missingdircache:
681 return f 681 return
682 return None 682 if p in self._unknowndircache:
683 continue
684 if repo.wvfs.audit.check(p):
685 if (repo.wvfs.isfileorlink(p)
686 and repo.dirstate.normalize(p) not in repo.dirstate):
687 return p
688 if not repo.wvfs.lexists(p):
689 self._missingdircache.add(p)
690 return
691 self._unknowndircache.add(p)
692
693 # Check if the file conflicts with a directory containing unknown files.
694 if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f):
695 # Does the directory contain any files that are not in the dirstate?
696 for p, dirs, files in repo.wvfs.walk(f):
697 for fn in files:
698 relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn))
699 if relf not in repo.dirstate:
700 return f
701 return None
683 702
684 def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce): 703 def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce):
685 """ 704 """
686 Considers any actions that care about the presence of conflicting unknown 705 Considers any actions that care about the presence of conflicting unknown
687 files. For some actions, the result is to abort; for others, it is to 706 files. For some actions, the result is to abort; for others, it is to
699 if config == 'abort': 718 if config == 'abort':
700 abortconflicts.update(conflicts) 719 abortconflicts.update(conflicts)
701 elif config == 'warn': 720 elif config == 'warn':
702 warnconflicts.update(conflicts) 721 warnconflicts.update(conflicts)
703 722
723 checkunknowndirs = _unknowndirschecker()
704 for f, (m, args, msg) in actions.iteritems(): 724 for f, (m, args, msg) in actions.iteritems():
705 if m in ('c', 'dc'): 725 if m in ('c', 'dc'):
706 if _checkunknownfile(repo, wctx, mctx, f): 726 if _checkunknownfile(repo, wctx, mctx, f):
707 fileconflicts.add(f) 727 fileconflicts.add(f)
708 elif pathconfig and f not in wctx: 728 elif pathconfig and f not in wctx:
709 path = _checkunknowndirs(repo, f) 729 path = checkunknowndirs(repo, f)
710 if path is not None: 730 if path is not None:
711 pathconflicts.add(path) 731 pathconflicts.add(path)
712 elif m == 'dg': 732 elif m == 'dg':
713 if _checkunknownfile(repo, wctx, mctx, f, args[0]): 733 if _checkunknownfile(repo, wctx, mctx, f, args[0]):
714 fileconflicts.add(f) 734 fileconflicts.add(f)