Mercurial > hg-stable
view mercurial/hg.py @ 11060:e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
We speed up 'findrenames' for the usecase when a user specifies they
want a similarity of 100% by matching files by their exact SHA1 hash
value. This reduces the number of comparisons required to find exact
matches from O(n^2) to O(n).
While it would be nice if we could just use mercurial's pre-calculated
SHA1 hash for existing files, this hash includes the file's ancestor
information making it unsuitable for our purposes. Instead, we calculate
the hash of old content from scratch.
The following benchmarks were taken on the current head of crew:
addremove 100% similarity:
rm -rf *; hg up -C; mv tests tests.new
hg --time addremove -s100 --dry-run
before: real 176.350 secs (user 128.890+0.000 sys 47.430+0.000)
after: real 2.130 secs (user 1.890+0.000 sys 0.240+0.000)
addremove 75% similarity:
rm -rf *; hg up -C; mv tests tests.new; \
for i in tests.new/*; do echo x >> $i; done
hg --time addremove -s75 --dry-run
before: real 264.560 secs (user 215.130+0.000 sys 49.410+0.000)
after: real 218.710 secs (user 172.790+0.000 sys 45.870+0.000)
author | David Greenaway <hg-dev@davidgreenaway.com> |
---|---|
date | Sat, 03 Apr 2010 11:58:16 +1100 |
parents | 71cf11f03b3d |
children | b203a95fe68b |
line wrap: on
line source
# hg.py - repository classes for mercurial # # Copyright 2005-2007 Matt Mackall <mpm@selenic.com> # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from i18n import _ from lock import release import localrepo, bundlerepo, httprepo, sshrepo, statichttprepo import lock, util, extensions, error, encoding, node import merge as mergemod import verify as verifymod import errno, os, shutil def _local(path): return (os.path.isfile(util.drop_scheme('file', path)) and bundlerepo or localrepo) def addbranchrevs(lrepo, repo, branches, revs): if not branches: return revs or None, revs and revs[0] or None revs = revs and list(revs) or [] if not repo.capable('branchmap'): revs.extend(branches) return revs, revs[0] branchmap = repo.branchmap() for branch in branches: if branch == '.': if not lrepo or not lrepo.local(): raise util.Abort(_("dirstate branch not accessible")) revs.append(lrepo.dirstate.branch()) else: butf8 = encoding.fromlocal(branch) if butf8 in branchmap: revs.extend(node.hex(r) for r in reversed(branchmap[butf8])) else: revs.append(branch) return revs, revs[0] def parseurl(url, branches=None): '''parse url#branch, returning url, branches+[branch]''' if '#' not in url: return url, branches or [] url, branch = url.split('#', 1) return url, (branches or []) + [branch] schemes = { 'bundle': bundlerepo, 'file': _local, 'http': httprepo, 'https': httprepo, 'ssh': sshrepo, 'static-http': statichttprepo, } def _lookup(path): scheme = 'file' if path: c = path.find(':') if c > 0: scheme = path[:c] thing = schemes.get(scheme) or schemes['file'] try: return thing(path) except TypeError: return thing def islocal(repo): '''return true if repo or path is local''' if isinstance(repo, str): try: return _lookup(repo).islocal(repo) except AttributeError: return False return repo.local() def repository(ui, path='', create=False): """return a repository object for the specified path""" repo = _lookup(path).instance(ui, path, create) ui = getattr(repo, "ui", ui) for name, module in extensions.extensions(): hook = getattr(module, 'reposetup', None) if hook: hook(ui, repo) return repo def defaultdest(source): '''return default destination of clone if none is given''' return os.path.basename(os.path.normpath(source)) def localpath(path): if path.startswith('file://localhost/'): return path[16:] if path.startswith('file://'): return path[7:] if path.startswith('file:'): return path[5:] return path def share(ui, source, dest=None, update=True): '''create a shared repository''' if not islocal(source): raise util.Abort(_('can only share local repositories')) if not dest: dest = defaultdest(source) else: dest = ui.expandpath(dest) if isinstance(source, str): origsource = ui.expandpath(source) source, branches = parseurl(origsource) srcrepo = repository(ui, source) rev, checkout = addbranchrevs(srcrepo, srcrepo, branches, None) else: srcrepo = source origsource = source = srcrepo.url() checkout = None sharedpath = srcrepo.sharedpath # if our source is already sharing root = os.path.realpath(dest) roothg = os.path.join(root, '.hg') if os.path.exists(roothg): raise util.Abort(_('destination already exists')) if not os.path.isdir(root): os.mkdir(root) os.mkdir(roothg) requirements = '' try: requirements = srcrepo.opener('requires').read() except IOError, inst: if inst.errno != errno.ENOENT: raise requirements += 'shared\n' file(os.path.join(roothg, 'requires'), 'w').write(requirements) file(os.path.join(roothg, 'sharedpath'), 'w').write(sharedpath) default = srcrepo.ui.config('paths', 'default') if default: f = file(os.path.join(roothg, 'hgrc'), 'w') f.write('[paths]\ndefault = %s\n' % default) f.close() r = repository(ui, root) if update: r.ui.status(_("updating working directory\n")) if update is not True: checkout = update for test in (checkout, 'default', 'tip'): if test is None: continue try: uprev = r.lookup(test) break except error.RepoLookupError: continue _update(r, uprev) def clone(ui, source, dest=None, pull=False, rev=None, update=True, stream=False, branch=None): """Make a copy of an existing repository. Create a copy of an existing repository in a new directory. The source and destination are URLs, as passed to the repository function. Returns a pair of repository objects, the source and newly created destination. The location of the source is added to the new repository's .hg/hgrc file, as the default to be used for future pulls and pushes. If an exception is raised, the partly cloned/updated destination repository will be deleted. Arguments: source: repository object or URL dest: URL of destination repository to create (defaults to base name of source repository) pull: always pull from source repository, even in local case stream: stream raw data uncompressed from repository (fast over LAN, slow over WAN) rev: revision to clone up to (implies pull=True) update: update working directory after clone completes, if destination is local repository (True means update to default rev, anything else is treated as a revision) branch: branches to clone """ if isinstance(source, str): origsource = ui.expandpath(source) source, branch = parseurl(origsource, branch) src_repo = repository(ui, source) else: src_repo = source branch = None origsource = source = src_repo.url() rev, checkout = addbranchrevs(src_repo, src_repo, branch, rev) if dest is None: dest = defaultdest(source) ui.status(_("destination directory: %s\n") % dest) else: dest = ui.expandpath(dest) dest = localpath(dest) source = localpath(source) if os.path.exists(dest): if not os.path.isdir(dest): raise util.Abort(_("destination '%s' already exists") % dest) elif os.listdir(dest): raise util.Abort(_("destination '%s' is not empty") % dest) class DirCleanup(object): def __init__(self, dir_): self.rmtree = shutil.rmtree self.dir_ = dir_ def close(self): self.dir_ = None def cleanup(self): if self.dir_: self.rmtree(self.dir_, True) src_lock = dest_lock = dir_cleanup = None try: if islocal(dest): dir_cleanup = DirCleanup(dest) abspath = origsource copy = False if src_repo.cancopy() and islocal(dest): abspath = os.path.abspath(util.drop_scheme('file', origsource)) copy = not pull and not rev if copy: try: # we use a lock here because if we race with commit, we # can end up with extra data in the cloned revlogs that's # not pointed to by changesets, thus causing verify to # fail src_lock = src_repo.lock(wait=False) except error.LockError: copy = False if copy: src_repo.hook('preoutgoing', throw=True, source='clone') hgdir = os.path.realpath(os.path.join(dest, ".hg")) if not os.path.exists(dest): os.mkdir(dest) else: # only clean up directories we create ourselves dir_cleanup.dir_ = hgdir try: dest_path = hgdir os.mkdir(dest_path) except OSError, inst: if inst.errno == errno.EEXIST: dir_cleanup.close() raise util.Abort(_("destination '%s' already exists") % dest) raise for f in src_repo.store.copylist(): src = os.path.join(src_repo.sharedpath, f) dst = os.path.join(dest_path, f) dstbase = os.path.dirname(dst) if dstbase and not os.path.exists(dstbase): os.mkdir(dstbase) if os.path.exists(src): if dst.endswith('data'): # lock to avoid premature writing to the target dest_lock = lock.lock(os.path.join(dstbase, "lock")) util.copyfiles(src, dst) # we need to re-init the repo after manually copying the data # into it dest_repo = repository(ui, dest) src_repo.hook('outgoing', source='clone', node='0'*40) else: try: dest_repo = repository(ui, dest, create=True) except OSError, inst: if inst.errno == errno.EEXIST: dir_cleanup.close() raise util.Abort(_("destination '%s' already exists") % dest) raise revs = None if rev: if 'lookup' not in src_repo.capabilities: raise util.Abort(_("src repository does not support " "revision lookup and so doesn't " "support clone by revision")) revs = [src_repo.lookup(r) for r in rev] checkout = revs[0] if dest_repo.local(): dest_repo.clone(src_repo, heads=revs, stream=stream) elif src_repo.local(): src_repo.push(dest_repo, revs=revs) else: raise util.Abort(_("clone from remote to remote not supported")) if dir_cleanup: dir_cleanup.close() if dest_repo.local(): fp = dest_repo.opener("hgrc", "w", text=True) fp.write("[paths]\n") fp.write("default = %s\n" % abspath) fp.close() dest_repo.ui.setconfig('paths', 'default', abspath) if update: if update is not True: checkout = update if src_repo.local(): checkout = src_repo.lookup(update) for test in (checkout, 'default', 'tip'): if test is None: continue try: uprev = dest_repo.lookup(test) break except error.RepoLookupError: continue bn = dest_repo[uprev].branch() dest_repo.ui.status(_("updating to branch %s\n") % encoding.tolocal(bn)) _update(dest_repo, uprev) return src_repo, dest_repo finally: release(src_lock, dest_lock) if dir_cleanup is not None: dir_cleanup.cleanup() def _showstats(repo, stats): repo.ui.status(_("%d files updated, %d files merged, " "%d files removed, %d files unresolved\n") % stats) def update(repo, node): """update the working directory to node, merging linear changes""" stats = mergemod.update(repo, node, False, False, None) _showstats(repo, stats) if stats[3]: repo.ui.status(_("use 'hg resolve' to retry unresolved file merges\n")) return stats[3] > 0 # naming conflict in clone() _update = update def clean(repo, node, show_stats=True): """forcibly switch the working directory to node, clobbering changes""" stats = mergemod.update(repo, node, False, True, None) if show_stats: _showstats(repo, stats) return stats[3] > 0 def merge(repo, node, force=None, remind=True): """branch merge with node, resolving changes""" stats = mergemod.update(repo, node, True, force, False) _showstats(repo, stats) if stats[3]: repo.ui.status(_("use 'hg resolve' to retry unresolved file merges " "or 'hg update -C' to abandon\n")) elif remind: repo.ui.status(_("(branch merge, don't forget to commit)\n")) return stats[3] > 0 def revert(repo, node, choose): """revert changes to revision in node without updating dirstate""" return mergemod.update(repo, node, False, True, choose)[3] > 0 def verify(repo): """verify the consistency of a repository""" return verifymod.verify(repo)