diff hgext/remotefilelog/__init__.py @ 40495:3a333a582d7b
remotefilelog: import pruned-down remotefilelog extension from hg-experimental
This is remotefilelog as of my recent patches for compatibility with
the current tip of hg, minus support for old versions of Mercurial and
some Facebook-specific features such as their treemanifest extension
and fetching linkrev data from a patched Phabricator. The file
extutil.py moved from hgext3rd to remotefilelog.
This is not yet ready to be landed; consider it a preview for now.
Planned changes include:
* replace lz4 with zstd
* rename some capabilities, requirements and wireproto commands to mark
them as experimental
* consolidate bits of shallowutil with related functions (e.g. readfile)
I'm certainly open to other (small) changes, but my rough mission is
to land this largely as-is so we can use it as a model of the
functionality we need going forward for lazy-fetching of file contents
from a server.
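
To make the intended workflow concrete, here is a minimal, hypothetical
sketch of a shallow-clone setup. The --shallow flag, the prefetch and
repack commands, and the config knobs are the ones defined in this
patch; the paths, URL and repo name are placeholders, and the serving
repo is assumed to set remotefilelog.server = True.

    # client-side hgrc (placeholder values)
    [extensions]
    remotefilelog =

    [remotefilelog]
    # shared blob cache; reponame partitions data within it
    cachepath = /var/cache/hgcache
    reponame = myrepo
    # revset whose files are fetched eagerly on pull
    pullprefetch = .

    # clone without file contents, then prefetch/repack as needed
    $ hg clone --shallow ssh://hg.example.com/myrepo
    $ hg -R myrepo prefetch -r 'draft()'
    $ hg -R myrepo repack --incremental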
# no-check-commit because of a few foo_bar functions
Differential Revision: https://phab.mercurial-scm.org/D4782
author   | Augie Fackler <augie@google.com>
date     | Thu, 27 Sep 2018 13:03:19 -0400
parents  |
children | 6d64e2abe8d3
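Nearly everything in the diff below operates by wrapping existing
Mercurial entry points rather than replacing them. For orientation, a
stripped-down sketch of that pattern as used throughout this file: the
wrapped targets and the wrapcommand/wrapfunction APIs are real, while
the wrapper names and bodies here are illustrative only.

    from mercurial import commands, exchange, extensions

    def uisetup(ui):
        # wrapcommand returns the new command-table entry; appending to
        # entry[1] is how this patch adds the clone --shallow option.
        entry = extensions.wrapcommand(commands.table, 'clone', _cloneshallow)
        entry[1].append(('', 'shallow', None, 'create a shallow clone'))

        # wrapfunction replaces exchange.pull; the wrapper receives the
        # original callable as its first argument.
        extensions.wrapfunction(exchange, 'pull', _pullshallow)

    def _cloneshallow(orig, ui, repo, *args, **opts):
        # illustrative only: the real wrapper installs shallow-repo setup here
        return orig(ui, repo, *args, **opts)

    def _pullshallow(orig, repo, remote, *args, **kwargs):
        # illustrative only: the real wrapper adds remotefilelog bundle caps
        return orig(repo, remote, *args, **kwargs)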
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/remotefilelog/__init__.py	Thu Sep 27 13:03:19 2018 -0400
@@ -0,0 +1,1106 @@
+# __init__.py - remotefilelog extension
+#
+# Copyright 2013 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+"""remotefilelog causes Mercurial to lazily fetch file contents (EXPERIMENTAL)
+
+Configs:
+
+    ``packs.maxchainlen`` specifies the maximum delta chain length in pack files
+    ``packs.maxpacksize`` specifies the maximum pack file size
+    ``packs.maxpackfilecount`` specifies the maximum number of packs in the
+      shared cache (trees only for now)
+    ``remotefilelog.backgroundprefetch`` runs prefetch in background when True
+    ``remotefilelog.bgprefetchrevs`` specifies revisions to fetch on commit and
+      update, and on other commands that use them. Different from pullprefetch.
+    ``remotefilelog.gcrepack`` does garbage collection during repack when True
+    ``remotefilelog.nodettl`` specifies maximum TTL of a node in seconds before
+      it is garbage collected
+    ``remotefilelog.repackonhggc`` runs repack on hg gc when True
+    ``remotefilelog.prefetchdays`` specifies the maximum age of a commit in
+      days after which it is no longer prefetched.
+    ``remotefilelog.prefetchdelay`` specifies delay between background
+      prefetches in seconds after operations that change the working copy parent
+    ``remotefilelog.data.gencountlimit`` constrains the minimum number of data
+      pack files required to be considered part of a generation. In particular,
+      minimum number of pack files > gencountlimit.
+    ``remotefilelog.data.generations`` list for specifying the lower bound of
+      each generation of the data pack files. For example, list ['100MB','1MB']
+      or ['1MB', '100MB'] will lead to three generations: [0, 1MB), [1MB, 100MB)
+      and [100MB, infinity).
+    ``remotefilelog.data.maxrepackpacks`` the maximum number of pack files to
+      include in an incremental data repack.
+    ``remotefilelog.data.repackmaxpacksize`` the maximum size of a pack file for
+      it to be considered for an incremental data repack.
+    ``remotefilelog.data.repacksizelimit`` the maximum total size of pack files
+      to include in an incremental data repack.
+    ``remotefilelog.history.gencountlimit`` constrains the minimum number of
+      history pack files required to be considered part of a generation. In
+      particular, minimum number of pack files > gencountlimit.
+    ``remotefilelog.history.generations`` list for specifying the lower bound of
+      each generation of the history pack files. For example, list
+      ['100MB', '1MB'] or ['1MB', '100MB'] will lead to three generations:
+      [0, 1MB), [1MB, 100MB) and [100MB, infinity).
+    ``remotefilelog.history.maxrepackpacks`` the maximum number of pack files to
+      include in an incremental history repack.
+    ``remotefilelog.history.repackmaxpacksize`` the maximum size of a pack file
+      for it to be considered for an incremental history repack.
+    ``remotefilelog.history.repacksizelimit`` the maximum total size of pack
+      files to include in an incremental history repack.
+    ``remotefilelog.backgroundrepack`` automatically consolidate packs in the
+      background
+    ``remotefilelog.cachepath`` path to cache
+    ``remotefilelog.cachegroup`` if set, make cache directory sgid to this
+      group
+    ``remotefilelog.cacheprocess`` binary to invoke for fetching file data
+    ``remotefilelog.debug`` turn on remotefilelog-specific debug output
+    ``remotefilelog.excludepattern`` pattern of files to exclude from pulls
+    ``remotefilelog.includepattern`` pattern of files to include in pulls
+    ``remotefilelog.fetchpacks`` if set, fetch pre-packed files from the server
+    ``remotefilelog.fetchwarning``: message to print when too many
+      single-file fetches occur
+    ``remotefilelog.getfilesstep`` number of files to request in a single RPC
+    ``remotefilelog.getfilestype`` if set to 'threaded' use threads to fetch
+      files, otherwise use optimistic fetching
+    ``remotefilelog.pullprefetch`` revset for selecting files that should be
+      eagerly downloaded rather than lazily
+    ``remotefilelog.reponame`` name of the repo. If set, used to partition
+      data from other repos in a shared store.
+    ``remotefilelog.server`` if true, enable server-side functionality
+    ``remotefilelog.servercachepath`` path for caching blobs on the server
+    ``remotefilelog.serverexpiration`` number of days to keep cached server
+      blobs
+    ``remotefilelog.validatecache`` if set, check cache entries for corruption
+      before returning blobs
+    ``remotefilelog.validatecachelog`` if set, check cache entries for
+      corruption before returning metadata
+
+"""
+from __future__ import absolute_import
+
+import os
+import time
+import traceback
+
+from mercurial.node import hex
+from mercurial.i18n import _
+from mercurial import (
+    changegroup,
+    changelog,
+    cmdutil,
+    commands,
+    configitems,
+    context,
+    copies,
+    debugcommands as hgdebugcommands,
+    dispatch,
+    error,
+    exchange,
+    extensions,
+    hg,
+    localrepo,
+    match,
+    merge,
+    node as nodemod,
+    patch,
+    registrar,
+    repair,
+    repoview,
+    revset,
+    scmutil,
+    smartset,
+    templatekw,
+    util,
+)
+from . import (
+    debugcommands,
+    fileserverclient,
+    remotefilectx,
+    remotefilelog,
+    remotefilelogserver,
+    repack as repackmod,
+    shallowbundle,
+    shallowrepo,
+    shallowstore,
+    shallowutil,
+    shallowverifier,
+)
+
+# ensures debug commands are registered
+hgdebugcommands.command
+
+try:
+    from mercurial import streamclone
+    streamclone._walkstreamfiles
+    hasstreamclone = True
+except Exception:
+    hasstreamclone = False
+
+cmdtable = {}
+command = registrar.command(cmdtable)
+
+configtable = {}
+configitem = registrar.configitem(configtable)
+
+configitem('remotefilelog', 'debug', default=False)
+
+configitem('remotefilelog', 'reponame', default='')
+configitem('remotefilelog', 'cachepath', default=None)
+configitem('remotefilelog', 'cachegroup', default=None)
+configitem('remotefilelog', 'cacheprocess', default=None)
+configitem('remotefilelog', 'cacheprocess.includepath', default=None)
+configitem("remotefilelog", "cachelimit", default="1000 GB")
+
+configitem('remotefilelog', 'fetchpacks', default=False)
+configitem('remotefilelog', 'fallbackpath', default=configitems.dynamicdefault,
+           alias=[('remotefilelog', 'fallbackrepo')])
+
+configitem('remotefilelog', 'validatecachelog', default=None)
+configitem('remotefilelog', 'validatecache', default='on')
+configitem('remotefilelog', 'server', default=None)
+configitem('remotefilelog', 'servercachepath', default=None)
+configitem("remotefilelog", "serverexpiration", default=30)
+configitem('remotefilelog', 'backgroundrepack', default=False)
+configitem('remotefilelog', 'bgprefetchrevs', default=None)
+configitem('remotefilelog', 'pullprefetch', default=None)
+configitem('remotefilelog', 'backgroundprefetch', default=False)
+configitem('remotefilelog', 'prefetchdelay', default=120)
+configitem('remotefilelog', 'prefetchdays', default=14)
+
+configitem('remotefilelog', 'getfilesstep', default=10000)
+configitem('remotefilelog', 'getfilestype', default='optimistic')
+configitem('remotefilelog', 'batchsize', configitems.dynamicdefault)
+configitem('remotefilelog', 'fetchwarning', default='')
+
+configitem('remotefilelog', 'includepattern', default=None)
+configitem('remotefilelog', 'excludepattern', default=None)
+
+configitem('remotefilelog', 'gcrepack', default=False)
+configitem('remotefilelog', 'repackonhggc', default=False)
+configitem('remotefilelog', 'datapackversion', default=0)
+configitem('repack', 'chainorphansbysize', default=True)
+
+configitem('packs', 'maxpacksize', default=0)
+configitem('packs', 'maxchainlen', default=1000)
+
+configitem('remotefilelog', 'historypackv1', default=False)
+# default TTL limit is 30 days
+_defaultlimit = 60 * 60 * 24 * 30
+configitem('remotefilelog', 'nodettl', default=_defaultlimit)
+
+configitem('remotefilelog', 'data.gencountlimit', default=2),
+configitem('remotefilelog', 'data.generations',
+           default=['1GB', '100MB', '1MB'])
+configitem('remotefilelog', 'data.maxrepackpacks', default=50)
+configitem('remotefilelog', 'data.repackmaxpacksize', default='4GB')
+configitem('remotefilelog', 'data.repacksizelimit', default='100MB')
+
+configitem('remotefilelog', 'history.gencountlimit', default=2),
+configitem('remotefilelog', 'history.generations', default=['100MB'])
+configitem('remotefilelog', 'history.maxrepackpacks', default=50)
+configitem('remotefilelog', 'history.repackmaxpacksize', default='400MB')
+configitem('remotefilelog', 'history.repacksizelimit', default='100MB')
+
+# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
+# extensions which SHIP WITH MERCURIAL.
Non-mainline extensions should +# be specifying the version(s) of Mercurial they are tested with, or +# leave the attribute unspecified. +testedwith = 'ships-with-hg-core' + +repoclass = localrepo.localrepository +repoclass._basesupported.add(shallowrepo.requirement) + +def uisetup(ui): + """Wraps user facing Mercurial commands to swap them out with shallow + versions. + """ + hg.wirepeersetupfuncs.append(fileserverclient.peersetup) + + entry = extensions.wrapcommand(commands.table, 'clone', cloneshallow) + entry[1].append(('', 'shallow', None, + _("create a shallow clone which uses remote file " + "history"))) + + extensions.wrapcommand(commands.table, 'debugindex', + debugcommands.debugindex) + extensions.wrapcommand(commands.table, 'debugindexdot', + debugcommands.debugindexdot) + extensions.wrapcommand(commands.table, 'log', log) + extensions.wrapcommand(commands.table, 'pull', pull) + + # Prevent 'hg manifest --all' + def _manifest(orig, ui, repo, *args, **opts): + if shallowrepo.requirement in repo.requirements and opts.get('all'): + raise error.Abort(_("--all is not supported in a shallow repo")) + + return orig(ui, repo, *args, **opts) + extensions.wrapcommand(commands.table, "manifest", _manifest) + + # Wrap remotefilelog with lfs code + def _lfsloaded(loaded=False): + lfsmod = None + try: + lfsmod = extensions.find('lfs') + except KeyError: + pass + if lfsmod: + lfsmod.wrapfilelog(remotefilelog.remotefilelog) + fileserverclient._lfsmod = lfsmod + extensions.afterloaded('lfs', _lfsloaded) + + # debugdata needs remotefilelog.len to work + extensions.wrapcommand(commands.table, 'debugdata', debugdatashallow) + +def cloneshallow(orig, ui, repo, *args, **opts): + if opts.get('shallow'): + repos = [] + def pull_shallow(orig, self, *args, **kwargs): + if shallowrepo.requirement not in self.requirements: + repos.append(self.unfiltered()) + # set up the client hooks so the post-clone update works + setupclient(self.ui, self.unfiltered()) + + # setupclient fixed the class on the repo itself + # but we also need to fix it on the repoview + if isinstance(self, repoview.repoview): + self.__class__.__bases__ = (self.__class__.__bases__[0], + self.unfiltered().__class__) + self.requirements.add(shallowrepo.requirement) + self._writerequirements() + + # Since setupclient hadn't been called, exchange.pull was not + # wrapped. So we need to manually invoke our version of it. + return exchangepull(orig, self, *args, **kwargs) + else: + return orig(self, *args, **kwargs) + extensions.wrapfunction(exchange, 'pull', pull_shallow) + + # Wrap the stream logic to add requirements and to pass include/exclude + # patterns around. + def setup_streamout(repo, remote): + # Replace remote.stream_out with a version that sends file + # patterns. 
+ def stream_out_shallow(orig): + caps = remote.capabilities() + if shallowrepo.requirement in caps: + opts = {} + if repo.includepattern: + opts['includepattern'] = '\0'.join(repo.includepattern) + if repo.excludepattern: + opts['excludepattern'] = '\0'.join(repo.excludepattern) + return remote._callstream('stream_out_shallow', **opts) + else: + return orig() + extensions.wrapfunction(remote, 'stream_out', stream_out_shallow) + if hasstreamclone: + def stream_wrap(orig, op): + setup_streamout(op.repo, op.remote) + return orig(op) + extensions.wrapfunction( + streamclone, 'maybeperformlegacystreamclone', stream_wrap) + + def canperformstreamclone(orig, pullop, bundle2=False): + # remotefilelog is currently incompatible with the + # bundle2 flavor of streamclones, so force us to use + # v1 instead. + if 'v2' in pullop.remotebundle2caps.get('stream', []): + pullop.remotebundle2caps['stream'] = [ + c for c in pullop.remotebundle2caps['stream'] + if c != 'v2'] + if bundle2: + return False, None + supported, requirements = orig(pullop, bundle2=bundle2) + if requirements is not None: + requirements.add(shallowrepo.requirement) + return supported, requirements + extensions.wrapfunction( + streamclone, 'canperformstreamclone', canperformstreamclone) + else: + def stream_in_shallow(orig, repo, remote, requirements): + setup_streamout(repo, remote) + requirements.add(shallowrepo.requirement) + return orig(repo, remote, requirements) + extensions.wrapfunction( + localrepo.localrepository, 'stream_in', stream_in_shallow) + + try: + orig(ui, repo, *args, **opts) + finally: + if opts.get('shallow'): + for r in repos: + if util.safehasattr(r, 'fileservice'): + r.fileservice.close() + +def debugdatashallow(orig, *args, **kwds): + oldlen = remotefilelog.remotefilelog.__len__ + try: + remotefilelog.remotefilelog.__len__ = lambda x: 1 + return orig(*args, **kwds) + finally: + remotefilelog.remotefilelog.__len__ = oldlen + +def reposetup(ui, repo): + if not isinstance(repo, localrepo.localrepository): + return + + # put here intentionally bc doesnt work in uisetup + ui.setconfig('hooks', 'update.prefetch', wcpprefetch) + ui.setconfig('hooks', 'commit.prefetch', wcpprefetch) + + isserverenabled = ui.configbool('remotefilelog', 'server') + isshallowclient = shallowrepo.requirement in repo.requirements + + if isserverenabled and isshallowclient: + raise RuntimeError("Cannot be both a server and shallow client.") + + if isshallowclient: + setupclient(ui, repo) + + if isserverenabled: + remotefilelogserver.setupserver(ui, repo) + +def setupclient(ui, repo): + if not isinstance(repo, localrepo.localrepository): + return + + # Even clients get the server setup since they need to have the + # wireprotocol endpoints registered. 
+ remotefilelogserver.onetimesetup(ui) + onetimeclientsetup(ui) + + shallowrepo.wraprepo(repo) + repo.store = shallowstore.wrapstore(repo.store) + +clientonetime = False +def onetimeclientsetup(ui): + global clientonetime + if clientonetime: + return + clientonetime = True + + changegroup.cgpacker = shallowbundle.shallowcg1packer + + extensions.wrapfunction(changegroup, '_addchangegroupfiles', + shallowbundle.addchangegroupfiles) + extensions.wrapfunction( + changegroup, 'makechangegroup', shallowbundle.makechangegroup) + + def storewrapper(orig, requirements, path, vfstype): + s = orig(requirements, path, vfstype) + if shallowrepo.requirement in requirements: + s = shallowstore.wrapstore(s) + + return s + extensions.wrapfunction(localrepo, 'makestore', storewrapper) + + extensions.wrapfunction(exchange, 'pull', exchangepull) + + # prefetch files before update + def applyupdates(orig, repo, actions, wctx, mctx, overwrite, labels=None): + if shallowrepo.requirement in repo.requirements: + manifest = mctx.manifest() + files = [] + for f, args, msg in actions['g']: + files.append((f, hex(manifest[f]))) + # batch fetch the needed files from the server + repo.fileservice.prefetch(files) + return orig(repo, actions, wctx, mctx, overwrite, labels=labels) + extensions.wrapfunction(merge, 'applyupdates', applyupdates) + + # Prefetch merge checkunknownfiles + def checkunknownfiles(orig, repo, wctx, mctx, force, actions, + *args, **kwargs): + if shallowrepo.requirement in repo.requirements: + files = [] + sparsematch = repo.maybesparsematch(mctx.rev()) + for f, (m, actionargs, msg) in actions.iteritems(): + if sparsematch and not sparsematch(f): + continue + if m in ('c', 'dc', 'cm'): + files.append((f, hex(mctx.filenode(f)))) + elif m == 'dg': + f2 = actionargs[0] + files.append((f2, hex(mctx.filenode(f2)))) + # batch fetch the needed files from the server + repo.fileservice.prefetch(files) + return orig(repo, wctx, mctx, force, actions, *args, **kwargs) + extensions.wrapfunction(merge, '_checkunknownfiles', checkunknownfiles) + + # Prefetch files before status attempts to look at their size and contents + def checklookup(orig, self, files): + repo = self._repo + if shallowrepo.requirement in repo.requirements: + prefetchfiles = [] + for parent in self._parents: + for f in files: + if f in parent: + prefetchfiles.append((f, hex(parent.filenode(f)))) + # batch fetch the needed files from the server + repo.fileservice.prefetch(prefetchfiles) + return orig(self, files) + extensions.wrapfunction(context.workingctx, '_checklookup', checklookup) + + # Prefetch the logic that compares added and removed files for renames + def findrenames(orig, repo, matcher, added, removed, *args, **kwargs): + if shallowrepo.requirement in repo.requirements: + files = [] + parentctx = repo['.'] + for f in removed: + files.append((f, hex(parentctx.filenode(f)))) + # batch fetch the needed files from the server + repo.fileservice.prefetch(files) + return orig(repo, matcher, added, removed, *args, **kwargs) + extensions.wrapfunction(scmutil, '_findrenames', findrenames) + + # prefetch files before mergecopies check + def computenonoverlap(orig, repo, c1, c2, *args, **kwargs): + u1, u2 = orig(repo, c1, c2, *args, **kwargs) + if shallowrepo.requirement in repo.requirements: + m1 = c1.manifest() + m2 = c2.manifest() + files = [] + + sparsematch1 = repo.maybesparsematch(c1.rev()) + if sparsematch1: + sparseu1 = [] + for f in u1: + if sparsematch1(f): + files.append((f, hex(m1[f]))) + sparseu1.append(f) + u1 = sparseu1 + + 
sparsematch2 = repo.maybesparsematch(c2.rev()) + if sparsematch2: + sparseu2 = [] + for f in u2: + if sparsematch2(f): + files.append((f, hex(m2[f]))) + sparseu2.append(f) + u2 = sparseu2 + + # batch fetch the needed files from the server + repo.fileservice.prefetch(files) + return u1, u2 + extensions.wrapfunction(copies, '_computenonoverlap', computenonoverlap) + + # prefetch files before pathcopies check + def computeforwardmissing(orig, a, b, match=None): + missing = list(orig(a, b, match=match)) + repo = a._repo + if shallowrepo.requirement in repo.requirements: + mb = b.manifest() + + files = [] + sparsematch = repo.maybesparsematch(b.rev()) + if sparsematch: + sparsemissing = [] + for f in missing: + if sparsematch(f): + files.append((f, hex(mb[f]))) + sparsemissing.append(f) + missing = sparsemissing + + # batch fetch the needed files from the server + repo.fileservice.prefetch(files) + return missing + extensions.wrapfunction(copies, '_computeforwardmissing', + computeforwardmissing) + + # close cache miss server connection after the command has finished + def runcommand(orig, lui, repo, *args, **kwargs): + try: + return orig(lui, repo, *args, **kwargs) + finally: + # repo can be None when running in chg: + # - at startup, reposetup was called because serve is not norepo + # - a norepo command like "help" is called + if repo and shallowrepo.requirement in repo.requirements: + repo.fileservice.close() + extensions.wrapfunction(dispatch, 'runcommand', runcommand) + + # disappointing hacks below + templatekw.getrenamedfn = getrenamedfn + extensions.wrapfunction(revset, 'filelog', filelogrevset) + revset.symbols['filelog'] = revset.filelog + extensions.wrapfunction(cmdutil, 'walkfilerevs', walkfilerevs) + + # prevent strip from stripping remotefilelogs + def _collectbrokencsets(orig, repo, files, striprev): + if shallowrepo.requirement in repo.requirements: + files = list([f for f in files if not repo.shallowmatch(f)]) + return orig(repo, files, striprev) + extensions.wrapfunction(repair, '_collectbrokencsets', _collectbrokencsets) + + # Don't commit filelogs until we know the commit hash, since the hash + # is present in the filelog blob. + # This violates Mercurial's filelog->manifest->changelog write order, + # but is generally fine for client repos. 
+ pendingfilecommits = [] + def addrawrevision(orig, self, rawtext, transaction, link, p1, p2, node, + flags, cachedelta=None, _metatuple=None): + if isinstance(link, int): + pendingfilecommits.append( + (self, rawtext, transaction, link, p1, p2, node, flags, + cachedelta, _metatuple)) + return node + else: + return orig(self, rawtext, transaction, link, p1, p2, node, flags, + cachedelta, _metatuple=_metatuple) + extensions.wrapfunction( + remotefilelog.remotefilelog, 'addrawrevision', addrawrevision) + + def changelogadd(orig, self, *args): + oldlen = len(self) + node = orig(self, *args) + newlen = len(self) + if oldlen != newlen: + for oldargs in pendingfilecommits: + log, rt, tr, link, p1, p2, n, fl, c, m = oldargs + linknode = self.node(link) + if linknode == node: + log.addrawrevision(rt, tr, linknode, p1, p2, n, fl, c, m) + else: + raise error.ProgrammingError( + 'pending multiple integer revisions are not supported') + else: + # "link" is actually wrong here (it is set to len(changelog)) + # if changelog remains unchanged, skip writing file revisions + # but still do a sanity check about pending multiple revisions + if len(set(x[3] for x in pendingfilecommits)) > 1: + raise error.ProgrammingError( + 'pending multiple integer revisions are not supported') + del pendingfilecommits[:] + return node + extensions.wrapfunction(changelog.changelog, 'add', changelogadd) + + # changectx wrappers + def filectx(orig, self, path, fileid=None, filelog=None): + if fileid is None: + fileid = self.filenode(path) + if (shallowrepo.requirement in self._repo.requirements and + self._repo.shallowmatch(path)): + return remotefilectx.remotefilectx(self._repo, path, + fileid=fileid, changectx=self, filelog=filelog) + return orig(self, path, fileid=fileid, filelog=filelog) + extensions.wrapfunction(context.changectx, 'filectx', filectx) + + def workingfilectx(orig, self, path, filelog=None): + if (shallowrepo.requirement in self._repo.requirements and + self._repo.shallowmatch(path)): + return remotefilectx.remoteworkingfilectx(self._repo, + path, workingctx=self, filelog=filelog) + return orig(self, path, filelog=filelog) + extensions.wrapfunction(context.workingctx, 'filectx', workingfilectx) + + # prefetch required revisions before a diff + def trydiff(orig, repo, revs, ctx1, ctx2, modified, added, removed, + copy, getfilectx, *args, **kwargs): + if shallowrepo.requirement in repo.requirements: + prefetch = [] + mf1 = ctx1.manifest() + for fname in modified + added + removed: + if fname in mf1: + fnode = getfilectx(fname, ctx1).filenode() + # fnode can be None if it's a edited working ctx file + if fnode: + prefetch.append((fname, hex(fnode))) + if fname not in removed: + fnode = getfilectx(fname, ctx2).filenode() + if fnode: + prefetch.append((fname, hex(fnode))) + + repo.fileservice.prefetch(prefetch) + + return orig(repo, revs, ctx1, ctx2, modified, added, removed, + copy, getfilectx, *args, **kwargs) + extensions.wrapfunction(patch, 'trydiff', trydiff) + + # Prevent verify from processing files + # a stub for mercurial.hg.verify() + def _verify(orig, repo): + lock = repo.lock() + try: + return shallowverifier.shallowverifier(repo).verify() + finally: + lock.release() + + extensions.wrapfunction(hg, 'verify', _verify) + + scmutil.fileprefetchhooks.add('remotefilelog', _fileprefetchhook) + +def getrenamedfn(repo, endrev=None): + rcache = {} + + def getrenamed(fn, rev): + '''looks up all renames for a file (up to endrev) the first + time the file is given. 
It indexes on the changerev and only + parses the manifest if linkrev != changerev. + Returns rename info for fn at changerev rev.''' + if rev in rcache.setdefault(fn, {}): + return rcache[fn][rev] + + try: + fctx = repo[rev].filectx(fn) + for ancestor in fctx.ancestors(): + if ancestor.path() == fn: + renamed = ancestor.renamed() + rcache[fn][ancestor.rev()] = renamed + + return fctx.renamed() + except error.LookupError: + return None + + return getrenamed + +def walkfilerevs(orig, repo, match, follow, revs, fncache): + if not shallowrepo.requirement in repo.requirements: + return orig(repo, match, follow, revs, fncache) + + # remotefilelog's can't be walked in rev order, so throw. + # The caller will see the exception and walk the commit tree instead. + if not follow: + raise cmdutil.FileWalkError("Cannot walk via filelog") + + wanted = set() + minrev, maxrev = min(revs), max(revs) + + pctx = repo['.'] + for filename in match.files(): + if filename not in pctx: + raise error.Abort(_('cannot follow file not in parent ' + 'revision: "%s"') % filename) + fctx = pctx[filename] + + linkrev = fctx.linkrev() + if linkrev >= minrev and linkrev <= maxrev: + fncache.setdefault(linkrev, []).append(filename) + wanted.add(linkrev) + + for ancestor in fctx.ancestors(): + linkrev = ancestor.linkrev() + if linkrev >= minrev and linkrev <= maxrev: + fncache.setdefault(linkrev, []).append(ancestor.path()) + wanted.add(linkrev) + + return wanted + +def filelogrevset(orig, repo, subset, x): + """``filelog(pattern)`` + Changesets connected to the specified filelog. + + For performance reasons, ``filelog()`` does not show every changeset + that affects the requested file(s). See :hg:`help log` for details. For + a slower, more accurate result, use ``file()``. + """ + + if not shallowrepo.requirement in repo.requirements: + return orig(repo, subset, x) + + # i18n: "filelog" is a keyword + pat = revset.getstring(x, _("filelog requires a pattern")) + m = match.match(repo.root, repo.getcwd(), [pat], default='relpath', + ctx=repo[None]) + s = set() + + if not match.patkind(pat): + # slow + for r in subset: + ctx = repo[r] + cfiles = ctx.files() + for f in m.files(): + if f in cfiles: + s.add(ctx.rev()) + break + else: + # partial + files = (f for f in repo[None] if m(f)) + for f in files: + fctx = repo[None].filectx(f) + s.add(fctx.linkrev()) + for actx in fctx.ancestors(): + s.add(actx.linkrev()) + + return smartset.baseset([r for r in subset if r in s]) + +@command('gc', [], _('hg gc [REPO...]'), norepo=True) +def gc(ui, *args, **opts): + '''garbage collect the client and server filelog caches + ''' + cachepaths = set() + + # get the system client cache + systemcache = shallowutil.getcachepath(ui, allowempty=True) + if systemcache: + cachepaths.add(systemcache) + + # get repo client and server cache + repopaths = [] + pwd = ui.environ.get('PWD') + if pwd: + repopaths.append(pwd) + + repopaths.extend(args) + repos = [] + for repopath in repopaths: + try: + repo = hg.peer(ui, {}, repopath) + repos.append(repo) + + repocache = shallowutil.getcachepath(repo.ui, allowempty=True) + if repocache: + cachepaths.add(repocache) + except error.RepoError: + pass + + # gc client cache + for cachepath in cachepaths: + gcclient(ui, cachepath) + + # gc server cache + for repo in repos: + remotefilelogserver.gcserver(ui, repo._repo) + +def gcclient(ui, cachepath): + # get list of repos that use this cache + repospath = os.path.join(cachepath, 'repos') + if not os.path.exists(repospath): + ui.warn(_("no known cache at %s\n") % 
cachepath) + return + + reposfile = open(repospath, 'r') + repos = set([r[:-1] for r in reposfile.readlines()]) + reposfile.close() + + # build list of useful files + validrepos = [] + keepkeys = set() + + _analyzing = _("analyzing repositories") + + sharedcache = None + filesrepacked = False + + count = 0 + for path in repos: + ui.progress(_analyzing, count, unit="repos", total=len(repos)) + count += 1 + try: + path = ui.expandpath(os.path.normpath(path)) + except TypeError as e: + ui.warn(_("warning: malformed path: %r:%s\n") % (path, e)) + traceback.print_exc() + continue + try: + peer = hg.peer(ui, {}, path) + repo = peer._repo + except error.RepoError: + continue + + validrepos.append(path) + + # Protect against any repo or config changes that have happened since + # this repo was added to the repos file. We'd rather this loop succeed + # and too much be deleted, than the loop fail and nothing gets deleted. + if shallowrepo.requirement not in repo.requirements: + continue + + if not util.safehasattr(repo, 'name'): + ui.warn(_("repo %s is a misconfigured remotefilelog repo\n") % path) + continue + + # If garbage collection on repack and repack on hg gc are enabled + # then loose files are repacked and garbage collected. + # Otherwise regular garbage collection is performed. + repackonhggc = repo.ui.configbool('remotefilelog', 'repackonhggc') + gcrepack = repo.ui.configbool('remotefilelog', 'gcrepack') + if repackonhggc and gcrepack: + try: + repackmod.incrementalrepack(repo) + filesrepacked = True + continue + except (IOError, repackmod.RepackAlreadyRunning): + # If repack cannot be performed due to not enough disk space + # continue doing garbage collection of loose files w/o repack + pass + + reponame = repo.name + if not sharedcache: + sharedcache = repo.sharedstore + + # Compute a keepset which is not garbage collected + def keyfn(fname, fnode): + return fileserverclient.getcachekey(reponame, fname, hex(fnode)) + keepkeys = repackmod.keepset(repo, keyfn=keyfn, lastkeepkeys=keepkeys) + + ui.progress(_analyzing, None) + + # write list of valid repos back + oldumask = os.umask(0o002) + try: + reposfile = open(repospath, 'w') + reposfile.writelines([("%s\n" % r) for r in validrepos]) + reposfile.close() + finally: + os.umask(oldumask) + + # prune cache + if sharedcache is not None: + sharedcache.gc(keepkeys) + elif not filesrepacked: + ui.warn(_("warning: no valid repos in repofile\n")) + +def log(orig, ui, repo, *pats, **opts): + if shallowrepo.requirement not in repo.requirements: + return orig(ui, repo, *pats, **opts) + + follow = opts.get('follow') + revs = opts.get('rev') + if pats: + # Force slowpath for non-follow patterns and follows that start from + # non-working-copy-parent revs. + if not follow or revs: + # This forces the slowpath + opts['removed'] = True + + # If this is a non-follow log without any revs specified, recommend that + # the user add -f to speed it up. + if not follow and not revs: + match, pats = scmutil.matchandpats(repo['.'], pats, opts) + isfile = not match.anypats() + if isfile: + for file in match.files(): + if not os.path.isfile(repo.wjoin(file)): + isfile = False + break + + if isfile: + ui.warn(_("warning: file log can be slow on large repos - " + + "use -f to speed it up\n")) + + return orig(ui, repo, *pats, **opts) + +def revdatelimit(ui, revset): + """Update revset so that only changesets no older than 'prefetchdays' days + are included. The default value is set to 14 days. 
If 'prefetchdays' is set + to zero or negative value then date restriction is not applied. + """ + days = ui.configint('remotefilelog', 'prefetchdays') + if days > 0: + revset = '(%s) & date(-%s)' % (revset, days) + return revset + +def readytofetch(repo): + """Check that enough time has passed since the last background prefetch. + This only relates to prefetches after operations that change the working + copy parent. Default delay between background prefetches is 2 minutes. + """ + timeout = repo.ui.configint('remotefilelog', 'prefetchdelay') + fname = repo.vfs.join('lastprefetch') + + ready = False + with open(fname, 'a'): + # the with construct above is used to avoid race conditions + modtime = os.path.getmtime(fname) + if (time.time() - modtime) > timeout: + os.utime(fname, None) + ready = True + + return ready + +def wcpprefetch(ui, repo, **kwargs): + """Prefetches in background revisions specified by bgprefetchrevs revset. + Does background repack if backgroundrepack flag is set in config. + """ + shallow = shallowrepo.requirement in repo.requirements + bgprefetchrevs = ui.config('remotefilelog', 'bgprefetchrevs') + isready = readytofetch(repo) + + if not (shallow and bgprefetchrevs and isready): + return + + bgrepack = repo.ui.configbool('remotefilelog', 'backgroundrepack') + # update a revset with a date limit + bgprefetchrevs = revdatelimit(ui, bgprefetchrevs) + + def anon(): + if util.safehasattr(repo, 'ranprefetch') and repo.ranprefetch: + return + repo.ranprefetch = True + repo.backgroundprefetch(bgprefetchrevs, repack=bgrepack) + + repo._afterlock(anon) + +def pull(orig, ui, repo, *pats, **opts): + result = orig(ui, repo, *pats, **opts) + + if shallowrepo.requirement in repo.requirements: + # prefetch if it's configured + prefetchrevset = ui.config('remotefilelog', 'pullprefetch') + bgrepack = repo.ui.configbool('remotefilelog', 'backgroundrepack') + bgprefetch = repo.ui.configbool('remotefilelog', 'backgroundprefetch') + + if prefetchrevset: + ui.status(_("prefetching file contents\n")) + revs = scmutil.revrange(repo, [prefetchrevset]) + base = repo['.'].rev() + if bgprefetch: + repo.backgroundprefetch(prefetchrevset, repack=bgrepack) + else: + repo.prefetch(revs, base=base) + if bgrepack: + repackmod.backgroundrepack(repo, incremental=True) + elif bgrepack: + repackmod.backgroundrepack(repo, incremental=True) + + return result + +def exchangepull(orig, repo, remote, *args, **kwargs): + # Hook into the callstream/getbundle to insert bundle capabilities + # during a pull. 
+ def localgetbundle(orig, source, heads=None, common=None, bundlecaps=None, + **kwargs): + if not bundlecaps: + bundlecaps = set() + bundlecaps.add('remotefilelog') + return orig(source, heads=heads, common=common, bundlecaps=bundlecaps, + **kwargs) + + if util.safehasattr(remote, '_callstream'): + remote._localrepo = repo + elif util.safehasattr(remote, 'getbundle'): + extensions.wrapfunction(remote, 'getbundle', localgetbundle) + + return orig(repo, remote, *args, **kwargs) + +def _fileprefetchhook(repo, revs, match): + if shallowrepo.requirement in repo.requirements: + allfiles = [] + for rev in revs: + if rev == nodemod.wdirrev or rev is None: + continue + ctx = repo[rev] + mf = ctx.manifest() + sparsematch = repo.maybesparsematch(ctx.rev()) + for path in ctx.walk(match): + if path.endswith('/'): + # Tree manifest that's being excluded as part of narrow + continue + if (not sparsematch or sparsematch(path)) and path in mf: + allfiles.append((path, hex(mf[path]))) + repo.fileservice.prefetch(allfiles) + +@command('debugremotefilelog', [ + ('d', 'decompress', None, _('decompress the filelog first')), + ], _('hg debugremotefilelog <path>'), norepo=True) +def debugremotefilelog(ui, path, **opts): + return debugcommands.debugremotefilelog(ui, path, **opts) + +@command('verifyremotefilelog', [ + ('d', 'decompress', None, _('decompress the filelogs first')), + ], _('hg verifyremotefilelogs <directory>'), norepo=True) +def verifyremotefilelog(ui, path, **opts): + return debugcommands.verifyremotefilelog(ui, path, **opts) + +@command('debugdatapack', [ + ('', 'long', None, _('print the long hashes')), + ('', 'node', '', _('dump the contents of node'), 'NODE'), + ], _('hg debugdatapack <paths>'), norepo=True) +def debugdatapack(ui, *paths, **opts): + return debugcommands.debugdatapack(ui, *paths, **opts) + +@command('debughistorypack', [ + ], _('hg debughistorypack <path>'), norepo=True) +def debughistorypack(ui, path, **opts): + return debugcommands.debughistorypack(ui, path) + +@command('debugkeepset', [ + ], _('hg debugkeepset')) +def debugkeepset(ui, repo, **opts): + # The command is used to measure keepset computation time + def keyfn(fname, fnode): + return fileserverclient.getcachekey(repo.name, fname, hex(fnode)) + repackmod.keepset(repo, keyfn) + return + +@command('debugwaitonrepack', [ + ], _('hg debugwaitonrepack')) +def debugwaitonrepack(ui, repo, **opts): + return debugcommands.debugwaitonrepack(repo) + +@command('debugwaitonprefetch', [ + ], _('hg debugwaitonprefetch')) +def debugwaitonprefetch(ui, repo, **opts): + return debugcommands.debugwaitonprefetch(repo) + +def resolveprefetchopts(ui, opts): + if not opts.get('rev'): + revset = ['.', 'draft()'] + + prefetchrevset = ui.config('remotefilelog', 'pullprefetch', None) + if prefetchrevset: + revset.append('(%s)' % prefetchrevset) + bgprefetchrevs = ui.config('remotefilelog', 'bgprefetchrevs', None) + if bgprefetchrevs: + revset.append('(%s)' % bgprefetchrevs) + revset = '+'.join(revset) + + # update a revset with a date limit + revset = revdatelimit(ui, revset) + + opts['rev'] = [revset] + + if not opts.get('base'): + opts['base'] = None + + return opts + +@command('prefetch', [ + ('r', 'rev', [], _('prefetch the specified revisions'), _('REV')), + ('', 'repack', False, _('run repack after prefetch')), + ('b', 'base', '', _("rev that is assumed to already be local")), + ] + commands.walkopts, _('hg prefetch [OPTIONS] [FILE...]')) +def prefetch(ui, repo, *pats, **opts): + """prefetch file revisions from the server + + Prefetchs 
file revisions for the specified revs and stores them in the + local remotefilelog cache. If no rev is specified, the default rev is + used which is the union of dot, draft, pullprefetch and bgprefetchrev. + File names or patterns can be used to limit which files are downloaded. + + Return 0 on success. + """ + if not shallowrepo.requirement in repo.requirements: + raise error.Abort(_("repo is not shallow")) + + opts = resolveprefetchopts(ui, opts) + revs = scmutil.revrange(repo, opts.get('rev')) + repo.prefetch(revs, opts.get('base'), pats, opts) + + # Run repack in background + if opts.get('repack'): + repackmod.backgroundrepack(repo, incremental=True) + +@command('repack', [ + ('', 'background', None, _('run in a background process'), None), + ('', 'incremental', None, _('do an incremental repack'), None), + ('', 'packsonly', None, _('only repack packs (skip loose objects)'), None), + ], _('hg repack [OPTIONS]')) +def repack_(ui, repo, *pats, **opts): + if opts.get('background'): + repackmod.backgroundrepack(repo, incremental=opts.get('incremental'), + packsonly=opts.get('packsonly', False)) + return + + options = {'packsonly': opts.get('packsonly')} + + try: + if opts.get('incremental'): + repackmod.incrementalrepack(repo, options=options) + else: + repackmod.fullrepack(repo, options=options) + except repackmod.RepackAlreadyRunning as ex: + # Don't propogate the exception if the repack is already in + # progress, since we want the command to exit 0. + repo.ui.warn('%s\n' % ex)