diff hgext/remotefilelog/shallowrepo.py @ 40495:3a333a582d7b

remotefilelog: import pruned-down remotefilelog extension from hg-experimental

This is remotefilelog as of my recent patches for compatibility with current tip of hg, minus support for old versions of Mercurial and some FB-specific features like their treemanifest extension and fetching linkrev data from a patched phabricator. The file extutil.py moved from hgext3rd to remotefilelog.

This is not yet ready to be landed, consider it a preview for now. Planned changes include:

 * replace lz4 with zstd
 * rename some capabilities, requirements and wireproto commands to mark them as experimental
 * consolidate bits of shallowutil with related functions (eg readfile)

I'm certainly open to other (small) changes, but my rough mission is to land this largely as-is so we can use it as a model of the functionality we need going forward for lazy-fetching of file contents from a server.

# no-check-commit because of a few foo_bar functions

Differential Revision: https://phab.mercurial-scm.org/D4782
author Augie Fackler <augie@google.com>
date Thu, 27 Sep 2018 13:03:19 -0400
parents
children 60eb35b0c11c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/remotefilelog/shallowrepo.py	Thu Sep 27 13:03:19 2018 -0400
@@ -0,0 +1,310 @@
+# shallowrepo.py - shallow repository that uses remote filelogs
+#
+# Copyright 2013 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import
+
+import os
+
+from mercurial.i18n import _
+from mercurial.node import hex, nullid, nullrev
+from mercurial import (
+    encoding,
+    error,
+    localrepo,
+    match,
+    scmutil,
+    sparse,
+    util,
+)
+from mercurial.utils import procutil
+from . import (
+    connectionpool,
+    constants,
+    contentstore,
+    datapack,
+    extutil,
+    fileserverclient,
+    historypack,
+    metadatastore,
+    remotefilectx,
+    remotefilelog,
+    shallowutil,
+)
+
+# Pick hgexecutable() from util when it still lives there, else procutil.
+if util.safehasattr(util, '_hgexecutable'):
+    # Before 5be286db
+    _hgexecutable = util.hgexecutable
+else:
+    _hgexecutable = procutil.hgexecutable
+
+requirement = "remotefilelog"
+_prefetching = _('prefetching')  # topic passed to ui.progress() in _prefetch
+
+# These make*stores functions are global so that other extensions can replace
+# them.
+def makelocalstores(repo):
+    """Build the in-repo content and metadata stores; can not be discarded.
+
+    Both live in the 'data' directory under the store vfs base (like
+    .hg/store/data), which is created when missing.
+    """
+    datadir = os.path.join(repo.svfs.vfs.base, 'data')
+    if not os.path.exists(datadir):
+        os.makedirs(datadir)
+    args = (repo, datadir, repo.name)
+    return (contentstore.remotefilelogcontentstore(*args, shared=False),
+            metadatastore.remotefilelogmetadatastore(*args, shared=False))
+
+def makecachestores(repo):
+    """Build the shared-cache content and metadata stores; can be discarded.
+
+    Sets repo.sharedstore and appends to repo.shareddatastores and
+    repo.sharedhistorystores, which must already exist.
+    """
+    root = shallowutil.getcachepath(repo.ui)
+    args = (repo, root, repo.name)
+    content = contentstore.remotefilelogcontentstore(*args, shared=True)
+    history = metadatastore.remotefilelogmetadatastore(*args, shared=True)
+    repo.sharedstore = content
+    repo.shareddatastores.append(content)
+    repo.sharedhistorystores.append(history)
+    return content, history
+
+def makeremotestores(repo, cachecontent, cachemetadata):
+    """Create repo.fileservice and the stores that fetch from a remote server.
+
+    Returns (content store, metadata store); each one is handed the file
+    service and the matching cache store supplied by the caller.
+    """
+    client = repo.fileservice = fileserverclient.fileserverclient(repo)
+    return (contentstore.remotecontentstore(repo.ui, client, cachecontent),
+            metadatastore.remotemetadatastore(repo.ui, client, cachemetadata))
+
+def makepackstores(repo):
+    """Build the pack-backed content and history stores for the file cache.
+
+    Packs are more efficient (to read from) cache stores.  Both stores are
+    appended to the repo's shared store lists and passed to reportpackmetrics.
+    """
+    packdir = shallowutil.getcachepackpath(repo, constants.FILEPACK_CATEGORY)
+    datastore = datapack.datapackstore(repo.ui, packdir)
+    histstore = historypack.historypackstore(repo.ui, packdir)
+    repo.shareddatastores.append(datastore)
+    repo.sharedhistorystores.append(histstore)
+    shallowutil.reportpackmetrics(repo.ui, 'filestore', datastore, histstore)
+    return datastore, histstore
+
+def makeunionstores(repo):
+    """Build all stores and install the union stores on ``repo``.
+
+    Union stores iterate the other stores and return the first result; they
+    are given the pack, cache, local and remote stores in that order, with
+    the local store as writestore.  The file service is handed the cache
+    stores to write to, or the pack stores if remotefilelog.fetchpacks is set.
+    """
+    repo.shareddatastores = []
+    repo.sharedhistorystores = []
+
+    packdata, packhistory = makepackstores(repo)
+    cachedata, cachehistory = makecachestores(repo)
+    localdata, localhistory = makelocalstores(repo)
+    remotedata, remotehistory = makeremotestores(repo, cachedata, cachehistory)
+
+    repo.contentstore = contentstore.unioncontentstore(
+        packdata, cachedata, localdata, remotedata, writestore=localdata)
+    repo.metadatastore = metadatastore.unionmetadatastore(
+        packhistory, cachehistory, localhistory, remotehistory,
+        writestore=localhistory)
+
+    usepacks = repo.ui.configbool('remotefilelog', 'fetchpacks')
+    fetchdata, fetchhistory = ((packdata, packhistory) if usepacks
+                               else (cachedata, cachehistory))
+    repo.fileservice.setstore(repo.contentstore, repo.metadatastore,
+                              fetchdata, fetchhistory)
+    shallowutil.reportpackmetrics(repo.ui, 'filestore', packdata, packhistory)
+
+def wraprepo(repo):
+    """Convert ``repo`` in place into a shallow, remotefilelog-backed repo:
+    swap in the subclass below, build the stores and the connection pool,
+    and derive repo.shallowmatch from the include/exclude patterns."""
+    class shallowrepository(repo.__class__):
+        @util.propertycache
+        def name(self):
+            return self.ui.config('remotefilelog', 'reponame')
+
+        @util.propertycache
+        def fallbackpath(self):
+            path = repo.ui.config("remotefilelog", "fallbackpath",
+                                  repo.ui.config('paths', 'default'))
+            if not path:
+                raise error.Abort("no remotefilelog server "
+                    "configured - is your .hg/hgrc trusted?")
+            return path
+
+        def maybesparsematch(self, *revs, **kwargs):
+            '''
+            A wrapper that allows the remotefilelog to invoke sparsematch() if
+            this is a sparse repository, or returns None if this is not a
+            sparse repository.
+            '''
+            if revs:
+                ret = sparse.matcher(repo, revs=revs)
+            else:
+                ret = sparse.matcher(repo)
+            # An always-matcher filters nothing: report "not sparse" as None.
+            return None if ret.always() else ret
+
+        def file(self, f):
+            if f[0:1] == '/':  # slice, not f[0]: safe on '' and on bytes
+                f = f[1:]
+            if self.shallowmatch(f):
+                return remotefilelog.remotefilelog(self.svfs, f, self)
+            return super(shallowrepository, self).file(f)
+
+        def filectx(self, path, *args, **kwargs):
+            if self.shallowmatch(path):
+                return remotefilectx.remotefilectx(self, path, *args, **kwargs)
+            return super(shallowrepository, self).filectx(path, *args,
+                                                          **kwargs)
+
+        @localrepo.unfilteredmethod
+        def commitctx(self, ctx, error=False):
+            """Add a new revision to current repository.
+            Revision information is passed via the context argument.
+            """
+            # some contexts already have manifest nodes, they don't need any
+            # prefetching (for example if we're just editing a commit message
+            # we can reuse manifest)
+            if not ctx.manifestnode():
+                # prefetch files that will likely be compared
+                m1 = ctx.p1().manifest()
+                files = []
+                for f in ctx.modified() + ctx.added():
+                    fparent1 = m1.get(f, nullid)
+                    if fparent1 != nullid:
+                        files.append((f, hex(fparent1)))
+                self.fileservice.prefetch(files)
+            return super(shallowrepository, self).commitctx(ctx, error=error)
+
+        def backgroundprefetch(self, revs, base=None, repack=False, pats=None,
+                               opts=None):
+            """Runs prefetch in background with optional repack
+            (base, pats and opts are accepted but not passed to the command)
+            """
+            cmd = [_hgexecutable(), '-R', repo.origroot, 'prefetch']
+            if repack:
+                cmd.append('--repack')
+            if revs:
+                cmd += ['-r', revs]
+            cmd = ' '.join(map(procutil.shellquote, cmd))
+            extutil.runshellcommand(cmd, encoding.environ)
+
+        def prefetch(self, revs, base=None, pats=None, opts=None):
+            """Prefetches all the necessary file revisions for the given revs
+            while holding the 'prefetchlock' lock
+            """
+            with repo._lock(repo.svfs, 'prefetchlock', True, None, None,
+                            _('prefetching in %s') % repo.origroot):
+                self._prefetch(revs, base, pats, opts)
+
+        def _prefetch(self, revs, base=None, pats=None, opts=None):
+            """Fetch the file revisions listed in the manifests of ``revs``,
+            skipping entries also present in the manifest of ``base``.
+            """
+            fallbackpath = self.fallbackpath
+            if fallbackpath:
+                # If we know a rev is on the server, we should fetch the server
+                # version of those files, since our local file versions might
+                # become obsolete if the local commits are stripped.
+                localrevs = repo.revs('outgoing(%s)', fallbackpath)
+                if base is not None and base != nullrev:
+                    serverbase = list(repo.revs('first(reverse(::%s) - %ld)',
+                                                base, localrevs))
+                    if serverbase:
+                        base = serverbase[0]
+            else:
+                localrevs = repo
+
+            mfl = repo.manifestlog
+            mfrevlog = mfl.getstorage('')
+            if base is not None:
+                mfdict = mfl[repo[base].manifestnode()].read()
+                skip = set(mfdict.iteritems())
+            else:
+                skip = set()
+
+            # Copy the skip set to start large and avoid constant resizing,
+            # and since it's likely to be very similar to the prefetch set.
+            files = skip.copy()
+            serverfiles = skip.copy()
+            visited = {nullrev}
+            revnum = 0
+            revcount = len(revs)
+            self.ui.progress(_prefetching, revnum, total=revcount)
+            for rev in sorted(revs):
+                ctx = repo[rev]
+                if pats:
+                    m = scmutil.match(ctx, pats, opts)
+                sparsematch = repo.maybesparsematch(rev)
+
+                mfnode = ctx.manifestnode()
+                mfrev = mfrevlog.rev(mfnode)
+
+                # Decompressing manifests is expensive.
+                # When possible, only read the deltas.
+                p1, p2 = mfrevlog.parentrevs(mfrev)
+                if p1 in visited and p2 in visited:
+                    mfdict = mfl[mfnode].readfast()
+                else:
+                    mfdict = mfl[mfnode].read()
+
+                diff = mfdict.iteritems()
+                if pats:
+                    diff = (pf for pf in diff if m(pf[0]))
+                if sparsematch:
+                    diff = (pf for pf in diff if sparsematch(pf[0]))
+                if rev not in localrevs:
+                    serverfiles.update(diff)
+                else:
+                    files.update(diff)
+
+                visited.add(mfrev)
+                revnum += 1
+                self.ui.progress(_prefetching, revnum, total=revcount)
+
+            files.difference_update(skip)
+            serverfiles.difference_update(skip)
+            self.ui.progress(_prefetching, None)
+
+            # Fetch files known to be on the server
+            if serverfiles:
+                results = [(path, hex(fnode)) for (path, fnode) in serverfiles]
+                repo.fileservice.prefetch(results, force=True)
+
+            # Fetch files that may or may not be on the server
+            if files:
+                results = [(path, hex(fnode)) for (path, fnode) in files]
+                repo.fileservice.prefetch(results)
+
+        def close(self):
+            super(shallowrepository, self).close()
+            self.connectionpool.close()
+
+    repo.__class__ = shallowrepository
+    repo.shallowmatch = match.always(repo.root, '')
+    makeunionstores(repo)
+
+    repo.includepattern = repo.ui.configlist("remotefilelog", "includepattern",
+                                             None)
+    repo.excludepattern = repo.ui.configlist("remotefilelog", "excludepattern",
+                                             None)
+    if not util.safehasattr(repo, 'connectionpool'):
+        repo.connectionpool = connectionpool.connectionpool(repo)
+
+    if repo.includepattern or repo.excludepattern:
+        repo.shallowmatch = match.match(repo.root, '', None,
+            repo.includepattern, repo.excludepattern)