diff mercurial/hg.py @ 25761:0d37b9b21467

hg: support for auto sharing stores when cloning Many 3rd party consumers of Mercurial have created wrappers to essentially perform clone+share as a single operation. This is especially popular in automated processes like continuous integration systems. The Jenkins CI software and Mozilla's Firefox release automation infrastructure have both implemented custom code that effectively perform clone+share. The common use case here is that clients want to obtain N>1 checkouts while minimizing disk space and network requirements. Furthermore, they often don't care that a clone is an exact mirror of a remote: they are simply looking to obtain checkouts of specific revisions. When multiple third parties implement a similar feature, it's a good sign that the feature is worth adding to the core product. This patch adds support for an easy-to-use clone+share feature. The internal "clone" function now accepts options to control auto sharing during clone. When the auto share mode is active, a store will be created/updated under the base directory specified and a new repository pointing to the shared store will be created at the path specified by the user. The share extension has grown the ability to pass these options into the clone command/function. No command line options for this feature are added because we don't feel the feature will be popular enough to warrant their existence. There are two modes for auto share mode. In the default mode, the shared repo is derived from the first changeset (rev 0) in the remote repository. This enables related repositories existing at different URLs to automatically use the same storage. In environments that operate several repositories (separate repo for branch/head/bookmark or separate repo per user), this has the potential to drastically reduce storage and network requirements. In the other mode, the name is derived from the remote's path/URL.
author Gregory Szorc <gregory.szorc@gmail.com>
date Wed, 08 Jul 2015 16:19:09 -0700
parents 328739ea70c3
children 130c0b83e963 d08e7449ff27
line wrap: on
line diff
--- a/mercurial/hg.py	Wed Jul 08 16:43:49 2015 -0500
+++ b/mercurial/hg.py	Wed Jul 08 16:19:09 2015 -0700
@@ -284,8 +284,50 @@
         release(destlock)
         raise
 
+def clonewithshare(ui, peeropts, sharepath, source, srcpeer, dest, pull=False,
+                   rev=None, update=True, stream=False):
+    """Perform a clone using a shared repo.
+
+    The store for the repository will be located at <sharepath>/.hg. The
+    specified revisions will be cloned or pulled from "source". A shared repo
+    will be created at "dest" and a working copy will be created if "update" is
+    True.
+    """
+    revs = None
+    if rev:
+        if not srcpeer.capable('lookup'):
+            raise util.Abort(_("src repository does not support "
+                               "revision lookup and so doesn't "
+                               "support clone by revision"))
+        revs = [srcpeer.lookup(r) for r in rev]
+
+    basename = os.path.basename(sharepath)
+
+    if os.path.exists(sharepath):
+        ui.status(_('(sharing from existing pooled repository %s)\n') %
+                  basename)
+    else:
+        ui.status(_('(sharing from new pooled repository %s)\n') % basename)
+        # Always use pull mode because hardlinks in share mode don't work well.
+        # Never update because working copies aren't necessary in share mode.
+        clone(ui, peeropts, source, dest=sharepath, pull=True,
+              rev=rev, update=False, stream=stream)
+
+    sharerepo = repository(ui, path=sharepath)
+    share(ui, sharerepo, dest=dest, update=update, bookmarks=False)
+
+    # We need to perform a pull against the dest repo to fetch bookmarks
+    # and other non-store data that isn't shared by default. In the case of
+    # non-existing shared repo, this means we pull from the remote twice. This
+    # is a bit weird. But at the time it was implemented, there wasn't an easy
+    # way to pull just non-changegroup data.
+    destrepo = repository(ui, path=dest)
+    exchange.pull(destrepo, srcpeer, heads=revs)
+
+    return srcpeer, peer(ui, peeropts, dest)
+
 def clone(ui, peeropts, source, dest=None, pull=False, rev=None,
-          update=True, stream=False, branch=None):
+          update=True, stream=False, branch=None, shareopts=None):
     """Make a copy of an existing repository.
 
     Create a copy of an existing repository in a new directory.  The
@@ -320,6 +362,13 @@
     anything else is treated as a revision)
 
     branch: branches to clone
+
+    shareopts: dict of options to control auto sharing behavior. The "pool" key
+    activates auto sharing mode and defines the directory for stores. The
+    "mode" key determines how to construct the directory name of the shared
+    repository. "identity" means the name is derived from the node of the first
+    changeset in the repository. "remote" means the name is derived from the
+    remote's path/URL. Defaults to "identity."
     """
 
     if isinstance(source, str):
@@ -352,6 +401,36 @@
         elif destvfs.listdir():
             raise util.Abort(_("destination '%s' is not empty") % dest)
 
+    shareopts = shareopts or {}
+    sharepool = shareopts.get('pool')
+    sharenamemode = shareopts.get('mode')
+    if sharepool:
+        sharepath = None
+        if sharenamemode == 'identity':
+            # Resolve the name from the initial changeset in the remote
+            # repository. This returns nullid when the remote is empty. It
+            # raises RepoLookupError if revision 0 is filtered or otherwise
+            # not available. If we fail to resolve, sharing is not enabled.
+            try:
+                rootnode = srcpeer.lookup('0')
+                if rootnode != node.nullid:
+                    sharepath = os.path.join(sharepool, node.hex(rootnode))
+                else:
+                    ui.status(_('(not using pooled storage: '
+                                'remote appears to be empty)\n'))
+            except error.RepoLookupError:
+                ui.status(_('(not using pooled storage: '
+                            'unable to resolve identity of remote)\n'))
+        elif sharenamemode == 'remote':
+            sharepath = os.path.join(sharepool, util.sha1(source).hexdigest())
+        else:
+            raise util.Abort('unknown share naming mode: %s' % sharenamemode)
+
+        if sharepath:
+            return clonewithshare(ui, peeropts, sharepath, source, srcpeer,
+                                  dest, pull=pull, rev=rev, update=update,
+                                  stream=stream)
+
     srclock = destlock = cleandir = None
     srcrepo = srcpeer.local()
     try: