clonebundles: add support for inline (streaming) clonebundles
author Mathias De Mare <mathias.de_mare@nokia.com>
Wed, 08 Mar 2023 14:23:43 +0100
changeset 50666 60f9602b413e
parent 50646 c814101560d9
child 50668 6b522a9e7451
clonebundles: add support for inline (streaming) clonebundles The idea behind inline clonebundles is to send them through the ssh or https connection to the Mercurial server. We've been using this specifically for streaming clonebundles, although it works for 'regular' clonebundles as well (but is less relevant, since pullbundles exist). We've had this enabled for around 9 months for a part of our users. A few benefits are: - no need to secure an external system, since everything goes through the same Mercurial server - easier scaling (in our case: no risk of inconsistencies between multiple mercurial-server mirrors and nginx clonebundles hosts) Remaining topics/questions right now: - The inline clonebundles don't work for https yet. This is because httppeer doesn't seem to support sending client capabilities. I didn't focus on that as my main goal was to get this working for ssh.
hgext/clonebundles.py
mercurial/bundlecaches.py
mercurial/exchange.py
mercurial/helptext/config.txt
mercurial/httppeer.py
mercurial/interfaces/repository.py
mercurial/localrepo.py
mercurial/sshpeer.py
mercurial/streamclone.py
mercurial/wireprotov1peer.py
mercurial/wireprotov1server.py
tests/test-clonebundles.t
--- a/hgext/clonebundles.py	Wed May 31 18:08:56 2023 +0100
+++ b/hgext/clonebundles.py	Wed Mar 08 14:23:43 2023 +0100
@@ -202,6 +202,18 @@
 Mercurial server when the bundle hosting service fails.
 
 
+inline clonebundles
+-------------------
+
+It is possible to transmit clonebundles inline when repositories are
+accessed over SSH. This avoids having to set up an external HTTPS server
+and reuses the access control already in place for the SSH setup.
+
+Inline clonebundles should be placed into the `.hg/bundle-cache` directory.
+A clonebundle at `.hg/bundle-cache/mybundle.bundle` is referred to
+in the `clonebundles.manifest` file as `peer-bundle-cache://mybundle.bundle`.
+
+
 auto-generation of clone bundles
 --------------------------------
 
--- a/mercurial/bundlecaches.py	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/bundlecaches.py	Wed Mar 08 14:23:43 2023 +0100
@@ -23,7 +23,9 @@
 
 urlreq = util.urlreq
 
+BUNDLE_CACHE_DIR = b'bundle-cache'
 CB_MANIFEST_FILE = b'clonebundles.manifest'
+CLONEBUNDLESCHEME = b"peer-bundle-cache://"
 
 
 def get_manifest(repo):
--- a/mercurial/exchange.py	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/exchange.py	Wed Mar 08 14:23:43 2023 +0100
@@ -2834,7 +2834,7 @@
 
     url = entries[0][b'URL']
     repo.ui.status(_(b'applying clone bundle from %s\n') % url)
-    if trypullbundlefromurl(repo.ui, repo, url):
+    if trypullbundlefromurl(repo.ui, repo, url, remote):
         repo.ui.status(_(b'finished applying clone bundle\n'))
     # Bundle failed.
     #
@@ -2855,11 +2855,32 @@
         )
 
 
-def trypullbundlefromurl(ui, repo, url):
+def inline_clone_bundle_open(ui, url, peer):
+    """Open an inline clone bundle served by ``peer``.
+
+    ``url`` is a ``peer-bundle-cache://`` manifest entry; the part after the
+    scheme is passed to ``peer.get_inline_clone_bundle()`` and the result is
+    wrapped in a ``util.chunkbuffer``.
+
+    Raises ``error.Abort`` when no peer is supplied.
+    """
+    if not peer:
+        # interpolate after translating, so the catalog lookup sees the
+        # untouched msgid
+        raise error.Abort(_(b'no remote repository supplied for %s') % url)
+    clonebundleid = url[len(bundlecaches.CLONEBUNDLESCHEME) :]
+    peerclonebundle = peer.get_inline_clone_bundle(clonebundleid)
+    return util.chunkbuffer(peerclonebundle)
+
+
+def trypullbundlefromurl(ui, repo, url, peer):
     """Attempt to apply a bundle from a URL."""
     with repo.lock(), repo.transaction(b'bundleurl') as tr:
         try:
-            fh = urlmod.open(ui, url)
+            if url.startswith(bundlecaches.CLONEBUNDLESCHEME):
+                fh = inline_clone_bundle_open(ui, url, peer)
+            else:
+                fh = urlmod.open(ui, url)
             cg = readbundle(ui, fh, b'stream')
 
             if isinstance(cg, streamclone.streamcloneapplier):
--- a/mercurial/helptext/config.txt	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/helptext/config.txt	Wed Mar 08 14:23:43 2023 +0100
@@ -1318,6 +1318,12 @@
   changeset to tag is in ``$HG_NODE``. The name of tag is in ``$HG_TAG``. The
   tag is local if ``$HG_LOCAL=1``, or in the repository if ``$HG_LOCAL=0``.
 
+``pretransmit-inline-clone-bundle``
+  Run before transferring an inline clonebundle to the peer.
+  If the exit status is 0, the inline clonebundle will be allowed to be
+  transferred. A non-zero status will cause the transfer to fail.
+  The path of the inline clonebundle is in ``$HG_CLONEBUNDLEPATH``.
+
 ``pretxnopen``
   Run before any new repository transaction is open. The reason for the
   transaction will be in ``$HG_TXNNAME``, and a unique identifier for the
--- a/mercurial/httppeer.py	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/httppeer.py	Wed Mar 08 14:23:43 2023 +0100
@@ -441,6 +441,13 @@
     def capabilities(self):
         return self._caps
 
+    def _finish_inline_clone_bundle(self, stream):
+        # HTTP streams must hit the end to process the last empty
+        # chunk of Chunked-Encoding so the connection can be reused.
+        chunk = stream.read(1)
+        if chunk:
+            self._abort(error.ResponseError(_(b"unexpected response:"), chunk))
+
     # End of ipeercommands interface.
 
     def _callstream(self, cmd, _compressible=False, **args):
--- a/mercurial/interfaces/repository.py	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/interfaces/repository.py	Wed Mar 08 14:23:43 2023 +0100
@@ -176,6 +176,12 @@
         Returns a set of string capabilities.
         """
 
+    def get_inline_clone_bundle(path):
+        """Retrieve the inline clonebundle named ``path`` across the wire.
+
+        Returns an iterator over the raw chunks of the bundle.
+        """
+
     def clonebundles():
         """Obtains the clone bundles manifest for the repo.
 
--- a/mercurial/localrepo.py	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/localrepo.py	Wed Mar 08 14:23:43 2023 +0100
@@ -348,6 +348,10 @@
     def capabilities(self):
         return self._caps
 
+    def get_inline_clone_bundle(self, path):
+        # not needed with local peer
+        raise NotImplementedError
+
     def clonebundles(self):
         return bundlecaches.get_manifest(self._repo)
 
--- a/mercurial/sshpeer.py	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/sshpeer.py	Wed Mar 08 14:23:43 2023 +0100
@@ -213,7 +213,7 @@
 
     Returns a list of capabilities that are supported by this client.
     """
-    protoparams = {b'partial-pull'}
+    protoparams = {b'partial-pull', b'inlineclonebundles'}
     comps = [
         e.wireprotosupport().name
         for e in util.compengines.supportedwireengines(util.CLIENTROLE)
--- a/mercurial/streamclone.py	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/streamclone.py	Wed Mar 08 14:23:43 2023 +0100
@@ -428,7 +428,21 @@
             with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
                 for i in range(filecount):
                     # XXX doesn't support '\n' or '\r' in filenames
-                    l = fp.readline()
+                    if util.safehasattr(fp, 'readline'):
+                        l = fp.readline()
+                    else:
+                        # inline clonebundles use a chunkbuffer, so no readline
+                        # --> reading byte by byte is fine, each header line
+                        # only holds `name\0size\n`
+                        l_buf = []
+                        while not (l_buf and l_buf[-1] == b'\n'):
+                            c = fp.read(1)
+                            if not c:
+                                # stream ended before the newline: stop, as
+                                # readline() would, rather than loop forever
+                                break
+                            l_buf.append(c)
+                        l = b''.join(l_buf)
                     try:
                         name, size = l.split(b'\0', 1)
                         size = int(size)
--- a/mercurial/wireprotov1peer.py	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/wireprotov1peer.py	Wed Mar 08 14:23:43 2023 +0100
@@ -341,6 +341,19 @@
         self.requirecap(b'clonebundles', _(b'clone bundles'))
         return self._call(b'clonebundles')
 
+    def _finish_inline_clone_bundle(self, stream):
+        pass  # allow override for httppeer
+
+    def get_inline_clone_bundle(self, path):
+        stream = self._callstream(b"get_inline_clone_bundle", path=path)
+        length = util.uvarintdecodestream(stream)
+
+        # SSH streams will block if reading more than length
+        for chunk in util.filechunkiter(stream, limit=length):
+            yield chunk
+
+        self._finish_inline_clone_bundle(stream)
+
     @batchable
     def lookup(self, key):
         self.requirecap(b'lookup', _(b'look up remote revision'))
--- a/mercurial/wireprotov1server.py	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/wireprotov1server.py	Wed Mar 08 14:23:43 2023 +0100
@@ -21,6 +21,7 @@
     encoding,
     error,
     exchange,
+    hook,
     pushkey as pushkeymod,
     pycompat,
     repoview,
@@ -264,6 +265,52 @@
     return wireprototypes.bytesresponse(b''.join(r))
 
 
+@wireprotocommand(b'get_inline_clone_bundle', b'path', permission=b'pull')
+def get_inline_clone_bundle(repo, proto, path):
+    """Server command to send a clonebundle to the client
+
+    ``path`` is relative to the ``bundle-cache`` directory of the repository.
+    The ``pretransmit-inline-clone-bundle`` hook, when configured, runs first
+    and may veto the transfer. The response stream is the file size encoded
+    as a uvarint followed by the raw file content.
+
+    Aborts if ``path`` resolves outside the bundle cache directory or does
+    not exist.
+    """
+    if hook.hashook(repo.ui, b'pretransmit-inline-clone-bundle'):
+        hook.hook(
+            repo.ui,
+            repo,
+            b'pretransmit-inline-clone-bundle',
+            throw=True,
+            clonebundlepath=path,
+        )
+
+    bundle_dir = repo.vfs.join(bundlecaches.BUNDLE_CACHE_DIR)
+    clonebundlepath = repo.vfs.join(bundle_dir, path)
+
+    # Validate the client supplied path before probing for the file. The
+    # trailing separator matters: a bare prefix test would also accept
+    # sibling directories such as `bundle-cache-private/`.
+    clonebundles_dir = os.path.realpath(bundle_dir) + pycompat.ossep
+    if not os.path.realpath(clonebundlepath).startswith(clonebundles_dir):
+        raise error.Abort(b'clonebundle %s is using an illegal path' % path)
+
+    if not repo.vfs.exists(clonebundlepath):
+        raise error.Abort(b'clonebundle %s does not exist' % path)
+
+    def generator(vfs, bundle_path):
+        with vfs(bundle_path) as f:
+            # the length goes first so the client knows where to stop reading
+            length = os.fstat(f.fileno()).st_size
+            yield util.uvarintencode(length)
+            for chunk in util.filechunkiter(f):
+                yield chunk
+
+    stream = generator(repo.vfs, clonebundlepath)
+    return wireprototypes.streamres(gen=stream, prefer_uncompressed=True)
+
+
 @wireprotocommand(b'clonebundles', b'', permission=b'pull')
 def clonebundles(repo, proto):
     """Server command for returning info for available bundles to seed clones.
@@ -273,9 +308,21 @@
     Extensions may wrap this command to filter or dynamically emit data
     depending on the request. e.g. you could advertise URLs for the closest
     data center given the client's IP address.
+
+    The only filter on the server side is filtering out inline clonebundles
+    in case a client does not support them.
+    Otherwise, older clients would retrieve and error out on those.
     """
-    manifest = bundlecaches.get_manifest(repo)
-    return wireprototypes.bytesresponse(manifest)
+    manifest_contents = bundlecaches.get_manifest(repo)
+    clientcapabilities = proto.getprotocaps()
+    if b'inlineclonebundles' in clientcapabilities:
+        return wireprototypes.bytesresponse(manifest_contents)
+    modified_manifest = []
+    for line in manifest_contents.splitlines():
+        if line.startswith(bundlecaches.CLONEBUNDLESCHEME):
+            continue
+        modified_manifest.append(line)
+    return wireprototypes.bytesresponse(b'\n'.join(modified_manifest))
 
 
 wireprotocaps = [
--- a/tests/test-clonebundles.t	Wed May 31 18:08:56 2023 +0100
+++ b/tests/test-clonebundles.t	Wed Mar 08 14:23:43 2023 +0100
@@ -219,6 +219,59 @@
   no changes found
   2 local changesets published
 
+Feature works over SSH with inline bundle
+  $ mkdir server/.hg/bundle-cache/
+  $ cp full.hg server/.hg/bundle-cache/
+  $ echo "peer-bundle-cache://full.hg" > server/.hg/clonebundles.manifest
+  $ hg clone -U ssh://user@dummy/server ssh-inline-clone
+  applying clone bundle from peer-bundle-cache://full.hg
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 2 changes to 2 files
+  finished applying clone bundle
+  searching for changes
+  no changes found
+  2 local changesets published
+
+Hooks work with inline bundle
+  $ cp server/.hg/hgrc server/.hg/hgrc-beforeinlinehooks
+  $ echo "[hooks]" >> server/.hg/hgrc
+  $ echo "pretransmit-inline-clone-bundle=echo foo" >> server/.hg/hgrc
+  $ hg clone -U ssh://user@dummy/server ssh-inline-clone-hook
+  applying clone bundle from peer-bundle-cache://full.hg
+  remote: foo
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 2 changes to 2 files
+  finished applying clone bundle
+  searching for changes
+  no changes found
+  2 local changesets published
+
+Hooks can make an inline bundle fail
+  $ cp server/.hg/hgrc-beforeinlinehooks server/.hg/hgrc
+  $ echo "[hooks]" >> server/.hg/hgrc
+  $ echo "pretransmit-inline-clone-bundle=echo bar && false" >> server/.hg/hgrc
+  $ hg clone -U ssh://user@dummy/server ssh-inline-clone-hook-fail
+  applying clone bundle from peer-bundle-cache://full.hg
+  remote: bar
+  remote: abort: pretransmit-inline-clone-bundle hook exited with status 1
+  abort: stream ended unexpectedly (got 0 bytes, expected 1)
+  [255]
+  $ cp server/.hg/hgrc-beforeinlinehooks server/.hg/hgrc
+
+Feature does not use inline bundle over HTTP(S) because there is no protocaps support
+(so no way for the client to announce that it supports inline clonebundles)
+  $ hg clone -U http://localhost:$HGPORT http-inline-clone
+  requesting all changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 2 changes to 2 files
+  new changesets 53245c60e682:aaff8d2ffbbf
+
 Entry with unknown BUNDLESPEC is filtered and not used
 
   $ cat > server/.hg/clonebundles.manifest << EOF