--- a/hgext/clonebundles.py Fri Apr 14 10:41:40 2023 +0200
+++ b/hgext/clonebundles.py Mon Mar 13 17:34:18 2023 +0100
@@ -200,15 +200,72 @@
occurs. So server operators should prepare for some people to follow these
instructions when a failure occurs, thus driving more load to the original
Mercurial server when the bundle hosting service fails.
+
+
+auto-generation of clone bundles
+--------------------------------
+
+It is possible to set Mercurial to automatically re-generate clone bundles when
+new content is available.
+
+Mercurial will take care of the process asynchronously. The defined list of
+bundle type will be generated, uploaded, and advertised.
+
+Bundles Generation:
+...................
+
+The extension can generate multiple variants of the clone bundle. Each
+different variant will be defined by the "bundle-spec" they use::
+
+ [clone-bundles]
+ auto-generate.formats= zstd-v2, gzip-v2
+
+See `hg help bundlespec` for details about available options.
+
+Bundles Upload and Serving:
+...........................
+
+The generated bundles need to be made available to users through a "public" URL.
+This should be donne through `clone-bundles.upload-command` configuration. The
+value of this command should be a shell command. It will have access to the
+bundle file path through the `$HGCB_BUNDLE_PATH` variable. And the expected
+basename in the "public" URL is accessible at::
+
+ [clone-bundles]
+ upload-command=sftp put $HGCB_BUNDLE_PATH \
+ sftp://bundles.host/clone-bundles/$HGCB_BUNDLE_BASENAME
+
+After upload, the file should be available at an url defined by
+`clone-bundles.url-template`.
+
+ [clone-bundles]
+ url-template=https://bundles.host/cache/clone-bundles/{basename}
"""
+import os
+import weakref
+
+from mercurial.i18n import _
+
from mercurial import (
bundlecaches,
+ commands,
+ error,
extensions,
+ localrepo,
+ lock,
+ node,
+ registrar,
+ util,
wireprotov1server,
)
+
+from mercurial.utils import (
+ procutil,
+)
+
testedwith = b'ships-with-hg-core'
@@ -226,3 +283,550 @@
def extsetup(ui):
extensions.wrapfunction(wireprotov1server, b'_capabilities', capabilities)
+
+
+# logic for bundle auto-generation
+
+
+configtable = {}
+configitem = registrar.configitem(configtable)
+
+cmdtable = {}
+command = registrar.command(cmdtable)
+
+configitem(b'clone-bundles', b'auto-generate.formats', default=list)
+
+
+configitem(b'clone-bundles', b'upload-command', default=None)
+
+configitem(b'clone-bundles', b'url-template', default=None)
+
+configitem(b'devel', b'debug.clonebundles', default=False)
+
+
+# category for the post-close transaction hooks
+CAT_POSTCLOSE = b"clonebundles-autobundles"
+
+# template for bundle file names
+BUNDLE_MASK = (
+ b"full-%(bundle_type)s-%(revs)d_revs-%(tip_short)s_tip-%(op_id)s.hg"
+)
+
+
+# file in .hg/ use to track clonebundles being auto-generated
+AUTO_GEN_FILE = b'clonebundles.auto-gen'
+
+
+class BundleBase(object):
+ """represents the core of properties that matters for us in a bundle
+
+ :bundle_type: the bundlespec (see hg help bundlespec)
+ :revs: the number of revisions in the repo at bundle creation time
+ :tip_rev: the rev-num of the tip revision
+ :tip_node: the node id of the tip-most revision in the bundle
+
+ :ready: True if the bundle is ready to be served
+ """
+
+ ready = False
+
+ def __init__(self, bundle_type, revs, tip_rev, tip_node):
+ self.bundle_type = bundle_type
+ self.revs = revs
+ self.tip_rev = tip_rev
+ self.tip_node = tip_node
+
+ def valid_for(self, repo):
+ """is this bundle applicable to the current repository
+
+ This is useful for detecting bundles made irrelevant by stripping.
+ """
+ tip_node = node.bin(self.tip_node)
+ return repo.changelog.index.get_rev(tip_node) == self.tip_rev
+
+ def __eq__(self, other):
+ left = (self.ready, self.bundle_type, self.tip_rev, self.tip_node)
+ right = (other.ready, other.bundle_type, other.tip_rev, other.tip_node)
+ return left == right
+
+ def __neq__(self, other):
+ return not self == other
+
+ def __cmp__(self, other):
+ if self == other:
+ return 0
+ return -1
+
+
+class RequestedBundle(BundleBase):
+ """A bundle that should be generated.
+
+ Additional attributes compared to BundleBase
+ :heads: list of head revisions (as rev-num)
+ :op_id: a "unique" identifier for the operation triggering the change
+ """
+
+ def __init__(self, bundle_type, revs, tip_rev, tip_node, head_revs, op_id):
+ self.head_revs = head_revs
+ self.op_id = op_id
+ super(RequestedBundle, self).__init__(
+ bundle_type,
+ revs,
+ tip_rev,
+ tip_node,
+ )
+
+ @property
+ def suggested_filename(self):
+ """A filename that can be used for the generated bundle"""
+ data = {
+ b'bundle_type': self.bundle_type,
+ b'revs': self.revs,
+ b'heads': self.head_revs,
+ b'tip_rev': self.tip_rev,
+ b'tip_node': self.tip_node,
+ b'tip_short': self.tip_node[:12],
+ b'op_id': self.op_id,
+ }
+ return BUNDLE_MASK % data
+
+ def generate_bundle(self, repo, file_path):
+ """generate the bundle at `filepath`"""
+ commands.bundle(
+ repo.ui,
+ repo,
+ file_path,
+ base=[b"null"],
+ rev=self.head_revs,
+ type=self.bundle_type,
+ quiet=True,
+ )
+
+ def generating(self, file_path, hostname=None, pid=None):
+ """return a GeneratingBundle object from this object"""
+ if pid is None:
+ pid = os.getpid()
+ if hostname is None:
+ hostname = lock._getlockprefix()
+ return GeneratingBundle(
+ self.bundle_type,
+ self.revs,
+ self.tip_rev,
+ self.tip_node,
+ hostname,
+ pid,
+ file_path,
+ )
+
+
+class GeneratingBundle(BundleBase):
+ """A bundle being generated
+
+ extra attributes compared to BundleBase:
+
+ :hostname: the hostname of the machine generating the bundle
+ :pid: the pid of the process generating the bundle
+ :filepath: the target filename of the bundle
+
+ These attributes exist to help detect stalled generation processes.
+ """
+
+ ready = False
+
+ def __init__(
+ self, bundle_type, revs, tip_rev, tip_node, hostname, pid, filepath
+ ):
+ self.hostname = hostname
+ self.pid = pid
+ self.filepath = filepath
+ super(GeneratingBundle, self).__init__(
+ bundle_type, revs, tip_rev, tip_node
+ )
+
+ @classmethod
+ def from_line(cls, line):
+ """create an object by deserializing a line from AUTO_GEN_FILE"""
+ assert line.startswith(b'PENDING-v1 ')
+ (
+ __,
+ bundle_type,
+ revs,
+ tip_rev,
+ tip_node,
+ hostname,
+ pid,
+ filepath,
+ ) = line.split()
+ hostname = util.urlreq.unquote(hostname)
+ filepath = util.urlreq.unquote(filepath)
+ revs = int(revs)
+ tip_rev = int(tip_rev)
+ pid = int(pid)
+ return cls(
+ bundle_type, revs, tip_rev, tip_node, hostname, pid, filepath
+ )
+
+ def to_line(self):
+ """serialize the object to include as a line in AUTO_GEN_FILE"""
+ templ = b"PENDING-v1 %s %d %d %s %s %d %s"
+ data = (
+ self.bundle_type,
+ self.revs,
+ self.tip_rev,
+ self.tip_node,
+ util.urlreq.quote(self.hostname),
+ self.pid,
+ util.urlreq.quote(self.filepath),
+ )
+ return templ % data
+
+ def __eq__(self, other):
+ if not super(GeneratingBundle, self).__eq__(other):
+ return False
+ left = (self.hostname, self.pid, self.filepath)
+ right = (other.hostname, other.pid, other.filepath)
+ return left == right
+
+ def uploaded(self, url, basename):
+ """return a GeneratedBundle from this object"""
+ return GeneratedBundle(
+ self.bundle_type,
+ self.revs,
+ self.tip_rev,
+ self.tip_node,
+ url,
+ basename,
+ )
+
+
+class GeneratedBundle(BundleBase):
+ """A bundle that is done being generated and can be served
+
+ extra attributes compared to BundleBase:
+
+ :file_url: the url where the bundle is available.
+ :basename: the "basename" used to upload (useful for deletion)
+
+ These attributes exist to generate a bundle manifest
+ (.hg/pullbundles.manifest)
+ """
+
+ ready = True
+
+ def __init__(
+ self, bundle_type, revs, tip_rev, tip_node, file_url, basename
+ ):
+ self.file_url = file_url
+ self.basename = basename
+ super(GeneratedBundle, self).__init__(
+ bundle_type, revs, tip_rev, tip_node
+ )
+
+ @classmethod
+ def from_line(cls, line):
+ """create an object by deserializing a line from AUTO_GEN_FILE"""
+ assert line.startswith(b'DONE-v1 ')
+ (
+ __,
+ bundle_type,
+ revs,
+ tip_rev,
+ tip_node,
+ file_url,
+ basename,
+ ) = line.split()
+ revs = int(revs)
+ tip_rev = int(tip_rev)
+ file_url = util.urlreq.unquote(file_url)
+ return cls(bundle_type, revs, tip_rev, tip_node, file_url, basename)
+
+ def to_line(self):
+ """serialize the object to include as a line in AUTO_GEN_FILE"""
+ templ = b"DONE-v1 %s %d %d %s %s %s"
+ data = (
+ self.bundle_type,
+ self.revs,
+ self.tip_rev,
+ self.tip_node,
+ util.urlreq.quote(self.file_url),
+ self.basename,
+ )
+ return templ % data
+
+ def manifest_line(self):
+ """serialize the object to include as a line in pullbundles.manifest"""
+ templ = b"%s BUNDLESPEC=%s REQUIRESNI=true"
+ return templ % (self.file_url, self.bundle_type)
+
+ def __eq__(self, other):
+ if not super(GeneratedBundle, self).__eq__(other):
+ return False
+ return self.file_url == other.file_url
+
+
+def parse_auto_gen(content):
+ """parse the AUTO_GEN_FILE to return a list of Bundle object"""
+ bundles = []
+ for line in content.splitlines():
+ if line.startswith(b'PENDING-v1 '):
+ bundles.append(GeneratingBundle.from_line(line))
+ elif line.startswith(b'DONE-v1 '):
+ bundles.append(GeneratedBundle.from_line(line))
+ return bundles
+
+
+def dumps_auto_gen(bundles):
+ """serialize a list of Bundle as a AUTO_GEN_FILE content"""
+ lines = []
+ for b in bundles:
+ lines.append(b"%s\n" % b.to_line())
+ lines.sort()
+ return b"".join(lines)
+
+
+def read_auto_gen(repo):
+ """read the AUTO_GEN_FILE for the <repo> a list of Bundle object"""
+ data = repo.vfs.tryread(AUTO_GEN_FILE)
+ if not data:
+ return []
+ return parse_auto_gen(data)
+
+
+def write_auto_gen(repo, bundles):
+ """write a list of Bundle objects into the repo's AUTO_GEN_FILE"""
+ assert repo._cb_lock_ref is not None
+ data = dumps_auto_gen(bundles)
+ with repo.vfs(AUTO_GEN_FILE, mode=b'wb', atomictemp=True) as f:
+ f.write(data)
+
+
+def generate_manifest(bundles):
+ """write a list of Bundle objects into the repo's AUTO_GEN_FILE"""
+ bundles = list(bundles)
+ bundles.sort(key=lambda b: b.bundle_type)
+ lines = []
+ for b in bundles:
+ lines.append(b"%s\n" % b.manifest_line())
+ return b"".join(lines)
+
+
+def update_ondisk_manifest(repo):
+ """update the clonebundle manifest with latest url"""
+ with repo.clonebundles_lock():
+ bundles = read_auto_gen(repo)
+
+ per_types = {}
+ for b in bundles:
+ if not (b.ready and b.valid_for(repo)):
+ continue
+ current = per_types.get(b.bundle_type)
+ if current is not None and current.revs >= b.revs:
+ continue
+ per_types[b.bundle_type] = b
+ manifest = generate_manifest(per_types.values())
+ with repo.vfs(
+ bundlecaches.CB_MANIFEST_FILE, mode=b"wb", atomictemp=True
+ ) as f:
+ f.write(manifest)
+
+
+def update_bundle_list(repo, new_bundles=(), del_bundles=()):
+ """modify the repo's AUTO_GEN_FILE
+
+ This method also regenerates the clone bundle manifest when needed"""
+ with repo.clonebundles_lock():
+ bundles = read_auto_gen(repo)
+ if del_bundles:
+ bundles = [b for b in bundles if b not in del_bundles]
+ new_bundles = [b for b in new_bundles if b not in bundles]
+ bundles.extend(new_bundles)
+ write_auto_gen(repo, bundles)
+ all_changed = []
+ all_changed.extend(new_bundles)
+ all_changed.extend(del_bundles)
+ if any(b.ready for b in all_changed):
+ update_ondisk_manifest(repo)
+
+
+def cleanup_tmp_bundle(repo, target):
+ """remove a GeneratingBundle file and entry"""
+ assert not target.ready
+ with repo.clonebundles_lock():
+ repo.vfs.tryunlink(target.filepath)
+ update_bundle_list(repo, del_bundles=[target])
+
+
+def finalize_one_bundle(repo, target):
+ """upload a generated bundle and advertise it in the clonebundles.manifest"""
+ with repo.clonebundles_lock():
+ bundles = read_auto_gen(repo)
+ if target in bundles and target.valid_for(repo):
+ result = upload_bundle(repo, target)
+ update_bundle_list(repo, new_bundles=[result])
+ cleanup_tmp_bundle(repo, target)
+
+
+def upload_bundle(repo, bundle):
+ """upload the result of a GeneratingBundle and return a GeneratedBundle
+
+ The upload is done using the `clone-bundles.upload-command`
+ """
+ cmd = repo.ui.config(b'clone-bundles', b'upload-command')
+ url = repo.ui.config(b'clone-bundles', b'url-template')
+ basename = repo.vfs.basename(bundle.filepath)
+ filepath = procutil.shellquote(bundle.filepath)
+ variables = {
+ b'HGCB_BUNDLE_PATH': filepath,
+ b'HGCB_BUNDLE_BASENAME': basename,
+ }
+ env = procutil.shellenviron(environ=variables)
+ ret = repo.ui.system(cmd, environ=env)
+ if ret:
+ raise error.Abort(b"command returned status %d: %s" % (ret, cmd))
+ url = (
+ url.decode('utf8')
+ .format(basename=basename.decode('utf8'))
+ .encode('utf8')
+ )
+ return bundle.uploaded(url, basename)
+
+
+def auto_bundle_needed_actions(repo, bundles, op_id):
+ """find the list of bundles that need action
+
+ returns a list of RequestedBundle objects that need to be generated and
+ uploaded."""
+ create_bundles = []
+ repo = repo.filtered(b"immutable")
+ targets = repo.ui.configlist(b'clone-bundles', b'auto-generate.formats')
+ revs = len(repo.changelog)
+ generic_data = {
+ 'revs': revs,
+ 'head_revs': repo.changelog.headrevs(),
+ 'tip_rev': repo.changelog.tiprev(),
+ 'tip_node': node.hex(repo.changelog.tip()),
+ 'op_id': op_id,
+ }
+ for t in targets:
+ data = generic_data.copy()
+ data['bundle_type'] = t
+ b = RequestedBundle(**data)
+ create_bundles.append(b)
+ return create_bundles
+
+
+def start_one_bundle(repo, bundle):
+ """start the generation of a single bundle file
+
+ the `bundle` argument should be a RequestedBundle object.
+
+ This data is passed to the `debugmakeclonebundles` "as is".
+ """
+ data = util.pickle.dumps(bundle)
+ cmd = [procutil.hgexecutable(), b'--cwd', repo.path, INTERNAL_CMD]
+ env = procutil.shellenviron()
+ msg = b'clone-bundles: starting bundle generation: %s\n'
+ stdout = None
+ stderr = None
+ waits = []
+ record_wait = None
+ if repo.ui.configbool(b'devel', b'debug.clonebundles'):
+ stdout = procutil.stdout
+ stderr = procutil.stderr
+ repo.ui.write(msg % bundle.bundle_type)
+ record_wait = waits.append
+ else:
+ repo.ui.debug(msg % bundle.bundle_type)
+ bg = procutil.runbgcommand
+ bg(
+ cmd,
+ env,
+ stdin_bytes=data,
+ stdout=stdout,
+ stderr=stderr,
+ record_wait=record_wait,
+ )
+ for f in waits:
+ f()
+
+
+INTERNAL_CMD = b'debug::internal-make-clone-bundles'
+
+
+@command(INTERNAL_CMD, [], b'')
+def debugmakeclonebundles(ui, repo):
+ """Internal command to auto-generate debug bundles"""
+ requested_bundle = util.pickle.load(procutil.stdin)
+ procutil.stdin.close()
+
+ fname = requested_bundle.suggested_filename
+ fpath = repo.vfs.makedirs(b'tmp-bundles')
+ fpath = repo.vfs.join(b'tmp-bundles', fname)
+ bundle = requested_bundle.generating(fpath)
+ update_bundle_list(repo, new_bundles=[bundle])
+
+ requested_bundle.generate_bundle(repo, fpath)
+
+ repo.invalidate()
+ finalize_one_bundle(repo, bundle)
+
+
+def make_auto_bundler(source_repo):
+ reporef = weakref.ref(source_repo)
+
+ def autobundle(tr):
+ repo = reporef()
+ assert repo is not None
+ bundles = read_auto_gen(repo)
+ new = auto_bundle_needed_actions(repo, bundles, b"%d_txn" % id(tr))
+ for data in new:
+ start_one_bundle(repo, data)
+ return None
+
+ return autobundle
+
+
+def reposetup(ui, repo):
+ """install the two pieces needed for automatic clonebundle generation
+
+ - add a "post-close" hook that fires bundling when needed
+ - introduce a clone-bundle lock to let multiple processes meddle with the
+ state files.
+ """
+ if not repo.local():
+ return
+
+ class autobundlesrepo(repo.__class__):
+ def transaction(self, *args, **kwargs):
+ tr = super(autobundlesrepo, self).transaction(*args, **kwargs)
+ targets = repo.ui.configlist(
+ b'clone-bundles', b'auto-generate.formats'
+ )
+ if targets:
+ tr.addpostclose(CAT_POSTCLOSE, make_auto_bundler(self))
+ return tr
+
+ @localrepo.unfilteredmethod
+ def clonebundles_lock(self, wait=True):
+ '''Lock the repository file related to clone bundles'''
+ if not util.safehasattr(self, '_cb_lock_ref'):
+ self._cb_lock_ref = None
+ l = self._currentlock(self._cb_lock_ref)
+ if l is not None:
+ l.lock()
+ return l
+
+ l = self._lock(
+ vfs=self.vfs,
+ lockname=b"clonebundleslock",
+ wait=wait,
+ releasefn=None,
+ acquirefn=None,
+ desc=_(b'repository %s') % self.origroot,
+ )
+ self._cb_lock_ref = weakref.ref(l)
+ return l
+
+ repo._wlockfreeprefix.add(AUTO_GEN_FILE)
+ repo._wlockfreeprefix.add(bundlecaches.CB_MANIFEST_FILE)
+ repo.__class__ = autobundlesrepo