Mercurial > hg
changeset 50430:5ae30ff79c76
clone-bundles: add a basic first version of automatic bundle generation
See inline documentation for details.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 13 Mar 2023 17:34:18 +0100 |
parents | 771294224bf6 |
children | 971dc2369b04 |
files | hgext/clonebundles.py tests/test-clonebundles-autogen.t |
diffstat | 2 files changed, 674 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/clonebundles.py Fri Apr 14 10:41:40 2023 +0200 +++ b/hgext/clonebundles.py Mon Mar 13 17:34:18 2023 +0100 @@ -200,15 +200,72 @@ occurs. So server operators should prepare for some people to follow these instructions when a failure occurs, thus driving more load to the original Mercurial server when the bundle hosting service fails. + + +auto-generation of clone bundles +-------------------------------- + +It is possible to set Mercurial to automatically re-generate clone bundles when +new content is available. + +Mercurial will take care of the process asynchronously. The defined list of +bundle type will be generated, uploaded, and advertised. + +Bundles Generation: +................... + +The extension can generate multiple variants of the clone bundle. Each +different variant will be defined by the "bundle-spec" they use:: + + [clone-bundles] + auto-generate.formats= zstd-v2, gzip-v2 + +See `hg help bundlespec` for details about available options. + +Bundles Upload and Serving: +........................... + +The generated bundles need to be made available to users through a "public" URL. +This should be donne through `clone-bundles.upload-command` configuration. The +value of this command should be a shell command. It will have access to the +bundle file path through the `$HGCB_BUNDLE_PATH` variable. And the expected +basename in the "public" URL is accessible at:: + + [clone-bundles] + upload-command=sftp put $HGCB_BUNDLE_PATH \ + sftp://bundles.host/clone-bundles/$HGCB_BUNDLE_BASENAME + +After upload, the file should be available at an url defined by +`clone-bundles.url-template`. + + [clone-bundles] + url-template=https://bundles.host/cache/clone-bundles/{basename} """ +import os +import weakref + +from mercurial.i18n import _ + from mercurial import ( bundlecaches, + commands, + error, extensions, + localrepo, + lock, + node, + registrar, + util, wireprotov1server, ) + +from mercurial.utils import ( + procutil, +) + testedwith = b'ships-with-hg-core' @@ -226,3 +283,550 @@ def extsetup(ui): extensions.wrapfunction(wireprotov1server, b'_capabilities', capabilities) + + +# logic for bundle auto-generation + + +configtable = {} +configitem = registrar.configitem(configtable) + +cmdtable = {} +command = registrar.command(cmdtable) + +configitem(b'clone-bundles', b'auto-generate.formats', default=list) + + +configitem(b'clone-bundles', b'upload-command', default=None) + +configitem(b'clone-bundles', b'url-template', default=None) + +configitem(b'devel', b'debug.clonebundles', default=False) + + +# category for the post-close transaction hooks +CAT_POSTCLOSE = b"clonebundles-autobundles" + +# template for bundle file names +BUNDLE_MASK = ( + b"full-%(bundle_type)s-%(revs)d_revs-%(tip_short)s_tip-%(op_id)s.hg" +) + + +# file in .hg/ use to track clonebundles being auto-generated +AUTO_GEN_FILE = b'clonebundles.auto-gen' + + +class BundleBase(object): + """represents the core of properties that matters for us in a bundle + + :bundle_type: the bundlespec (see hg help bundlespec) + :revs: the number of revisions in the repo at bundle creation time + :tip_rev: the rev-num of the tip revision + :tip_node: the node id of the tip-most revision in the bundle + + :ready: True if the bundle is ready to be served + """ + + ready = False + + def __init__(self, bundle_type, revs, tip_rev, tip_node): + self.bundle_type = bundle_type + self.revs = revs + self.tip_rev = tip_rev + self.tip_node = tip_node + + def valid_for(self, repo): + """is this bundle applicable to the current repository + + This is useful for detecting bundles made irrelevant by stripping. + """ + tip_node = node.bin(self.tip_node) + return repo.changelog.index.get_rev(tip_node) == self.tip_rev + + def __eq__(self, other): + left = (self.ready, self.bundle_type, self.tip_rev, self.tip_node) + right = (other.ready, other.bundle_type, other.tip_rev, other.tip_node) + return left == right + + def __neq__(self, other): + return not self == other + + def __cmp__(self, other): + if self == other: + return 0 + return -1 + + +class RequestedBundle(BundleBase): + """A bundle that should be generated. + + Additional attributes compared to BundleBase + :heads: list of head revisions (as rev-num) + :op_id: a "unique" identifier for the operation triggering the change + """ + + def __init__(self, bundle_type, revs, tip_rev, tip_node, head_revs, op_id): + self.head_revs = head_revs + self.op_id = op_id + super(RequestedBundle, self).__init__( + bundle_type, + revs, + tip_rev, + tip_node, + ) + + @property + def suggested_filename(self): + """A filename that can be used for the generated bundle""" + data = { + b'bundle_type': self.bundle_type, + b'revs': self.revs, + b'heads': self.head_revs, + b'tip_rev': self.tip_rev, + b'tip_node': self.tip_node, + b'tip_short': self.tip_node[:12], + b'op_id': self.op_id, + } + return BUNDLE_MASK % data + + def generate_bundle(self, repo, file_path): + """generate the bundle at `filepath`""" + commands.bundle( + repo.ui, + repo, + file_path, + base=[b"null"], + rev=self.head_revs, + type=self.bundle_type, + quiet=True, + ) + + def generating(self, file_path, hostname=None, pid=None): + """return a GeneratingBundle object from this object""" + if pid is None: + pid = os.getpid() + if hostname is None: + hostname = lock._getlockprefix() + return GeneratingBundle( + self.bundle_type, + self.revs, + self.tip_rev, + self.tip_node, + hostname, + pid, + file_path, + ) + + +class GeneratingBundle(BundleBase): + """A bundle being generated + + extra attributes compared to BundleBase: + + :hostname: the hostname of the machine generating the bundle + :pid: the pid of the process generating the bundle + :filepath: the target filename of the bundle + + These attributes exist to help detect stalled generation processes. + """ + + ready = False + + def __init__( + self, bundle_type, revs, tip_rev, tip_node, hostname, pid, filepath + ): + self.hostname = hostname + self.pid = pid + self.filepath = filepath + super(GeneratingBundle, self).__init__( + bundle_type, revs, tip_rev, tip_node + ) + + @classmethod + def from_line(cls, line): + """create an object by deserializing a line from AUTO_GEN_FILE""" + assert line.startswith(b'PENDING-v1 ') + ( + __, + bundle_type, + revs, + tip_rev, + tip_node, + hostname, + pid, + filepath, + ) = line.split() + hostname = util.urlreq.unquote(hostname) + filepath = util.urlreq.unquote(filepath) + revs = int(revs) + tip_rev = int(tip_rev) + pid = int(pid) + return cls( + bundle_type, revs, tip_rev, tip_node, hostname, pid, filepath + ) + + def to_line(self): + """serialize the object to include as a line in AUTO_GEN_FILE""" + templ = b"PENDING-v1 %s %d %d %s %s %d %s" + data = ( + self.bundle_type, + self.revs, + self.tip_rev, + self.tip_node, + util.urlreq.quote(self.hostname), + self.pid, + util.urlreq.quote(self.filepath), + ) + return templ % data + + def __eq__(self, other): + if not super(GeneratingBundle, self).__eq__(other): + return False + left = (self.hostname, self.pid, self.filepath) + right = (other.hostname, other.pid, other.filepath) + return left == right + + def uploaded(self, url, basename): + """return a GeneratedBundle from this object""" + return GeneratedBundle( + self.bundle_type, + self.revs, + self.tip_rev, + self.tip_node, + url, + basename, + ) + + +class GeneratedBundle(BundleBase): + """A bundle that is done being generated and can be served + + extra attributes compared to BundleBase: + + :file_url: the url where the bundle is available. + :basename: the "basename" used to upload (useful for deletion) + + These attributes exist to generate a bundle manifest + (.hg/pullbundles.manifest) + """ + + ready = True + + def __init__( + self, bundle_type, revs, tip_rev, tip_node, file_url, basename + ): + self.file_url = file_url + self.basename = basename + super(GeneratedBundle, self).__init__( + bundle_type, revs, tip_rev, tip_node + ) + + @classmethod + def from_line(cls, line): + """create an object by deserializing a line from AUTO_GEN_FILE""" + assert line.startswith(b'DONE-v1 ') + ( + __, + bundle_type, + revs, + tip_rev, + tip_node, + file_url, + basename, + ) = line.split() + revs = int(revs) + tip_rev = int(tip_rev) + file_url = util.urlreq.unquote(file_url) + return cls(bundle_type, revs, tip_rev, tip_node, file_url, basename) + + def to_line(self): + """serialize the object to include as a line in AUTO_GEN_FILE""" + templ = b"DONE-v1 %s %d %d %s %s %s" + data = ( + self.bundle_type, + self.revs, + self.tip_rev, + self.tip_node, + util.urlreq.quote(self.file_url), + self.basename, + ) + return templ % data + + def manifest_line(self): + """serialize the object to include as a line in pullbundles.manifest""" + templ = b"%s BUNDLESPEC=%s REQUIRESNI=true" + return templ % (self.file_url, self.bundle_type) + + def __eq__(self, other): + if not super(GeneratedBundle, self).__eq__(other): + return False + return self.file_url == other.file_url + + +def parse_auto_gen(content): + """parse the AUTO_GEN_FILE to return a list of Bundle object""" + bundles = [] + for line in content.splitlines(): + if line.startswith(b'PENDING-v1 '): + bundles.append(GeneratingBundle.from_line(line)) + elif line.startswith(b'DONE-v1 '): + bundles.append(GeneratedBundle.from_line(line)) + return bundles + + +def dumps_auto_gen(bundles): + """serialize a list of Bundle as a AUTO_GEN_FILE content""" + lines = [] + for b in bundles: + lines.append(b"%s\n" % b.to_line()) + lines.sort() + return b"".join(lines) + + +def read_auto_gen(repo): + """read the AUTO_GEN_FILE for the <repo> a list of Bundle object""" + data = repo.vfs.tryread(AUTO_GEN_FILE) + if not data: + return [] + return parse_auto_gen(data) + + +def write_auto_gen(repo, bundles): + """write a list of Bundle objects into the repo's AUTO_GEN_FILE""" + assert repo._cb_lock_ref is not None + data = dumps_auto_gen(bundles) + with repo.vfs(AUTO_GEN_FILE, mode=b'wb', atomictemp=True) as f: + f.write(data) + + +def generate_manifest(bundles): + """write a list of Bundle objects into the repo's AUTO_GEN_FILE""" + bundles = list(bundles) + bundles.sort(key=lambda b: b.bundle_type) + lines = [] + for b in bundles: + lines.append(b"%s\n" % b.manifest_line()) + return b"".join(lines) + + +def update_ondisk_manifest(repo): + """update the clonebundle manifest with latest url""" + with repo.clonebundles_lock(): + bundles = read_auto_gen(repo) + + per_types = {} + for b in bundles: + if not (b.ready and b.valid_for(repo)): + continue + current = per_types.get(b.bundle_type) + if current is not None and current.revs >= b.revs: + continue + per_types[b.bundle_type] = b + manifest = generate_manifest(per_types.values()) + with repo.vfs( + bundlecaches.CB_MANIFEST_FILE, mode=b"wb", atomictemp=True + ) as f: + f.write(manifest) + + +def update_bundle_list(repo, new_bundles=(), del_bundles=()): + """modify the repo's AUTO_GEN_FILE + + This method also regenerates the clone bundle manifest when needed""" + with repo.clonebundles_lock(): + bundles = read_auto_gen(repo) + if del_bundles: + bundles = [b for b in bundles if b not in del_bundles] + new_bundles = [b for b in new_bundles if b not in bundles] + bundles.extend(new_bundles) + write_auto_gen(repo, bundles) + all_changed = [] + all_changed.extend(new_bundles) + all_changed.extend(del_bundles) + if any(b.ready for b in all_changed): + update_ondisk_manifest(repo) + + +def cleanup_tmp_bundle(repo, target): + """remove a GeneratingBundle file and entry""" + assert not target.ready + with repo.clonebundles_lock(): + repo.vfs.tryunlink(target.filepath) + update_bundle_list(repo, del_bundles=[target]) + + +def finalize_one_bundle(repo, target): + """upload a generated bundle and advertise it in the clonebundles.manifest""" + with repo.clonebundles_lock(): + bundles = read_auto_gen(repo) + if target in bundles and target.valid_for(repo): + result = upload_bundle(repo, target) + update_bundle_list(repo, new_bundles=[result]) + cleanup_tmp_bundle(repo, target) + + +def upload_bundle(repo, bundle): + """upload the result of a GeneratingBundle and return a GeneratedBundle + + The upload is done using the `clone-bundles.upload-command` + """ + cmd = repo.ui.config(b'clone-bundles', b'upload-command') + url = repo.ui.config(b'clone-bundles', b'url-template') + basename = repo.vfs.basename(bundle.filepath) + filepath = procutil.shellquote(bundle.filepath) + variables = { + b'HGCB_BUNDLE_PATH': filepath, + b'HGCB_BUNDLE_BASENAME': basename, + } + env = procutil.shellenviron(environ=variables) + ret = repo.ui.system(cmd, environ=env) + if ret: + raise error.Abort(b"command returned status %d: %s" % (ret, cmd)) + url = ( + url.decode('utf8') + .format(basename=basename.decode('utf8')) + .encode('utf8') + ) + return bundle.uploaded(url, basename) + + +def auto_bundle_needed_actions(repo, bundles, op_id): + """find the list of bundles that need action + + returns a list of RequestedBundle objects that need to be generated and + uploaded.""" + create_bundles = [] + repo = repo.filtered(b"immutable") + targets = repo.ui.configlist(b'clone-bundles', b'auto-generate.formats') + revs = len(repo.changelog) + generic_data = { + 'revs': revs, + 'head_revs': repo.changelog.headrevs(), + 'tip_rev': repo.changelog.tiprev(), + 'tip_node': node.hex(repo.changelog.tip()), + 'op_id': op_id, + } + for t in targets: + data = generic_data.copy() + data['bundle_type'] = t + b = RequestedBundle(**data) + create_bundles.append(b) + return create_bundles + + +def start_one_bundle(repo, bundle): + """start the generation of a single bundle file + + the `bundle` argument should be a RequestedBundle object. + + This data is passed to the `debugmakeclonebundles` "as is". + """ + data = util.pickle.dumps(bundle) + cmd = [procutil.hgexecutable(), b'--cwd', repo.path, INTERNAL_CMD] + env = procutil.shellenviron() + msg = b'clone-bundles: starting bundle generation: %s\n' + stdout = None + stderr = None + waits = [] + record_wait = None + if repo.ui.configbool(b'devel', b'debug.clonebundles'): + stdout = procutil.stdout + stderr = procutil.stderr + repo.ui.write(msg % bundle.bundle_type) + record_wait = waits.append + else: + repo.ui.debug(msg % bundle.bundle_type) + bg = procutil.runbgcommand + bg( + cmd, + env, + stdin_bytes=data, + stdout=stdout, + stderr=stderr, + record_wait=record_wait, + ) + for f in waits: + f() + + +INTERNAL_CMD = b'debug::internal-make-clone-bundles' + + +@command(INTERNAL_CMD, [], b'') +def debugmakeclonebundles(ui, repo): + """Internal command to auto-generate debug bundles""" + requested_bundle = util.pickle.load(procutil.stdin) + procutil.stdin.close() + + fname = requested_bundle.suggested_filename + fpath = repo.vfs.makedirs(b'tmp-bundles') + fpath = repo.vfs.join(b'tmp-bundles', fname) + bundle = requested_bundle.generating(fpath) + update_bundle_list(repo, new_bundles=[bundle]) + + requested_bundle.generate_bundle(repo, fpath) + + repo.invalidate() + finalize_one_bundle(repo, bundle) + + +def make_auto_bundler(source_repo): + reporef = weakref.ref(source_repo) + + def autobundle(tr): + repo = reporef() + assert repo is not None + bundles = read_auto_gen(repo) + new = auto_bundle_needed_actions(repo, bundles, b"%d_txn" % id(tr)) + for data in new: + start_one_bundle(repo, data) + return None + + return autobundle + + +def reposetup(ui, repo): + """install the two pieces needed for automatic clonebundle generation + + - add a "post-close" hook that fires bundling when needed + - introduce a clone-bundle lock to let multiple processes meddle with the + state files. + """ + if not repo.local(): + return + + class autobundlesrepo(repo.__class__): + def transaction(self, *args, **kwargs): + tr = super(autobundlesrepo, self).transaction(*args, **kwargs) + targets = repo.ui.configlist( + b'clone-bundles', b'auto-generate.formats' + ) + if targets: + tr.addpostclose(CAT_POSTCLOSE, make_auto_bundler(self)) + return tr + + @localrepo.unfilteredmethod + def clonebundles_lock(self, wait=True): + '''Lock the repository file related to clone bundles''' + if not util.safehasattr(self, '_cb_lock_ref'): + self._cb_lock_ref = None + l = self._currentlock(self._cb_lock_ref) + if l is not None: + l.lock() + return l + + l = self._lock( + vfs=self.vfs, + lockname=b"clonebundleslock", + wait=wait, + releasefn=None, + acquirefn=None, + desc=_(b'repository %s') % self.origroot, + ) + self._cb_lock_ref = weakref.ref(l) + return l + + repo._wlockfreeprefix.add(AUTO_GEN_FILE) + repo._wlockfreeprefix.add(bundlecaches.CB_MANIFEST_FILE) + repo.__class__ = autobundlesrepo
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-clonebundles-autogen.t Mon Mar 13 17:34:18 2023 +0100 @@ -0,0 +1,70 @@ + +#require no-reposimplestore no-chg + +initial setup + + $ hg init server + $ cat >> server/.hg/hgrc << EOF + > [extensions] + > clonebundles = + > + > [clone-bundles] + > auto-generate.formats = v2 + > upload-command = cp "\$HGCB_BUNDLE_PATH" "$TESTTMP"/final-upload/ + > url-template = file://$TESTTMP/final-upload/{basename} + > + > [devel] + > debug.clonebundles=yes + > EOF + + $ mkdir final-upload + $ hg clone server client + updating to branch default + 0 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ cd client + +Test bundles are generated on push +================================== + + $ touch foo + $ hg -q commit -A -m 'add foo' + $ touch bar + $ hg -q commit -A -m 'add bar' + $ hg push + pushing to $TESTTMP/server + searching for changes + adding changesets + adding manifests + adding file changes + 2 changesets found + added 2 changesets with 2 changes to 2 files + clone-bundles: starting bundle generation: v2 + $ cat ../server/.hg/clonebundles.manifest + file:/*/$TESTTMP/final-upload/full-v2-2_revs-aaff8d2ffbbf_tip-*_txn.hg BUNDLESPEC=v2 REQUIRESNI=true (glob) + $ ls -1 ../final-upload + full-v2-2_revs-aaff8d2ffbbf_tip-*_txn.hg (glob) + $ ls -1 ../server/.hg/tmp-bundles + +Newer bundles are generated with more pushes +-------------------------------------------- + + $ touch baz + $ hg -q commit -A -m 'add baz' + $ touch buz + $ hg -q commit -A -m 'add buz' + $ hg push + pushing to $TESTTMP/server + searching for changes + adding changesets + adding manifests + adding file changes + 4 changesets found + added 2 changesets with 2 changes to 2 files + clone-bundles: starting bundle generation: v2 + + $ cat ../server/.hg/clonebundles.manifest + file:/*/$TESTTMP/final-upload/full-v2-4_revs-6427147b985a_tip-*_txn.hg BUNDLESPEC=v2 REQUIRESNI=true (glob) + $ ls -1 ../final-upload + full-v2-2_revs-aaff8d2ffbbf_tip-*_txn.hg (glob) + full-v2-4_revs-6427147b985a_tip-*_txn.hg (glob) + $ ls -1 ../server/.hg/tmp-bundles