# HG changeset patch # User Joerg Sonnenberger # Date 1719451972 -7200 # Node ID aa7f4a45d8fa66c71d761311d91f07ccf39c6a7a # Parent d0703481956566b7744982a0999a621373bd66d0 clonebundles: allow manifest to specify sha256 digest of bundles diff -r d07034819565 -r aa7f4a45d8fa mercurial/bundlecaches.py --- a/mercurial/bundlecaches.py Thu Oct 03 14:45:01 2024 +0200 +++ b/mercurial/bundlecaches.py Thu Jun 27 03:32:52 2024 +0200 @@ -6,6 +6,7 @@ from __future__ import annotations import collections +import re import typing from typing import ( @@ -27,6 +28,7 @@ error, requirements as requirementsmod, sslutil, + url as urlmod, util, ) from .utils import stringutil @@ -406,6 +408,9 @@ return False +digest_regex = re.compile(b'^[a-z0-9]+:[0-9a-f]+(,[a-z0-9]+:[0-9a-f]+)*$') + + def filterclonebundleentries( repo, entries, streamclonerequested=False, pullbundles=False ): @@ -481,6 +486,43 @@ ) continue + if b'DIGEST' in entry: + if not digest_regex.match(entry[b'DIGEST']): + repo.ui.debug( + b'filtering %s due to a bad DIGEST attribute\n' % url + ) + continue + supported = 0 + seen = {} + for digest_entry in entry[b'DIGEST'].split(b','): + algo, digest = digest_entry.split(b':') + if algo not in seen: + seen[algo] = digest + elif seen[algo] != digest: + repo.ui.debug( + b'filtering %s due to conflicting %s digests\n' + % (url, algo) + ) + supported = 0 + break + digester = urlmod.digesthandler.digest_algorithms.get(algo) + if digester is None: + continue + if len(digest) != digester().digest_size * 2: + repo.ui.debug( + b'filtering %s due to a bad %s digest\n' % (url, algo) + ) + supported = 0 + break + supported += 1 + else: + if supported == 0: + repo.ui.debug( + b'filtering %s due to lack of supported digest\n' % url + ) + if supported == 0: + continue + newentries.append(entry) return newentries diff -r d07034819565 -r aa7f4a45d8fa mercurial/exchange.py --- a/mercurial/exchange.py Thu Oct 03 14:45:01 2024 +0200 +++ b/mercurial/exchange.py Thu Jun 27 03:32:52 2024 +0200 @@ 
-2900,8 +2900,23 @@ entries = bundlecaches.sortclonebundleentries(repo.ui, entries) url = entries[0][b'URL'] + digest = entries[0].get(b'DIGEST') + if digest: + algorithms = urlmod.digesthandler.digest_algorithms.keys() + preference = dict(zip(algorithms, range(len(algorithms)))) + best_entry = None + best_preference = len(preference) + for digest_entry in digest.split(b','): + cur_algo, cur_digest = digest_entry.split(b':') + if cur_algo not in preference: + continue + if preference[cur_algo] < best_preference: + best_entry = digest_entry + best_preference = preference[cur_algo] + digest = best_entry + repo.ui.status(_(b'applying clone bundle from %s\n') % url) - if trypullbundlefromurl(repo.ui, repo, url, remote): + if trypullbundlefromurl(repo.ui, repo, url, remote, digest): repo.ui.status(_(b'finished applying clone bundle\n')) # Bundle failed. # @@ -2930,14 +2945,14 @@ return util.chunkbuffer(peerclonebundle) -def trypullbundlefromurl(ui, repo, url, peer): +def trypullbundlefromurl(ui, repo, url, peer, digest): """Attempt to apply a bundle from a URL.""" with repo.lock(), repo.transaction(b'bundleurl') as tr: try: if url.startswith(bundlecaches.CLONEBUNDLESCHEME): fh = inline_clone_bundle_open(ui, url, peer) else: - fh = urlmod.open(ui, url) + fh = urlmod.open(ui, url, digest=digest) cg = readbundle(ui, fh, b'stream') if isinstance(cg, streamclone.streamcloneapplier): diff -r d07034819565 -r aa7f4a45d8fa mercurial/url.py --- a/mercurial/url.py Thu Oct 03 14:45:01 2024 +0200 +++ b/mercurial/url.py Thu Jun 27 03:32:52 2024 +0200 @@ -10,9 +10,11 @@ from __future__ import annotations import base64 +import hashlib import socket from .i18n import _ +from .node import hex from . import ( encoding, error, @@ -499,6 +501,71 @@ https_response = http_response +class digesthandler(urlreq.basehandler): + # exchange.py assumes the algorithms are listed in order of preference, + # earlier entries are preferred. 
+ digest_algorithms = { + b'sha256': hashlib.sha256, + b'sha512': hashlib.sha512, + } + + def __init__(self, digest): + if b':' not in digest: + raise error.Abort(_(b'invalid digest specification')) + algo, checksum = digest.split(b':') + if algo not in self.digest_algorithms: + raise error.Abort(_(b'unsupported digest algorithm: %s') % algo) + self._digest = checksum + self._hasher = self.digest_algorithms[algo]() + + def http_response(self, request, response): + class digestresponse(response.__class__): + def _digest_input(self, data): + self._hasher.update(data) + self._digest_consumed += len(data) + if self._digest_finished: + digest = hex(self._hasher.digest()) + if digest != self._digest: + raise error.SecurityError( + _( + b'file with digest %s expected, but %s found for %d bytes' + ) + % ( + pycompat.bytestr(self._digest), + pycompat.bytestr(digest), + self._digest_consumed, + ) + ) + + def read(self, amt=None): + data = super().read(amt) + self._digest_input(data) + return data + + def readline(self): + data = super().readline() + self._digest_input(data) + return data + + def readinto(self, dest): + got = super().readinto(dest) + self._digest_input(dest[:got]) + return got + + def _close_conn(self): + self._digest_finished = True + return super().close() + + response.__class__ = digestresponse + response._digest = self._digest + response._digest_consumed = 0 + response._hasher = self._hasher.copy() + response._digest_finished = False + return response + + https_response = http_response + + handlerfuncs = [] @@ -510,6 +577,7 @@ loggingname=b's', loggingopts=None, sendaccept=True, + digest=None, ): """ construct an opener suitable for urllib2 @@ -562,6 +630,8 @@ handlers.extend([h(ui, passmgr) for h in handlerfuncs]) handlers.append(urlreq.httpcookieprocessor(cookiejar=load_cookiejar(ui))) handlers.append(readlinehandler()) + if digest: + handlers.append(digesthandler(digest)) opener = urlreq.buildopener(*handlers) # keepalive.py's handlers will populate 
these attributes if they exist. @@ -600,7 +670,7 @@ return opener -def open(ui, url_, data=None, sendaccept=True): +def open(ui, url_, data=None, sendaccept=True, digest=None): u = urlutil.url(url_) if u.scheme: u.scheme = u.scheme.lower() @@ -611,7 +681,7 @@ urlreq.pathname2url(pycompat.fsdecode(path)) ) authinfo = None - return opener(ui, authinfo, sendaccept=sendaccept).open( + return opener(ui, authinfo, sendaccept=sendaccept, digest=digest).open( pycompat.strurl(url_), data ) diff -r d07034819565 -r aa7f4a45d8fa tests/test-clonebundles.t --- a/tests/test-clonebundles.t Thu Oct 03 14:45:01 2024 +0200 +++ b/tests/test-clonebundles.t Thu Jun 27 03:32:52 2024 +0200 @@ -743,6 +743,66 @@ (sent 4 HTTP requests and * bytes; received * bytes in responses) (glob) $ killdaemons.py +Testing a clone bundle with digest +================================== + + $ "$PYTHON" $TESTDIR/dumbhttp.py -p $HGPORT1 --pid http.pid + $ cat http.pid >> $DAEMON_PIDS + $ hg -R server serve -d -p $HGPORT --pid-file hg.pid --accesslog access.log + $ cat hg.pid >> $DAEMON_PIDS + + $ digest=$("$PYTHON" -c "import hashlib; print (hashlib.sha256(open('gz-a.hg', 'rb').read()).hexdigest())") + $ cat > server/.hg/clonebundles.manifest << EOF + > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2 DIGEST=sha256:${digest} + > EOF + $ hg clone -U http://localhost:$HGPORT digest-valid + applying clone bundle from http://localhost:$HGPORT1/gz-a.hg + adding changesets + adding manifests + adding file changes + added 2 changesets with 2 changes to 2 files + finished applying clone bundle + searching for changes + no changes found + 2 local changesets published + $ digest_bad=$("$PYTHON" -c "import hashlib; print (hashlib.sha256(open('gz-a.hg', 'rb').read()+b'.').hexdigest())") + $ cat > server/.hg/clonebundles.manifest << EOF + > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2 DIGEST=sha256:${digest_bad} + > EOF + $ hg clone -U http://localhost:$HGPORT digest-invalid + applying clone bundle from 
http://localhost:$HGPORT1/gz-a.hg + abort: file with digest [0-9a-f]* expected, but [0-9a-f]* found for [0-9]* bytes (re) + [150] + $ cat > server/.hg/clonebundles.manifest << EOF + > http://localhost:$HGPORT1/bad-a.hg BUNDLESPEC=gzip-v2 DIGEST=sha256:xx + > http://localhost:$HGPORT1/bad-b.hg BUNDLESPEC=gzip-v2 DIGEST=xxx:0000 + > http://localhost:$HGPORT1/bad-c.hg BUNDLESPEC=gzip-v2 DIGEST=sha256:0000 + > http://localhost:$HGPORT1/bad-d.hg BUNDLESPEC=gzip-v2 DIGEST=xxx:00,xxx:01 + > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2 DIGEST=sha256:${digest_bad} + > EOF + $ hg clone --debug -U http://localhost:$HGPORT digest-malformed + using http://localhost:$HGPORT/ + sending capabilities command + sending clonebundles_manifest command + filtering http://localhost:$HGPORT1/bad-a.hg due to a bad DIGEST attribute + filtering http://localhost:$HGPORT1/bad-b.hg due to lack of supported digest + filtering http://localhost:$HGPORT1/bad-c.hg due to a bad sha256 digest + filtering http://localhost:$HGPORT1/bad-d.hg due to conflicting xxx digests + applying clone bundle from http://localhost:$HGPORT1/gz-a.hg + bundle2-input-bundle: 1 params with-transaction + bundle2-input-bundle: 0 parts total + \(sent [0-9]* HTTP requests and [0-9]* bytes; received [0-9]* bytes in responses\) (re) + abort: file with digest [0-9a-f]* expected, but [0-9a-f]* found for [0-9]* bytes (re) + [150] + $ cat > server/.hg/clonebundles.manifest << EOF + > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2 DIGEST=sha512:00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000,sha256:0000000000000000000000000000000000000000000000000000000000000000 + > EOF + $ hg clone -U http://localhost:$HGPORT digest-preference + applying clone bundle from http://localhost:$HGPORT1/gz-a.hg + abort: file with digest 0{64} expected, but [0-9a-f]+ found for [0-9]+ bytes (re) + [150] + $ killdaemons.py + Testing a clone bundles that involves 
revlog splitting (issue6811) ==================================================================