changeset 51982:aa7f4a45d8fa

clonebundles: allow manifest to specify sha256 digest of bundles
author Joerg Sonnenberger <joerg@bec.de>
date Thu, 27 Jun 2024 03:32:52 +0200
parents d07034819565
children 46afce95e5a5
files mercurial/bundlecaches.py mercurial/exchange.py mercurial/url.py tests/test-clonebundles.t
diffstat 4 files changed, 192 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/bundlecaches.py	Thu Oct 03 14:45:01 2024 +0200
+++ b/mercurial/bundlecaches.py	Thu Jun 27 03:32:52 2024 +0200
@@ -6,6 +6,7 @@
 from __future__ import annotations
 
 import collections
+import re
 import typing
 
 from typing import (
@@ -27,6 +28,7 @@
     error,
     requirements as requirementsmod,
     sslutil,
+    url as urlmod,
     util,
 )
 from .utils import stringutil
@@ -406,6 +408,9 @@
     return False
 
 
+digest_regex = re.compile(b'^[a-z0-9]+:[0-9a-f]+(,[a-z0-9]+:[0-9a-f]+)*$')
+
+
 def filterclonebundleentries(
     repo, entries, streamclonerequested=False, pullbundles=False
 ):
@@ -481,6 +486,43 @@
                 )
                 continue
 
+        if b'DIGEST' in entry:
+            if not digest_regex.match(entry[b'DIGEST']):
+                repo.ui.debug(
+                    b'filtering %s due to a bad DIGEST attribute\n' % url
+                )
+                continue
+            supported = 0
+            seen = {}
+            for digest_entry in entry[b'DIGEST'].split(b','):
+                algo, digest = digest_entry.split(b':')
+                if algo not in seen:
+                    seen[algo] = digest
+                elif seen[algo] != digest:
+                    repo.ui.debug(
+                        b'filtering %s due to conflicting %s digests\n'
+                        % (url, algo)
+                    )
+                    supported = 0
+                    break
+                digester = urlmod.digesthandler.digest_algorithms.get(algo)
+                if digester is None:
+                    continue
+                if len(digest) != digester().digest_size * 2:
+                    repo.ui.debug(
+                        b'filtering %s due to a bad %s digest\n' % (url, algo)
+                    )
+                    supported = 0
+                    break
+                supported += 1
+            else:
+                if supported == 0:
+                    repo.ui.debug(
+                        b'filtering %s due to lack of supported digest\n' % url
+                    )
+            if supported == 0:
+                continue
+
         newentries.append(entry)
 
     return newentries
--- a/mercurial/exchange.py	Thu Oct 03 14:45:01 2024 +0200
+++ b/mercurial/exchange.py	Thu Jun 27 03:32:52 2024 +0200
@@ -2900,8 +2900,23 @@
     entries = bundlecaches.sortclonebundleentries(repo.ui, entries)
 
     url = entries[0][b'URL']
+    digest = entries[0].get(b'DIGEST')
+    if digest:
+        algorithms = urlmod.digesthandler.digest_algorithms.keys()
+        preference = dict(zip(algorithms, range(len(algorithms))))
+        best_entry = None
+        best_preference = len(preference)
+        for digest_entry in digest.split(b','):
+            cur_algo, cur_digest = digest_entry.split(b':')
+            if cur_algo not in preference:
+                continue
+            if preference[cur_algo] < best_preference:
+                best_entry = digest_entry
+                best_preference = preference[cur_algo]
+        digest = best_entry
+
     repo.ui.status(_(b'applying clone bundle from %s\n') % url)
-    if trypullbundlefromurl(repo.ui, repo, url, remote):
+    if trypullbundlefromurl(repo.ui, repo, url, remote, digest):
         repo.ui.status(_(b'finished applying clone bundle\n'))
     # Bundle failed.
     #
@@ -2930,14 +2945,14 @@
     return util.chunkbuffer(peerclonebundle)
 
 
-def trypullbundlefromurl(ui, repo, url, peer):
+def trypullbundlefromurl(ui, repo, url, peer, digest):
     """Attempt to apply a bundle from a URL."""
     with repo.lock(), repo.transaction(b'bundleurl') as tr:
         try:
             if url.startswith(bundlecaches.CLONEBUNDLESCHEME):
                 fh = inline_clone_bundle_open(ui, url, peer)
             else:
-                fh = urlmod.open(ui, url)
+                fh = urlmod.open(ui, url, digest=digest)
             cg = readbundle(ui, fh, b'stream')
 
             if isinstance(cg, streamclone.streamcloneapplier):
--- a/mercurial/url.py	Thu Oct 03 14:45:01 2024 +0200
+++ b/mercurial/url.py	Thu Jun 27 03:32:52 2024 +0200
@@ -10,9 +10,11 @@
 from __future__ import annotations
 
 import base64
+import hashlib
 import socket
 
 from .i18n import _
+from .node import hex
 from . import (
     encoding,
     error,
@@ -499,6 +501,108 @@
     https_response = http_response
 
 
+class digesthandler(urlreq.basehandler):
+    """Response processor checking a download against an expected digest.
+
+    ``digest`` is a ``<algorithm>:<hex checksum>`` byte string whose
+    algorithm must be a key of ``digest_algorithms``. Each HTTP or HTTPS
+    response passing through the opener is re-classed so that every chunk
+    read from it is also fed to a hasher; once ``_close_conn`` has run on
+    the response, the next chunk fed in triggers the comparison and a
+    mismatch raises ``error.SecurityError``.
+
+    Only the ``http_response``/``https_response`` hooks are defined, so
+    responses for other URL schemes are not wrapped by this handler.
+    """
+
+    # exchange.py assumes the algorithms are listed in order of preference,
+    # earlier entries are preferred.
+    digest_algorithms = {
+        b'sha256': hashlib.sha256,
+        b'sha512': hashlib.sha512,
+    }
+
+    def __init__(self, digest):
+        """Parse ``digest`` and prepare the hasher template.
+
+        Raises ``error.Abort`` when the specification is not exactly one
+        ``algorithm:checksum`` pair or names an unsupported algorithm.
+        """
+        # Require exactly one separator: with a plain two-name unpacking of
+        # split(), an extra b':' would surface as a bare ValueError rather
+        # than the intended abort.
+        parts = digest.split(b':')
+        if len(parts) != 2:
+            raise error.Abort(_(b'invalid digest specification'))
+        algo, checksum = parts
+        if algo not in self.digest_algorithms:
+            raise error.Abort(_(b'unsupported digest algorithm: %s') % algo)
+        self._digest = checksum
+        self._hasher = self.digest_algorithms[algo]()
+
+    def http_response(self, request, response):
+        """Wrap ``response`` so that consumed data is hashed and verified.
+
+        The response object is returned after its class has been swapped for
+        a dynamically created subclass of its original class.
+        """
+
+        class digestresponse(response.__class__):
+            def _digest_input(self, data):
+                # Account for the chunk first, then verify if the stream has
+                # been flagged as finished in the meantime (see _close_conn).
+                self._hasher.update(data)
+                self._digest_consumed += len(data)
+                if self._digest_finished:
+                    digest = hex(self._hasher.digest())
+                    if digest != self._digest:
+                        raise error.SecurityError(
+                            _(
+                                b'file with digest %s expected, but %s found for %d bytes'
+                            )
+                            % (
+                                pycompat.bytestr(self._digest),
+                                pycompat.bytestr(digest),
+                                self._digest_consumed,
+                            )
+                        )
+
+            def read(self, amt=None):
+                data = super().read(amt)
+                self._digest_input(data)
+                return data
+
+            def readline(self):
+                data = super().readline()
+                self._digest_input(data)
+                return data
+
+            def readinto(self, dest):
+                got = super().readinto(dest)
+                # Only the first ``got`` bytes of the buffer were filled.
+                self._digest_input(dest[:got])
+                return got
+
+            def _close_conn(self):
+                # Flag the stream as complete so the next _digest_input()
+                # call performs the comparison.
+                # NOTE(review): this delegates to close() instead of the
+                # parent's _close_conn(); presumably intentional for the
+                # response class used by the opener -- confirm.
+                self._digest_finished = True
+                return super().close()
+
+        response.__class__ = digestresponse
+        response._digest = self._digest
+        response._digest_consumed = 0
+        # Copy so that each response hashes independently of the handler's
+        # template hasher.
+        response._hasher = self._hasher.copy()
+        response._digest_finished = False
+        return response
+
+    https_response = http_response
+
+
 handlerfuncs = []
 
 
@@ -510,6 +577,7 @@
     loggingname=b's',
     loggingopts=None,
     sendaccept=True,
+    digest=None,
 ):
     """
     construct an opener suitable for urllib2
@@ -562,6 +630,8 @@
     handlers.extend([h(ui, passmgr) for h in handlerfuncs])
     handlers.append(urlreq.httpcookieprocessor(cookiejar=load_cookiejar(ui)))
     handlers.append(readlinehandler())
+    if digest:
+        handlers.append(digesthandler(digest))
     opener = urlreq.buildopener(*handlers)
 
     # keepalive.py's handlers will populate these attributes if they exist.
@@ -600,7 +670,7 @@
     return opener
 
 
-def open(ui, url_, data=None, sendaccept=True):
+def open(ui, url_, data=None, sendaccept=True, digest=None):
     u = urlutil.url(url_)
     if u.scheme:
         u.scheme = u.scheme.lower()
@@ -611,7 +681,7 @@
             urlreq.pathname2url(pycompat.fsdecode(path))
         )
         authinfo = None
-    return opener(ui, authinfo, sendaccept=sendaccept).open(
+    return opener(ui, authinfo, sendaccept=sendaccept, digest=digest).open(
         pycompat.strurl(url_), data
     )
 
--- a/tests/test-clonebundles.t	Thu Oct 03 14:45:01 2024 +0200
+++ b/tests/test-clonebundles.t	Thu Jun 27 03:32:52 2024 +0200
@@ -743,6 +743,66 @@
   (sent 4 HTTP requests and * bytes; received * bytes in responses) (glob)
   $ killdaemons.py
 
+Testing a clone bundle with digest
+==================================
+
+  $ "$PYTHON" $TESTDIR/dumbhttp.py -p $HGPORT1 --pid http.pid
+  $ cat http.pid >> $DAEMON_PIDS
+  $ hg -R server serve -d -p $HGPORT --pid-file hg.pid --accesslog access.log
+  $ cat hg.pid >> $DAEMON_PIDS
+
+  $ digest=$("$PYTHON" -c "import hashlib; print (hashlib.sha256(open('gz-a.hg', 'rb').read()).hexdigest())")
+  $ cat > server/.hg/clonebundles.manifest << EOF
+  > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2 DIGEST=sha256:${digest}
+  > EOF
+  $ hg clone -U http://localhost:$HGPORT digest-valid
+  applying clone bundle from http://localhost:$HGPORT1/gz-a.hg
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 2 changes to 2 files
+  finished applying clone bundle
+  searching for changes
+  no changes found
+  2 local changesets published
+  $ digest_bad=$("$PYTHON" -c "import hashlib; print (hashlib.sha256(open('gz-a.hg', 'rb').read()+b'.').hexdigest())")
+  $ cat > server/.hg/clonebundles.manifest << EOF
+  > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2 DIGEST=sha256:${digest_bad}
+  > EOF
+  $ hg clone -U  http://localhost:$HGPORT digest-invalid
+  applying clone bundle from http://localhost:$HGPORT1/gz-a.hg
+  abort: file with digest [0-9a-f]* expected, but [0-9a-f]* found for [0-9]* bytes (re)
+  [150]
+  $ cat > server/.hg/clonebundles.manifest << EOF
+  > http://localhost:$HGPORT1/bad-a.hg BUNDLESPEC=gzip-v2 DIGEST=sha256:xx
+  > http://localhost:$HGPORT1/bad-b.hg BUNDLESPEC=gzip-v2 DIGEST=xxx:0000
+  > http://localhost:$HGPORT1/bad-c.hg BUNDLESPEC=gzip-v2 DIGEST=sha256:0000
+  > http://localhost:$HGPORT1/bad-d.hg BUNDLESPEC=gzip-v2 DIGEST=xxx:00,xxx:01
+  > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2 DIGEST=sha256:${digest_bad}
+  > EOF
+  $ hg clone --debug -U  http://localhost:$HGPORT digest-malformed
+  using http://localhost:$HGPORT/
+  sending capabilities command
+  sending clonebundles_manifest command
+  filtering http://localhost:$HGPORT1/bad-a.hg due to a bad DIGEST attribute
+  filtering http://localhost:$HGPORT1/bad-b.hg due to lack of supported digest
+  filtering http://localhost:$HGPORT1/bad-c.hg due to a bad sha256 digest
+  filtering http://localhost:$HGPORT1/bad-d.hg due to conflicting xxx digests
+  applying clone bundle from http://localhost:$HGPORT1/gz-a.hg
+  bundle2-input-bundle: 1 params with-transaction
+  bundle2-input-bundle: 0 parts total
+  \(sent [0-9]* HTTP requests and [0-9]* bytes; received [0-9]* bytes in responses\) (re)
+  abort: file with digest [0-9a-f]* expected, but [0-9a-f]* found for [0-9]* bytes (re)
+  [150]
+  $ cat > server/.hg/clonebundles.manifest << EOF
+  > http://localhost:$HGPORT1/gz-a.hg BUNDLESPEC=gzip-v2 DIGEST=sha512:00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000,sha256:0000000000000000000000000000000000000000000000000000000000000000
+  > EOF
+  $ hg clone -U  http://localhost:$HGPORT digest-preference
+  applying clone bundle from http://localhost:$HGPORT1/gz-a.hg
+  abort: file with digest 0{64} expected, but [0-9a-f]+ found for [0-9]+ bytes (re)
+  [150]
+  $ killdaemons.py
+
 Testing a clone bundles that involves revlog splitting (issue6811)
 ==================================================================