view contrib/packaging/hgpackaging/downloads.py @ 51723:9367571fea21

cext: correct the argument handling of `b85encode()` The type stub indicated that this argument is `Optional`, which implies None is allowed. I don't see in the documentation where that's the case for `i`[1], and trying it in `hg debugshell` resulted in the method failing with a TypeError. I guess it was typed as an `int` argument because the `p` format unit wasn't added until Python 3.3[2]. In any event, 2 clients in core (`pvec` and `obsolete`) call this with no argument supplied, and `mdiff` calls it with True. So I guess we've avoided the None arg case, and when no arg is supplied, it defaults to the 0 initialization of the `pad` variable in C. Since the `p` format unit accepts both `int` and None, as well as `bool`, I'm not bothering to bump the module version- this code is more permissive than it was, in addition to being more correct. Interestingly, when I first imported the `cext` and `pure` methods in the same manner as the previous commit, it dropped the `Optional` part of the argument type when generating `util.pyi`. No idea why. [1] https://docs.python.org/3/c-api/arg.html#numbers [2] https://docs.python.org/3/c-api/arg.html#other-objects
author Matt Harbison <matt_harbison@yahoo.com>
date Sat, 20 Jul 2024 01:55:09 -0400
parents a7fe96abcf56
children
line wrap: on
line source

# downloads.py - Code for downloading dependencies.
#
# Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# no-check-code because Python 3 native.

import gzip
import hashlib
import pathlib
import typing
import urllib.request


DOWNLOADS = {
    'gettext': {
        'url': 'https://versaweb.dl.sourceforge.net/project/gnuwin32/gettext/0.14.4/gettext-0.14.4-bin.zip',
        'size': 1606131,
        'sha256': '60b9ef26bc5cceef036f0424e542106cf158352b2677f43a01affd6d82a1d641',
        'version': '0.14.4',
    },
    'gettext-dep': {
        'url': 'https://versaweb.dl.sourceforge.net/project/gnuwin32/gettext/0.14.4/gettext-0.14.4-dep.zip',
        'size': 715086,
        'sha256': '411f94974492fd2ecf52590cb05b1023530aec67e64154a88b1e4ebcd9c28588',
    },
}


def hash_path(p: pathlib.Path):
    h = hashlib.sha256()

    with p.open('rb') as fh:
        while True:
            chunk = fh.read(65536)
            if not chunk:
                break

            h.update(chunk)

    return h.hexdigest()


class IntegrityError(Exception):
    """Represents an integrity error when downloading a URL."""


def secure_download_stream(url, size, sha256):
    """Securely download a URL to a stream of chunks.

    If the integrity of the download fails, an IntegrityError is
    raised.
    """
    h = hashlib.sha256()
    length = 0

    with urllib.request.urlopen(url) as fh:
        if (
            not url.endswith('.gz')
            and fh.info().get('Content-Encoding') == 'gzip'
        ):
            fh = gzip.GzipFile(fileobj=fh)

        while True:
            chunk = fh.read(65536)
            if not chunk:
                break

            h.update(chunk)
            length += len(chunk)

            yield chunk

    digest = h.hexdigest()

    if length != size:
        raise IntegrityError(
            'size mismatch on %s: wanted %d; got %d' % (url, size, length)
        )

    if digest != sha256:
        raise IntegrityError(
            'sha256 mismatch on %s: wanted %s; got %s' % (url, sha256, digest)
        )


def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str):
    """Download a URL to a filesystem path, possibly with verification."""

    # We download to a temporary file and rename at the end so there's
    # no chance of the final file being partially written or containing
    # bad data.
    print('downloading %s to %s' % (url, path))

    if path.exists():
        good = True

        if path.stat().st_size != size:
            print('existing file size is wrong; removing')
            good = False

        if good:
            if hash_path(path) != sha256:
                print('existing file hash is wrong; removing')
                good = False

        if good:
            print('%s exists and passes integrity checks' % path)
            return

        path.unlink()

    tmp = path.with_name('%s.tmp' % path.name)

    try:
        with tmp.open('wb') as fh:
            for chunk in secure_download_stream(url, size, sha256):
                fh.write(chunk)
    except IntegrityError:
        tmp.unlink()
        raise

    tmp.rename(path)
    print('successfully downloaded %s' % url)


def download_entry(
    name: str, dest_path: pathlib.Path, local_name=None
) -> typing.Tuple[pathlib.Path, typing.Dict[str, typing.Union[str, int]]]:
    entry = DOWNLOADS[name]

    url = entry['url']

    local_name = local_name or url[url.rindex('/') + 1 :]

    local_path = dest_path / local_name
    download_to_path(url, local_path, entry['size'], entry['sha256'])

    return local_path, entry