contrib/packaging/hgpackaging/downloads.py
author Pierre-Yves DAVID <pierre-yves.david@octobus.net>
Wed, 01 Jun 2022 01:32:45 +0200
changeset 49270 251650844331
parent 49136 a7fe96abcf56
permissions -rw-r--r--
debugindex: add a `p1-rev` column This will be useful in case of corrupted index.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
41957
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
     1
# downloads.py - Code for downloading dependencies.
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
     2
#
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
     3
# Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
     4
#
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
     5
# This software may be used and distributed according to the terms of the
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
     6
# GNU General Public License version 2 or any later version.
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
     7
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
     8
# no-check-code because Python 3 native.
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
     9
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    10
import gzip
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    11
import hashlib
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    12
import pathlib
49136
a7fe96abcf56 packaging: fix the type hint on the download_entry function
Matt Harbison <matt_harbison@yahoo.com>
parents: 48939
diff changeset
    13
import typing
41957
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    14
import urllib.request
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    15
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    16
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    17
# Registry of third-party artifacts fetched while building packages,
# keyed by a short name. Every entry carries the source ``url`` plus the
# ``size`` (in bytes) and ``sha256`` hex digest that the downloaded
# content is checked against; ``version`` is optional.
DOWNLOADS = {
    'gettext': dict(
        url='https://versaweb.dl.sourceforge.net/project/gnuwin32/gettext/0.14.4/gettext-0.14.4-bin.zip',
        size=1606131,
        sha256='60b9ef26bc5cceef036f0424e542106cf158352b2677f43a01affd6d82a1d641',
        version='0.14.4',
    ),
    'gettext-dep': dict(
        url='https://versaweb.dl.sourceforge.net/project/gnuwin32/gettext/0.14.4/gettext-0.14.4-dep.zip',
        size=715086,
        sha256='411f94974492fd2ecf52590cb05b1023530aec67e64154a88b1e4ebcd9c28588',
    ),
}
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    30
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    31
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    32
def hash_path(p: pathlib.Path):
    """Return the hexadecimal SHA-256 digest of the file at ``p``.

    The file is read in 64 KiB pieces so it never has to be held in
    memory as a whole.
    """
    digest = hashlib.sha256()

    with p.open('rb') as source:
        # iter() with a sentinel stops at the first empty read, i.e. EOF.
        for block in iter(lambda: source.read(65536), b''):
            digest.update(block)

    return digest.hexdigest()
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    44
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    45
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    46
class IntegrityError(Exception):
    """Signals that downloaded content failed its integrity verification."""
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    48
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    49
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    50
def secure_download_stream(url, size, sha256):
    """Fetch ``url`` and yield its content as a stream of byte chunks.

    This is a generator: chunks are handed to the caller as they arrive,
    and the checks only run once the whole stream has been consumed. If
    the total length differs from ``size`` or the SHA-256 hex digest
    differs from ``sha256``, an IntegrityError is raised at that point,
    so callers must not trust the data until iteration finishes cleanly.
    """
    hasher = hashlib.sha256()
    received = 0

    with urllib.request.urlopen(url) as response:
        # Only undo a gzip transfer encoding; a resource that is itself a
        # ``.gz`` file must be passed through untouched.
        if (
            url.endswith('.gz')
            or response.info().get('Content-Encoding') != 'gzip'
        ):
            source = response
        else:
            source = gzip.GzipFile(fileobj=response)

        # The sentinel form of iter() ends the loop on the first empty read.
        for piece in iter(lambda: source.read(65536), b''):
            hasher.update(piece)
            received += len(piece)

            yield piece

    actual = hasher.hexdigest()

    if received != size:
        raise IntegrityError(
            'size mismatch on %s: wanted %d; got %d' % (url, size, received)
        )

    if actual != sha256:
        raise IntegrityError(
            'sha256 mismatch on %s: wanted %s; got %s' % (url, sha256, actual)
        )
41957
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    87
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    88
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
    89
def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str):
    """Download a URL to a filesystem path, verifying size and SHA-256.

    If ``path`` already exists and matches both ``size`` and ``sha256``,
    nothing is downloaded. An existing file that fails either check is
    deleted and fetched again.

    Raises IntegrityError if the downloaded content does not match the
    expected size or digest. Whatever the failure, the partially written
    temporary file is removed before the exception propagates.
    """
    print('downloading %s to %s' % (url, path))

    if path.exists():
        good = True

        if path.stat().st_size != size:
            print('existing file size is wrong; removing')
            good = False

        # Hashing is the expensive check, so skip it once the cheap size
        # comparison has already failed.
        if good:
            if hash_path(path) != sha256:
                print('existing file hash is wrong; removing')
                good = False

        if good:
            print('%s exists and passes integrity checks' % path)
            return

        path.unlink()

    # We download to a temporary file and rename at the end so there's
    # no chance of the final file being partially written or containing
    # bad data.
    tmp = path.with_name('%s.tmp' % path.name)

    try:
        with tmp.open('wb') as fh:
            for chunk in secure_download_stream(url, size, sha256):
                fh.write(chunk)
    except BaseException:
        # Clean up on every failure (integrity, network, disk, Ctrl-C),
        # not only on IntegrityError, so no stray partial file is left
        # behind. The file is already closed here because the ``with``
        # block has exited.
        try:
            tmp.unlink()
        except FileNotFoundError:
            # Opening the temporary file itself failed; nothing to remove.
            pass
        raise

    tmp.rename(path)
    print('successfully downloaded %s' % url)
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
   127
b83de9150c1c packaging: convert files to LF
Gregory Szorc <gregory.szorc@gmail.com>
parents: 41926
diff changeset
   128
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41957
diff changeset
   129
def download_entry(
    name: str, dest_path: pathlib.Path, local_name=None
) -> typing.Tuple[pathlib.Path, typing.Dict[str, typing.Union[str, int]]]:
    """Fetch the ``DOWNLOADS`` entry called ``name`` into ``dest_path``.

    The file is stored as ``local_name`` when one is given; otherwise the
    last path component of the entry's URL is used. Returns a tuple of
    the local file path and the ``DOWNLOADS`` entry itself.
    """
    entry = DOWNLOADS[name]
    url = entry['url']

    if not local_name:
        # Default to whatever follows the final '/' in the URL.
        local_name = url[url.rindex('/') + 1 :]

    local_path = dest_path / local_name
    download_to_path(
        url, local_path, size=entry['size'], sha256=entry['sha256']
    )

    return local_path, entry