# HG changeset patch # User Gregory Szorc # Date 1552002152 28800 # Node ID c2237fe1359eee4213f2eef843b86368a3a410e3 # Parent 9da97f49d4f49a75d275b0e095065ec28d49b44e packaging: split downloading code into own module As we will introduce more code to support packaging, it will be useful to have download code in its own module. Differential Revision: https://phab.mercurial-scm.org/D6084 diff -r 9da97f49d4f4 -r c2237fe1359e contrib/packaging/hgpackaging/downloads.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/packaging/hgpackaging/downloads.py Thu Mar 07 15:42:32 2019 -0800 @@ -0,0 +1,116 @@ +# downloads.py - Code for downloading dependencies. +# +# Copyright 2019 Gregory Szorc +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +# no-check-code because Python 3 native. + +import gzip +import hashlib +import pathlib +import urllib.request + + +def hash_path(p: pathlib.Path): + h = hashlib.sha256() + + with p.open('rb') as fh: + while True: + chunk = fh.read(65536) + if not chunk: + break + + h.update(chunk) + + return h.hexdigest() + + +class IntegrityError(Exception): + """Represents an integrity error when downloading a URL.""" + + +def secure_download_stream(url, size, sha256): + """Securely download a URL to a stream of chunks. + + If the integrity of the download fails, an IntegrityError is + raised. + """ + h = hashlib.sha256() + length = 0 + + with urllib.request.urlopen(url) as fh: + if not url.endswith('.gz') and fh.info().get('Content-Encoding') == 'gzip': + fh = gzip.GzipFile(fileobj=fh) + + while True: + chunk = fh.read(65536) + if not chunk: + break + + h.update(chunk) + length += len(chunk) + + yield chunk + + digest = h.hexdigest() + + if length != size: + raise IntegrityError('size mismatch on %s: wanted %d; got %d' % ( + url, size, length)) + + if digest != sha256: + raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % ( + url, sha256, digest)) + + +def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str): + """Download a URL to a filesystem path, possibly with verification.""" + + # We download to a temporary file and rename at the end so there's + # no chance of the final file being partially written or containing + # bad data. + print('downloading %s to %s' % (url, path)) + + if path.exists(): + good = True + + if path.stat().st_size != size: + print('existing file size is wrong; removing') + good = False + + if good: + if hash_path(path) != sha256: + print('existing file hash is wrong; removing') + good = False + + if good: + print('%s exists and passes integrity checks' % path) + return + + path.unlink() + + tmp = path.with_name('%s.tmp' % path.name) + + try: + with tmp.open('wb') as fh: + for chunk in secure_download_stream(url, size, sha256): + fh.write(chunk) + except IntegrityError: + tmp.unlink() + raise + + tmp.rename(path) + print('successfully downloaded %s' % url) + + +def download_entry(entry: dict, dest_path: pathlib.Path, local_name=None) -> pathlib.Path: + url = entry['url'] + + local_name = local_name or url[url.rindex('/') + 1:] + + local_path = dest_path / local_name + download_to_path(url, local_path, entry['size'], entry['sha256']) + + return local_path diff -r 9da97f49d4f4 -r c2237fe1359e contrib/packaging/hgpackaging/util.py --- a/contrib/packaging/hgpackaging/util.py Thu Mar 07 10:10:04 2019 -0800 +++ b/contrib/packaging/hgpackaging/util.py Thu Mar 07 15:42:32 2019 -0800 @@ -7,117 +7,11 @@ # no-check-code because Python 3 native. -import gzip -import hashlib import pathlib import tarfile -import urllib.request import zipfile -def hash_path(p: pathlib.Path): - h = hashlib.sha256() - - with p.open('rb') as fh: - while True: - chunk = fh.read(65536) - if not chunk: - break - - h.update(chunk) - - return h.hexdigest() - - -class IntegrityError(Exception): - """Represents an integrity error when downloading a URL.""" - - -def secure_download_stream(url, size, sha256): - """Securely download a URL to a stream of chunks. - - If the integrity of the download fails, an IntegrityError is - raised. - """ - h = hashlib.sha256() - length = 0 - - with urllib.request.urlopen(url) as fh: - if not url.endswith('.gz') and fh.info().get('Content-Encoding') == 'gzip': - fh = gzip.GzipFile(fileobj=fh) - - while True: - chunk = fh.read(65536) - if not chunk: - break - - h.update(chunk) - length += len(chunk) - - yield chunk - - digest = h.hexdigest() - - if length != size: - raise IntegrityError('size mismatch on %s: wanted %d; got %d' % ( - url, size, length)) - - if digest != sha256: - raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % ( - url, sha256, digest)) - - -def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str): - """Download a URL to a filesystem path, possibly with verification.""" - - # We download to a temporary file and rename at the end so there's - # no chance of the final file being partially written or containing - # bad data. - print('downloading %s to %s' % (url, path)) - - if path.exists(): - good = True - - if path.stat().st_size != size: - print('existing file size is wrong; removing') - good = False - - if good: - if hash_path(path) != sha256: - print('existing file hash is wrong; removing') - good = False - - if good: - print('%s exists and passes integrity checks' % path) - return - - path.unlink() - - tmp = path.with_name('%s.tmp' % path.name) - - try: - with tmp.open('wb') as fh: - for chunk in secure_download_stream(url, size, sha256): - fh.write(chunk) - except IntegrityError: - tmp.unlink() - raise - - tmp.rename(path) - print('successfully downloaded %s' % url) - - -def download_entry(entry: dict, dest_path: pathlib.Path, local_name=None) -> pathlib.Path: - url = entry['url'] - - local_name = local_name or url[url.rindex('/') + 1:] - - local_path = dest_path / local_name - download_to_path(url, local_path, entry['size'], entry['sha256']) - - return local_path - - def extract_tar_to_directory(source: pathlib.Path, dest: pathlib.Path): with tarfile.open(source, 'r') as tf: tf.extractall(dest) diff -r 9da97f49d4f4 -r c2237fe1359e contrib/packaging/inno/build.py --- a/contrib/packaging/inno/build.py Thu Mar 07 10:10:04 2019 -0800 +++ b/contrib/packaging/inno/build.py Thu Mar 07 15:42:32 2019 -0800 @@ -87,8 +87,10 @@ for finding the Python 2.7 toolchain. So, we require the environment to already be configured with an active toolchain. """ + from hgpackaging.downloads import ( + download_entry, + ) from hgpackaging.util import ( - download_entry, extract_tar_to_directory, extract_zip_to_directory, ) diff -r 9da97f49d4f4 -r c2237fe1359e tests/test-check-code.t --- a/tests/test-check-code.t Thu Mar 07 10:10:04 2019 -0800 +++ b/tests/test-check-code.t Thu Mar 07 15:42:32 2019 -0800 @@ -12,6 +12,7 @@ > -X hgext/fsmonitor/pywatchman \ > -X mercurial/thirdparty \ > | sed 's-\\-/-g' | "$check_code" --warnings --per-file=0 - || false + Skipping contrib/packaging/hgpackaging/downloads.py it has no-che?k-code (glob) Skipping contrib/packaging/hgpackaging/util.py it has no-che?k-code (glob) Skipping contrib/packaging/inno/build.py it has no-che?k-code (glob) Skipping i18n/polib.py it has no-che?k-code (glob)