# HG changeset patch # User Gregory Szorc # Date 1551982204 28800 # Node ID 9da97f49d4f49a75d275b0e095065ec28d49b44e # Parent 2b21c7fbb3a109dab329df8f6e79987cebbcb4de packaging: establish hgpackaging package Previously, contrib/packaging behaved as a root to a package directory and we had a "packagingutil" module. As I work more on packaging code, we'll want to have more code shared between different packaging tools. I think it makes sense to have a single package containing multiple modules than multiple top-level modules. This commit establishes an "hgpackaging" package by moving the existing packagingutil code to it. Differential Revision: https://phab.mercurial-scm.org/D6083 diff -r 2b21c7fbb3a1 -r 9da97f49d4f4 contrib/packaging/__init__.py diff -r 2b21c7fbb3a1 -r 9da97f49d4f4 contrib/packaging/hgpackaging/__init__.py diff -r 2b21c7fbb3a1 -r 9da97f49d4f4 contrib/packaging/hgpackaging/util.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/packaging/hgpackaging/util.py Thu Mar 07 10:10:04 2019 -0800 @@ -0,0 +1,128 @@ +# util.py - Common packaging utility code. +# +# Copyright 2019 Gregory Szorc +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +# no-check-code because Python 3 native. + +import gzip +import hashlib +import pathlib +import tarfile +import urllib.request +import zipfile + + +def hash_path(p: pathlib.Path): + h = hashlib.sha256() + + with p.open('rb') as fh: + while True: + chunk = fh.read(65536) + if not chunk: + break + + h.update(chunk) + + return h.hexdigest() + + +class IntegrityError(Exception): + """Represents an integrity error when downloading a URL.""" + + +def secure_download_stream(url, size, sha256): + """Securely download a URL to a stream of chunks. + + If the integrity of the download fails, an IntegrityError is + raised. + """ + h = hashlib.sha256() + length = 0 + + with urllib.request.urlopen(url) as fh: + if not url.endswith('.gz') and fh.info().get('Content-Encoding') == 'gzip': + fh = gzip.GzipFile(fileobj=fh) + + while True: + chunk = fh.read(65536) + if not chunk: + break + + h.update(chunk) + length += len(chunk) + + yield chunk + + digest = h.hexdigest() + + if length != size: + raise IntegrityError('size mismatch on %s: wanted %d; got %d' % ( + url, size, length)) + + if digest != sha256: + raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % ( + url, sha256, digest)) + + +def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str): + """Download a URL to a filesystem path, possibly with verification.""" + + # We download to a temporary file and rename at the end so there's + # no chance of the final file being partially written or containing + # bad data. + print('downloading %s to %s' % (url, path)) + + if path.exists(): + good = True + + if path.stat().st_size != size: + print('existing file size is wrong; removing') + good = False + + if good: + if hash_path(path) != sha256: + print('existing file hash is wrong; removing') + good = False + + if good: + print('%s exists and passes integrity checks' % path) + return + + path.unlink() + + tmp = path.with_name('%s.tmp' % path.name) + + try: + with tmp.open('wb') as fh: + for chunk in secure_download_stream(url, size, sha256): + fh.write(chunk) + except IntegrityError: + tmp.unlink() + raise + + tmp.rename(path) + print('successfully downloaded %s' % url) + + +def download_entry(entry: dict, dest_path: pathlib.Path, local_name=None) -> pathlib.Path: + url = entry['url'] + + local_name = local_name or url[url.rindex('/') + 1:] + + local_path = dest_path / local_name + download_to_path(url, local_path, entry['size'], entry['sha256']) + + return local_path + + +def extract_tar_to_directory(source: pathlib.Path, dest: pathlib.Path): + with tarfile.open(source, 'r') as tf: + tf.extractall(dest) + + +def extract_zip_to_directory(source: pathlib.Path, dest: pathlib.Path): + with zipfile.ZipFile(source, 'r') as zf: + zf.extractall(dest) diff -r 2b21c7fbb3a1 -r 9da97f49d4f4 contrib/packaging/inno/build.py --- a/contrib/packaging/inno/build.py Sat Mar 09 02:07:09 2019 +0000 +++ b/contrib/packaging/inno/build.py Thu Mar 07 10:10:04 2019 -0800 @@ -87,7 +87,7 @@ for finding the Python 2.7 toolchain. So, we require the environment to already be configured with an active toolchain. """ - from packagingutil import ( + from hgpackaging.util import ( download_entry, extract_tar_to_directory, extract_zip_to_directory, diff -r 2b21c7fbb3a1 -r 9da97f49d4f4 contrib/packaging/packagingutil.py --- a/contrib/packaging/packagingutil.py Sat Mar 09 02:07:09 2019 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,128 +0,0 @@ -# packagingutil.py - Common packaging utility code. -# -# Copyright 2019 Gregory Szorc -# -# This software may be used and distributed according to the terms of the -# GNU General Public License version 2 or any later version. - -# no-check-code because Python 3 native. - -import gzip -import hashlib -import pathlib -import tarfile -import urllib.request -import zipfile - - -def hash_path(p: pathlib.Path): - h = hashlib.sha256() - - with p.open('rb') as fh: - while True: - chunk = fh.read(65536) - if not chunk: - break - - h.update(chunk) - - return h.hexdigest() - - -class IntegrityError(Exception): - """Represents an integrity error when downloading a URL.""" - - -def secure_download_stream(url, size, sha256): - """Securely download a URL to a stream of chunks. - - If the integrity of the download fails, an IntegrityError is - raised. - """ - h = hashlib.sha256() - length = 0 - - with urllib.request.urlopen(url) as fh: - if not url.endswith('.gz') and fh.info().get('Content-Encoding') == 'gzip': - fh = gzip.GzipFile(fileobj=fh) - - while True: - chunk = fh.read(65536) - if not chunk: - break - - h.update(chunk) - length += len(chunk) - - yield chunk - - digest = h.hexdigest() - - if length != size: - raise IntegrityError('size mismatch on %s: wanted %d; got %d' % ( - url, size, length)) - - if digest != sha256: - raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % ( - url, sha256, digest)) - - -def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str): - """Download a URL to a filesystem path, possibly with verification.""" - - # We download to a temporary file and rename at the end so there's - # no chance of the final file being partially written or containing - # bad data. - print('downloading %s to %s' % (url, path)) - - if path.exists(): - good = True - - if path.stat().st_size != size: - print('existing file size is wrong; removing') - good = False - - if good: - if hash_path(path) != sha256: - print('existing file hash is wrong; removing') - good = False - - if good: - print('%s exists and passes integrity checks' % path) - return - - path.unlink() - - tmp = path.with_name('%s.tmp' % path.name) - - try: - with tmp.open('wb') as fh: - for chunk in secure_download_stream(url, size, sha256): - fh.write(chunk) - except IntegrityError: - tmp.unlink() - raise - - tmp.rename(path) - print('successfully downloaded %s' % url) - - -def download_entry(entry: dict, dest_path: pathlib.Path, local_name=None) -> pathlib.Path: - url = entry['url'] - - local_name = local_name or url[url.rindex('/') + 1:] - - local_path = dest_path / local_name - download_to_path(url, local_path, entry['size'], entry['sha256']) - - return local_path - - -def extract_tar_to_directory(source: pathlib.Path, dest: pathlib.Path): - with tarfile.open(source, 'r') as tf: - tf.extractall(dest) - - -def extract_zip_to_directory(source: pathlib.Path, dest: pathlib.Path): - with zipfile.ZipFile(source, 'r') as zf: - zf.extractall(dest) diff -r 2b21c7fbb3a1 -r 9da97f49d4f4 tests/test-check-code.t --- a/tests/test-check-code.t Sat Mar 09 02:07:09 2019 +0000 +++ b/tests/test-check-code.t Thu Mar 07 10:10:04 2019 -0800 @@ -12,8 +12,8 @@ > -X hgext/fsmonitor/pywatchman \ > -X mercurial/thirdparty \ > | sed 's-\\-/-g' | "$check_code" --warnings --per-file=0 - || false + Skipping contrib/packaging/hgpackaging/util.py it has no-che?k-code (glob) Skipping contrib/packaging/inno/build.py it has no-che?k-code (glob) - Skipping contrib/packaging/packagingutil.py it has no-che?k-code (glob) Skipping i18n/polib.py it has no-che?k-code (glob) Skipping mercurial/statprof.py it has no-che?k-code (glob) Skipping tests/badserverext.py it has no-che?k-code (glob) diff -r 2b21c7fbb3a1 -r 9da97f49d4f4 tests/test-check-module-imports.t --- a/tests/test-check-module-imports.t Sat Mar 09 02:07:09 2019 +0000 +++ b/tests/test-check-module-imports.t Thu Mar 07 10:10:04 2019 -0800 @@ -21,8 +21,8 @@ > -X contrib/debugshell.py \ > -X contrib/hgweb.fcgi \ > -X contrib/packaging/hg-docker \ + > -X contrib/packaging/hgpackaging/ \ > -X contrib/packaging/inno/ \ - > -X contrib/packaging/packagingutil.py \ > -X contrib/python-zstandard/ \ > -X contrib/win32/hgwebdir_wsgi.py \ > -X contrib/perf-utils/perf-revlog-write-plot.py \ diff -r 2b21c7fbb3a1 -r 9da97f49d4f4 tests/test-check-py3-compat.t --- a/tests/test-check-py3-compat.t Sat Mar 09 02:07:09 2019 +0000 +++ b/tests/test-check-py3-compat.t Thu Mar 07 10:10:04 2019 -0800 @@ -5,8 +5,8 @@ #if no-py3 $ testrepohg files 'set:(**.py)' \ + > -X contrib/packaging/hgpackaging/ \ > -X contrib/packaging/inno/ \ - > -X contrib/packaging/packagingutil.py \ > -X hgdemandimport/demandimportpy2.py \ > -X mercurial/thirdparty/cbor \ > | sed 's|\\|/|g' | xargs "$PYTHON" contrib/check-py3-compat.py