packaging: split downloading code into own module
As we will introduce more code to support packaging, it will be
useful to have download code in its own module.
Differential Revision: https://phab.mercurial-scm.org/D6084
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/packaging/hgpackaging/downloads.py Thu Mar 07 15:42:32 2019 -0800
@@ -0,0 +1,116 @@
+# downloads.py - Code for downloading dependencies.
+#
+# Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+# no-check-code because Python 3 native.
+
+import gzip
+import hashlib
+import pathlib
+import urllib.request
+
+
+def hash_path(p: pathlib.Path):
+    """Return the hex SHA-256 digest of the file at ``p``.
+
+    The file is read in 64 KiB chunks rather than loaded whole.
+    """
+    digest = hashlib.sha256()
+    with p.open('rb') as stream:
+        # Two-argument iter() stops at the first empty read, i.e. at EOF.
+        for block in iter(lambda: stream.read(65536), b''):
+            digest.update(block)
+
+    return digest.hexdigest()
+
+
+class IntegrityError(Exception):
+    """Raised when downloaded content fails its size or SHA-256 check."""
+
+
+def secure_download_stream(url, size, sha256):
+    """Yield the content of ``url`` in chunks, verifying it once exhausted.
+
+    Chunks are handed to the caller before verification happens. After the
+    last chunk, IntegrityError is raised if the total byte count differs
+    from ``size`` or the SHA-256 hex digest differs from ``sha256``.
+    """
+    hasher = hashlib.sha256()
+    received = 0
+
+    with urllib.request.urlopen(url) as response:
+        # Decompress a gzip Content-Encoding on the fly, except when the URL
+        # itself names a .gz file, whose bytes are hashed as served.
+        source = response
+        if not url.endswith('.gz') and response.info().get('Content-Encoding') == 'gzip':
+            source = gzip.GzipFile(fileobj=response)
+
+        for chunk in iter(lambda: source.read(65536), b''):
+            hasher.update(chunk)
+            received += len(chunk)
+            yield chunk
+
+    # Verification can only happen once the whole body has been read.
+    actual = hasher.hexdigest()
+
+    if received != size:
+        raise IntegrityError('size mismatch on %s: wanted %d; got %d' % (
+            url, size, received))
+
+    if actual != sha256:
+        raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % (
+            url, sha256, actual))
+
+
+def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str):
+    """Download a URL to a filesystem path, verifying size and SHA-256.
+
+    An existing file at ``path`` that passes both checks is kept as-is and
+    nothing is fetched; one that fails either check is deleted first.
+    Raises IntegrityError if the downloaded content fails verification.
+    """
+    print('downloading %s to %s' % (url, path))
+
+    if path.exists():
+        # Compare the cheap size first; only hash when the size matches.
+        if path.stat().st_size != size:
+            print('existing file size is wrong; removing')
+        elif hash_path(path) != sha256:
+            print('existing file hash is wrong; removing')
+        else:
+            print('%s exists and passes integrity checks' % path)
+            return
+
+        path.unlink()
+
+    # We download to a temporary file and rename at the end so there's
+    # no chance of the final file being partially written or containing
+    # bad data.
+    tmp = path.with_name('%s.tmp' % path.name)
+
+    try:
+        with tmp.open('wb') as fh:
+            for chunk in secure_download_stream(url, size, sha256):
+                fh.write(chunk)
+    except BaseException:
+        # Any failure (bad hash, network error, Ctrl-C) leaves a partial file.
+        if tmp.exists():
+            tmp.unlink()
+        raise
+
+    tmp.replace(path)
+    print('successfully downloaded %s' % url)
+
+
+def download_entry(entry: dict, dest_path: pathlib.Path, local_name=None) -> pathlib.Path:
+    """Download ``entry`` into ``dest_path`` and return the resulting file path."""
+    source_url = entry['url']
+    # Default the file name to whatever follows the URL's final slash.
+    if not local_name:
+        local_name = source_url[source_url.rindex('/') + 1:]
+    target = dest_path / local_name
+    download_to_path(source_url, target, entry['size'], entry['sha256'])
+    return target
--- a/contrib/packaging/hgpackaging/util.py Thu Mar 07 10:10:04 2019 -0800
+++ b/contrib/packaging/hgpackaging/util.py Thu Mar 07 15:42:32 2019 -0800
@@ -7,117 +7,11 @@
# no-check-code because Python 3 native.
-import gzip
-import hashlib
import pathlib
import tarfile
-import urllib.request
import zipfile
-def hash_path(p: pathlib.Path):
- h = hashlib.sha256()
-
- with p.open('rb') as fh:
- while True:
- chunk = fh.read(65536)
- if not chunk:
- break
-
- h.update(chunk)
-
- return h.hexdigest()
-
-
-class IntegrityError(Exception):
- """Represents an integrity error when downloading a URL."""
-
-
-def secure_download_stream(url, size, sha256):
- """Securely download a URL to a stream of chunks.
-
- If the integrity of the download fails, an IntegrityError is
- raised.
- """
- h = hashlib.sha256()
- length = 0
-
- with urllib.request.urlopen(url) as fh:
- if not url.endswith('.gz') and fh.info().get('Content-Encoding') == 'gzip':
- fh = gzip.GzipFile(fileobj=fh)
-
- while True:
- chunk = fh.read(65536)
- if not chunk:
- break
-
- h.update(chunk)
- length += len(chunk)
-
- yield chunk
-
- digest = h.hexdigest()
-
- if length != size:
- raise IntegrityError('size mismatch on %s: wanted %d; got %d' % (
- url, size, length))
-
- if digest != sha256:
- raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % (
- url, sha256, digest))
-
-
-def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str):
- """Download a URL to a filesystem path, possibly with verification."""
-
- # We download to a temporary file and rename at the end so there's
- # no chance of the final file being partially written or containing
- # bad data.
- print('downloading %s to %s' % (url, path))
-
- if path.exists():
- good = True
-
- if path.stat().st_size != size:
- print('existing file size is wrong; removing')
- good = False
-
- if good:
- if hash_path(path) != sha256:
- print('existing file hash is wrong; removing')
- good = False
-
- if good:
- print('%s exists and passes integrity checks' % path)
- return
-
- path.unlink()
-
- tmp = path.with_name('%s.tmp' % path.name)
-
- try:
- with tmp.open('wb') as fh:
- for chunk in secure_download_stream(url, size, sha256):
- fh.write(chunk)
- except IntegrityError:
- tmp.unlink()
- raise
-
- tmp.rename(path)
- print('successfully downloaded %s' % url)
-
-
-def download_entry(entry: dict, dest_path: pathlib.Path, local_name=None) -> pathlib.Path:
- url = entry['url']
-
- local_name = local_name or url[url.rindex('/') + 1:]
-
- local_path = dest_path / local_name
- download_to_path(url, local_path, entry['size'], entry['sha256'])
-
- return local_path
-
-
def extract_tar_to_directory(source: pathlib.Path, dest: pathlib.Path):
with tarfile.open(source, 'r') as tf:
tf.extractall(dest)
--- a/contrib/packaging/inno/build.py Thu Mar 07 10:10:04 2019 -0800
+++ b/contrib/packaging/inno/build.py Thu Mar 07 15:42:32 2019 -0800
@@ -87,8 +87,10 @@
for finding the Python 2.7 toolchain. So, we require the environment
to already be configured with an active toolchain.
"""
+ from hgpackaging.downloads import (
+ download_entry,
+ )
from hgpackaging.util import (
- download_entry,
extract_tar_to_directory,
extract_zip_to_directory,
)
--- a/tests/test-check-code.t Thu Mar 07 10:10:04 2019 -0800
+++ b/tests/test-check-code.t Thu Mar 07 15:42:32 2019 -0800
@@ -12,6 +12,7 @@
> -X hgext/fsmonitor/pywatchman \
> -X mercurial/thirdparty \
> | sed 's-\\-/-g' | "$check_code" --warnings --per-file=0 - || false
+ Skipping contrib/packaging/hgpackaging/downloads.py it has no-che?k-code (glob)
Skipping contrib/packaging/hgpackaging/util.py it has no-che?k-code (glob)
Skipping contrib/packaging/inno/build.py it has no-che?k-code (glob)
Skipping i18n/polib.py it has no-che?k-code (glob)