Mercurial > hg
changeset 35476:417e8e040102
lfs: verify lfs object content when transferring to and from the remote store
This avoids inserting corrupt files into the usercache, and local and remote
stores. One down side is that the bad file won't be available locally for
forensic purposes after a remote download. I'm thinking about adding an
'incoming' directory to the local lfs store to handle the download, and then
move it to the 'objects' directory after it passes verification. That would
have the additional benefit of not concatenating each transfer chunk in memory
until the full file is transferred.
Verification isn't needed when the data is passed back through the revlog
interface or when the oid was just calculated, but otherwise it is on by
default. The additional overhead should be well worth avoiding problems with
file based remote stores, or buggy lfs servers.
Having two different verify functions is a little sad, but the full data of the
blob is mostly passed around in memory, because that's what the revlog interface
wants. The upload function, however, chunks up the data. It would be ideal if
that was how the content is always handled, but that's probably a huge project.
I don't really like printing the long hash, but `hg debugdata` isn't a public
interface, and is the only way to get it. The filelog and revision info is
nowhere near this area, so recommending `hg verify` is the easiest thing to do.
author | Matt Harbison <matt_harbison@yahoo.com> |
---|---|
date | Fri, 17 Nov 2017 00:06:45 -0500 |
parents | b0c01a5ee35c |
children | bb6a80fc969a |
files | hgext/lfs/blobstore.py hgext/lfs/wrapper.py tests/test-lfs-test-server.t tests/test-lfs.t |
diffstat | 4 files changed, 74 insertions(+), 58 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/lfs/blobstore.py Mon Dec 04 21:41:04 2017 -0500 +++ b/hgext/lfs/blobstore.py Fri Nov 17 00:06:45 2017 -0500 @@ -7,6 +7,7 @@ from __future__ import absolute_import +import hashlib import json import os import re @@ -99,8 +100,11 @@ self.cachevfs = lfsvfs(usercache) self.ui = repo.ui - def write(self, oid, data): + def write(self, oid, data, verify=True): """Write blob to local blobstore.""" + if verify: + _verify(oid, data) + with self.vfs(oid, 'wb', atomictemp=True) as fp: fp.write(data) @@ -110,14 +114,23 @@ self.ui.note(_('lfs: adding %s to the usercache\n') % oid) lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid)) - def read(self, oid): + def read(self, oid, verify=True): """Read blob from local blobstore.""" if not self.vfs.exists(oid): + blob = self._read(self.cachevfs, oid, verify) + self.ui.note(_('lfs: found %s in the usercache\n') % oid) lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid)) - self.ui.note(_('lfs: found %s in the usercache\n') % oid) else: self.ui.note(_('lfs: found %s in the local lfs store\n') % oid) - return self.vfs.read(oid) + blob = self._read(self.vfs, oid, verify) + return blob + + def _read(self, vfs, oid, verify): + """Read blob (after verifying) from the given store""" + blob = vfs.read(oid) + if verify: + _verify(oid, blob) + return blob def has(self, oid): """Returns True if the local blobstore contains the requested blob, @@ -233,6 +246,8 @@ request = util.urlreq.request(href) if action == 'upload': # If uploading blobs, read data from local blobstore. + with localstore.vfs(oid) as fp: + _verifyfile(oid, fp) request.data = filewithprogress(localstore.vfs(oid), None) request.get_method = lambda: 'PUT' @@ -253,7 +268,7 @@ if action == 'download': # If downloading blobs, store downloaded data to local blobstore - localstore.write(oid, response) + localstore.write(oid, response, verify=True) def _batch(self, pointers, localstore, action): if action not in ['upload', 'download']: @@ -324,14 +339,14 @@ def writebatch(self, pointers, fromstore): for p in pointers: - content = fromstore.read(p.oid()) + content = fromstore.read(p.oid(), verify=True) with self.vfs(p.oid(), 'wb', atomictemp=True) as fp: fp.write(content) def readbatch(self, pointers, tostore): for p in pointers: content = self.vfs.read(p.oid()) - tostore.write(p.oid(), content) + tostore.write(p.oid(), content, verify=True) class _nullremote(object): """Null store storing blobs to /dev/null.""" @@ -368,6 +383,24 @@ None: _promptremote, } +def _verify(oid, content): + realoid = hashlib.sha256(content).hexdigest() + if realoid != oid: + raise error.Abort(_('detected corrupt lfs object: %s') % oid, + hint=_('run hg verify')) + +def _verifyfile(oid, fp): + sha256 = hashlib.sha256() + while True: + data = fp.read(1024 * 1024) + if not data: + break + sha256.update(data) + realoid = sha256.hexdigest() + if realoid != oid: + raise error.Abort(_('detected corrupt lfs object: %s') % oid, + hint=_('run hg verify')) + def remote(repo): """remotestore factory. return a store in _storemap depending on config""" defaulturl = ''
--- a/hgext/lfs/wrapper.py Mon Dec 04 21:41:04 2017 -0500 +++ b/hgext/lfs/wrapper.py Fri Nov 17 00:06:45 2017 -0500 @@ -54,7 +54,9 @@ if not store.has(oid): p.filename = getattr(self, 'indexfile', None) self.opener.lfsremoteblobstore.readbatch([p], store) - text = store.read(oid) + + # The caller will validate the content + text = store.read(oid, verify=False) # pack hg filelog metadata hgmeta = {} @@ -76,7 +78,7 @@ # git-lfs only supports sha256 oid = hashlib.sha256(text).hexdigest() - self.opener.lfslocalblobstore.write(oid, text) + self.opener.lfslocalblobstore.write(oid, text, verify=False) # replace contents with metadata longoid = 'sha256:%s' % oid
--- a/tests/test-lfs-test-server.t Mon Dec 04 21:41:04 2017 -0500 +++ b/tests/test-lfs-test-server.t Fri Nov 17 00:06:45 2017 -0500 @@ -105,16 +105,15 @@ lfs: found 37a65ab78d5ecda767e8622c248b5dbff1e68b1678ab0e730d5eb8601ec8ad19 in the local lfs store 3 files updated, 0 files merged, 0 files removed, 0 files unresolved -Test a corrupt file download, but clear the cache first to force a download - -XXX: ideally, the validation would occur before polluting the usercache and -local store, with a clearer error message. +Test a corrupt file download, but clear the cache first to force a download. $ rm -rf `hg config lfs.usercache` $ cp $TESTTMP/lfs-content/d1/1e/1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 blob $ echo 'damage' > $TESTTMP/lfs-content/d1/1e/1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 $ rm ../repo1/.hg/store/lfs/objects/d1/1e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 $ rm ../repo1/* + +XXX: suggesting `hg verify` won't help with a corrupt file on the lfs server. $ hg --repo ../repo1 update -C tip -v resolving manifests getting a @@ -123,18 +122,16 @@ lfs: found 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b in the local lfs store getting c lfs: downloading d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 (19 bytes) - lfs: adding d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 to the usercache - lfs: processed: d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 - lfs: found d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 in the local lfs store - abort: integrity check failed on data/c.i:0! + abort: detected corrupt lfs object: d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 + (run hg verify) [255] -BUG: the corrupted blob was added to the usercache and local store +The corrupted blob is not added to the usercache or local store - $ cat ../repo1/.hg/store/lfs/objects/d1/1e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 | $TESTDIR/f --sha256 - sha256=fa82ca222fc9813afad3559637960bf311170cdd80ed35287f4623eb2320a660 - $ cat `hg config lfs.usercache`/d1/1e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 | $TESTDIR/f --sha256 - sha256=fa82ca222fc9813afad3559637960bf311170cdd80ed35287f4623eb2320a660 + $ test -f ../repo1/.hg/store/lfs/objects/d1/1e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 + [1] + $ test -f `hg config lfs.usercache`/d1/1e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 + [1] $ cp blob $TESTTMP/lfs-content/d1/1e/1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 Test a corrupted file upload @@ -146,7 +143,8 @@ pushing to ../repo1 searching for changes lfs: uploading e659058e26b07b39d2a9c7145b3f99b41f797b6621c8076600e9cb7ee88291f0 (17 bytes) - abort: HTTP error: HTTP Error 500: Internal Server Error (oid=e659058e26b07b39d2a9c7145b3f99b41f797b6621c8076600e9cb7ee88291f0, action=upload)! + abort: detected corrupt lfs object: e659058e26b07b39d2a9c7145b3f99b41f797b6621c8076600e9cb7ee88291f0 + (run hg verify) [255] Check error message when the remote missed a blob:
--- a/tests/test-lfs.t Mon Dec 04 21:41:04 2017 -0500 +++ b/tests/test-lfs.t Fri Nov 17 00:06:45 2017 -0500 @@ -704,18 +704,17 @@ updating to branch default resolving manifests getting l - lfs: adding 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b to the usercache - lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store - abort: integrity check failed on data/l.i:3! + abort: detected corrupt lfs object: 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b + (run hg verify) [255] -BUG: A corrupted lfs blob either shouldn't be created after a transfer from a -file://remotestore, or it shouldn't be left behind. +A corrupted lfs blob is not transferred from a file://remotestore to the +usercache or local store. - $ cat emptycache/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256 - sha256=40f67c7e91d554db4bc500f8f62c2e40f9f61daa5b62388e577bbae26f5396ff - $ cat fromcorrupt2/.hg/store/lfs/objects/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256 - sha256=40f67c7e91d554db4bc500f8f62c2e40f9f61daa5b62388e577bbae26f5396ff + $ test -f emptycache/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b + [1] + $ test -f fromcorrupt2/.hg/store/lfs/objects/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b + [1] $ hg -R fromcorrupt2 verify checking changesets @@ -723,14 +722,13 @@ crosschecking files in changesets and manifests checking files l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0 - l@4: unpacking 6f1ff1f39c11: integrity check failed on data/l.i:3 large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0 4 files, 5 changesets, 10 total revisions - 3 integrity errors encountered! + 2 integrity errors encountered! (first damaged changeset appears to be 0) [1] -BUG: push will happily send corrupt files upstream. (The alternate dummy remote +Corrupt local files are not sent upstream. (The alternate dummy remote avoids the corrupt lfs object in the original remote.) $ mkdir $TESTTMP/dummy-remote2 @@ -742,18 +740,9 @@ lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store - 5 changesets found - uncompressed size of bundle content: - 997 (changelog) - 1032 (manifests) - 841 l - 272 large - 788 s - 139 small - adding changesets - adding manifests - adding file changes - added 5 changesets with 10 changes to 4 files + abort: detected corrupt lfs object: 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e + (run hg verify) + [255] $ hg -R fromcorrupt2 --config lfs.url=file:///$TESTTMP/dummy-remote2 verify -v repository uses revlog format 1 @@ -764,27 +753,21 @@ lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store - l@4: unpacking 6f1ff1f39c11: integrity check failed on data/l.i:3 lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0 lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store 4 files, 5 changesets, 10 total revisions - 3 integrity errors encountered! + 2 integrity errors encountered! (first damaged changeset appears to be 0) [1] $ cat $TESTTMP/dummy-remote2/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256 - sha256=40f67c7e91d554db4bc500f8f62c2e40f9f61daa5b62388e577bbae26f5396ff + sha256=22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b $ cat fromcorrupt2/.hg/store/lfs/objects/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256 - sha256=40f67c7e91d554db4bc500f8f62c2e40f9f61daa5b62388e577bbae26f5396ff - - $ cat $TESTTMP/dummy-remote2/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e - LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS - damage - $ cat $TESTTMP/dummy-remote2/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b - RESTORE-TO-BE-LARGE - damage + sha256=22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b + $ test -f $TESTTMP/dummy-remote2/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e + [1] Accessing a corrupt file will complain