changeset 35280:8e72f9152c4d
lfs: introduce a user level cache for lfs files
This is the same mechanism that largefiles uses, and it solves several problems
that come up when working with multiple local repositories. The existing
largefiles method is reused in place because I suspect there are other
functions that can be shared as well. If we wait a bit to identify more of them
before running `hg cp lfutil.py ...`, the history will be easier to trace.
The push between repo14 and repo15 in test-lfs.t arguably shouldn't upload any
files at all, since it is a local push. Maybe we can revisit that once `hg push`
without 'lfs.url' can upload files to the push destination; then it would be
consistent for blobs in a local push to be linked into the local destination's
cache.
The cache property is added to run-tests.py, just like the largefiles property,
so that test-generated files don't pollute the real location. Having files
available locally broke a couple of existing lfs-test-server tests, so the
cache is cleared in a few places to force a file download.
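For orientation, here is a rough, standalone sketch of the cache-location policy the
extension inherits from largefiles: an explicit 'lfs.usercache' path wins, otherwise an
OS-specific per-user cache directory is used. The function name and the exact default
paths below are illustrative assumptions; the real lookup is done by largefiles'
lfutil._usercachedir(), which this change reuses.

    # Illustrative only: approximate resolution of the shared cache directory.
    import os
    import sys

    def usercachedir(configured, name='lfs'):
        """Pick the shared cache dir: explicit config wins, else an OS default."""
        if configured:                      # e.g. the value of lfs.usercache
            return configured
        if sys.platform == 'darwin':        # macOS per-user cache location
            return os.path.expanduser(os.path.join('~', 'Library', 'Caches', name))
        if os.name == 'nt':                 # Windows local application data
            base = os.environ.get('LOCALAPPDATA', os.path.expanduser('~'))
            return os.path.join(base, name)
        # Linux and other POSIX systems: follow the XDG cache convention
        base = os.environ.get('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
        return os.path.join(base, name)

In the test harness this resolution never matters: run-tests.py writes
usercache = $TESTTMP/.cache/lfs into each generated hgrc, so nothing leaks into the
real per-user cache.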
author     Matt Harbison <matt_harbison@yahoo.com>
date       Wed, 06 Dec 2017 22:56:15 -0500
parents    be4481d6222e
children   010179e21e91
files      hgext/lfs/__init__.py hgext/lfs/blobstore.py tests/run-tests.py tests/test-basic.t tests/test-commandserver.t tests/test-lfs-test-server.t tests/test-lfs.t
diffstat   7 files changed, 46 insertions(+), 13 deletions(-)
--- a/hgext/lfs/__init__.py	Tue Dec 05 23:08:59 2017 -0500
+++ b/hgext/lfs/__init__.py	Wed Dec 06 22:56:15 2017 -0500
@@ -24,6 +24,10 @@
 
     # how many times to retry before giving up on transferring an object
     retry = 5
+
+    # the local directory to store lfs files for sharing across local clones.
+    # If not set, the cache is located in an OS specific cache location.
+    usercache = /path/to/global/cache
 """
 
 from __future__ import absolute_import
@@ -62,6 +66,9 @@
 configitem('lfs', 'url',
     default=configitem.dynamicdefault,
 )
+configitem('lfs', 'usercache',
+    default=None,
+)
 configitem('lfs', 'threshold',
     default=None,
 )
--- a/hgext/lfs/blobstore.py	Tue Dec 05 23:08:59 2017 -0500
+++ b/hgext/lfs/blobstore.py	Wed Dec 06 22:56:15 2017 -0500
@@ -20,6 +20,8 @@
     vfs as vfsmod,
 )
 
+from ..largefiles import lfutil
+
 # 64 bytes for SHA256
 _lfsre = re.compile(r'\A[a-f0-9]{64}\Z')
 
@@ -68,20 +70,29 @@
     def __init__(self, repo):
         fullpath = repo.svfs.join('lfs/objects')
         self.vfs = lfsvfs(fullpath)
+        usercache = lfutil._usercachedir(repo.ui, 'lfs')
+        self.cachevfs = lfsvfs(usercache)
 
     def write(self, oid, data):
         """Write blob to local blobstore."""
         with self.vfs(oid, 'wb', atomictemp=True) as fp:
             fp.write(data)
 
+        # XXX: should we verify the content of the cache, and hardlink back to
+        # the local store on success, but truncate, write and link on failure?
+        if not self.cachevfs.exists(oid):
+            lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
+
     def read(self, oid):
         """Read blob from local blobstore."""
+        if not self.vfs.exists(oid):
+            lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
         return self.vfs.read(oid)
 
     def has(self, oid):
         """Returns True if the local blobstore contains the requested blob,
         False otherwise."""
-        return self.vfs.exists(oid)
+        return self.cachevfs.exists(oid) or self.vfs.exists(oid)
 
 
 class _gitlfsremote(object):
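To make the new read/write flow easier to follow, here is a minimal, self-contained
sketch of the same pattern with plain os calls standing in for Mercurial's vfs and
lfutil.link; the class and directory names are hypothetical, not part of the change.
Writes land in the repository store and are hardlinked into the shared cache, reads
repopulate the store from the cache when a blob is missing locally, and existence
checks consult both locations.

    # Hypothetical stand-in for the local blobstore + user cache interplay.
    import os

    class cachedblobstore(object):
        def __init__(self, storedir, cachedir):
            self.storedir = storedir    # per-repo store, like .hg/store/lfs/objects
            self.cachedir = cachedir    # shared directory, like lfs.usercache
            for d in (storedir, cachedir):
                if not os.path.isdir(d):
                    os.makedirs(d)

        def _link(self, src, dst):
            # hardlink when possible; fall back to a copy on filesystems
            # that don't support links
            try:
                os.link(src, dst)
            except OSError:
                with open(src, 'rb') as s, open(dst, 'wb') as d:
                    d.write(s.read())

        def write(self, oid, data):
            # store the blob locally, then publish it to the shared cache
            path = os.path.join(self.storedir, oid)
            with open(path, 'wb') as fp:
                fp.write(data)
            cached = os.path.join(self.cachedir, oid)
            if not os.path.exists(cached):
                self._link(path, cached)

        def read(self, oid):
            # repopulate the local store from the cache if needed
            path = os.path.join(self.storedir, oid)
            if not os.path.exists(path):
                self._link(os.path.join(self.cachedir, oid), path)
            with open(path, 'rb') as fp:
                return fp.read()

        def has(self, oid):
            return (os.path.exists(os.path.join(self.cachedir, oid))
                    or os.path.exists(os.path.join(self.storedir, oid)))

Hardlinking keeps a single on-disk copy of each blob shared across local clones; the
XXX comment in the real change notes the open question of whether cached content
should be verified before it is linked back into the store.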
--- a/tests/run-tests.py	Tue Dec 05 23:08:59 2017 -0500
+++ b/tests/run-tests.py	Wed Dec 06 22:56:15 2017 -0500
@@ -1102,6 +1102,9 @@
         hgrc.write(b'[largefiles]\n')
         hgrc.write(b'usercache = %s\n' %
                    (os.path.join(self._testtmp, b'.cache/largefiles')))
+        hgrc.write(b'[lfs]\n')
+        hgrc.write(b'usercache = %s\n' %
+                   (os.path.join(self._testtmp, b'.cache/lfs')))
         hgrc.write(b'[web]\n')
         hgrc.write(b'address = localhost\n')
         hgrc.write(b'ipv6 = %s\n' % str(self._useipv6).encode('ascii'))
--- a/tests/test-basic.t	Tue Dec 05 23:08:59 2017 -0500
+++ b/tests/test-basic.t	Wed Dec 06 22:56:15 2017 -0500
@@ -5,6 +5,7 @@
   devel.default-date=0 0
   extensions.fsmonitor= (fsmonitor !)
   largefiles.usercache=$TESTTMP/.cache/largefiles (glob)
+  lfs.usercache=$TESTTMP/.cache/lfs (glob)
   ui.slash=True
   ui.interactive=False
   ui.mergemarkers=detailed
--- a/tests/test-commandserver.t	Tue Dec 05 23:08:59 2017 -0500
+++ b/tests/test-commandserver.t	Wed Dec 06 22:56:15 2017 -0500
@@ -207,6 +207,7 @@
   devel.default-date=0 0
   extensions.fsmonitor= (fsmonitor !)
   largefiles.usercache=$TESTTMP/.cache/largefiles
+  lfs.usercache=$TESTTMP/.cache/lfs
   ui.slash=True
   ui.interactive=False
   ui.mergemarkers=detailed
--- a/tests/test-lfs-test-server.t	Tue Dec 05 23:08:59 2017 -0500
+++ b/tests/test-lfs-test-server.t	Wed Dec 06 22:56:15 2017 -0500
@@ -53,6 +53,8 @@
   adding file changes
   added 1 changesets with 1 changes to 1 files
 
+Clear the cache to force a download
+  $ rm -rf `hg config lfs.usercache`
   $ cd ../repo2
   $ hg update tip -v
   resolving manifests
@@ -79,6 +81,8 @@
   adding file changes
   added 1 changesets with 3 changes to 3 files
 
+Clear the cache to force a download
+  $ rm -rf `hg config lfs.usercache`
   $ hg --repo ../repo1 update tip -v
   resolving manifests
   getting b
@@ -95,6 +99,7 @@
   $ echo FFFFF >> b
   $ hg commit -m b b
   $ rm -rf .hg/store/lfs
+  $ rm -rf `hg config lfs.usercache`
   $ hg update -C '.^'
   abort: LFS server claims required objects do not exist:
   8e6ea5f6c066b44a0efa43bcce86aea73f17e6e23f0663df0251e7524e140a13!
@@ -118,6 +123,7 @@
   size 6
   x-is-binary 0
   $ cd ..
+  $ rm -rf `hg config lfs.usercache`
   $ hg --config 'lfs.url=https://dewey-lfs.vip.facebook.com/lfs' clone test test2
   updating to branch default
   abort: LFS server error. Remote object for file data/a.i not found:(.*)! (re)
--- a/tests/test-lfs.t	Tue Dec 05 23:08:59 2017 -0500
+++ b/tests/test-lfs.t	Wed Dec 06 22:56:15 2017 -0500
@@ -566,9 +566,9 @@
   repo: repo9
   repo: repo10
 
-TODO: repo12 doesn't have any cached lfs files. Figure out how to get the
-unpushed files from repo12's source instead of the remote store, where they
-don't exist.
+repo12 doesn't have any cached lfs files and its source never pushed its
+files. Therefore, the files don't exist in the remote store. Use the files in
+the user cache.
 
   $ find $TESTTMP/repo12/.hg/store/lfs/objects -type f
   find: */repo12/.hg/store/lfs/objects': $ENOENT$ (glob)
@@ -576,24 +576,28 @@
 
   $ hg --config extensions.share= share repo12 repo13
   updating working directory
-  abort: $TESTTMP/dummy-remote/09/66faba9a01f6c78082aa45899a4fef732002d0b26404e90093adf1e876ab8d: $ENOTDIR$ (glob)
-  [255]
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg -R repo13 -q verify
+
   $ hg clone repo12 repo14
   updating to branch default
-  abort: $TESTTMP/dummy-remote/09/66faba9a01f6c78082aa45899a4fef732002d0b26404e90093adf1e876ab8d: $ENOTDIR$ (glob)
-  [255]
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg -R repo14 -q verify
 
-TODO: If the source repo doesn't have the blob (maybe it was pulled or cloned
-with --noupdate), the blob should be accessible via the global cache to send to
-the remote store.
+If the source repo doesn't have the blob (maybe it was pulled or cloned with
+--noupdate), the blob is still accessible via the global cache to send to the
+remote store.
 
   $ rm -rf $TESTTMP/repo14/.hg/store/lfs
   $ hg init repo15
   $ hg -R repo14 push repo15
   pushing to repo15
   searching for changes
-  abort: $TESTTMP/repo14/.hg/store/lfs/objects/1c/896a0adcf9262119f4a98216aaa5ca00a58b9a0ce848914a02f9cd876f65a3: $ENOTDIR$ (glob)
-  [255]
+  adding changesets
+  adding manifests
+  adding file changes
+  added 3 changesets with 2 changes to 1 files
+  $ hg -R repo14 -q verify
 
 lfs -> normal -> lfs round trip conversions are possible. The threshold for the
 lfs destination is specified here because it was originally listed in the local