# HG changeset patch # User Matt Harbison # Date 1512618975 18000 # Node ID 8e72f9152c4dbd3820a750539dc5eef53d040bfa # Parent be4481d6222e22ea904e9629da8e36b26b8879bc lfs: introduce a user level cache for lfs files This is the same mechanism in place for largefiles, and solves several problems working with multiple local repositories. The existing largefiles method is reused in place, because I suspect that there are other functions that can be shared. If we wait a bit to identify more before `hg cp lfutil.py ...`, the history will be easier to trace. The push between repo14 and repo15 in test-lfs.t arguably shouldn't be uploading any files with a local push. Maybe we can revisit that when `hg push` without 'lfs.url' can upload files to the push destination. Then it would be consistent for blobs in a local push to be linked to the local destination's cache. The cache property is added to run-tests.py, the same as the largefiles property, so that test generated files don't pollute the real location. Having files available locally broke a couple existing lfs-test-server tests, so the cache is cleared in a few places to force file download. diff -r be4481d6222e -r 8e72f9152c4d hgext/lfs/__init__.py --- a/hgext/lfs/__init__.py Tue Dec 05 23:08:59 2017 -0500 +++ b/hgext/lfs/__init__.py Wed Dec 06 22:56:15 2017 -0500 @@ -24,6 +24,10 @@ # how many times to retry before giving up on transferring an object retry = 5 + + # the local directory to store lfs files for sharing across local clones. + # If not set, the cache is located in an OS specific cache location. + usercache = /path/to/global/cache """ from __future__ import absolute_import @@ -62,6 +66,9 @@ configitem('lfs', 'url', default=configitem.dynamicdefault, ) +configitem('lfs', 'usercache', + default=None, +) configitem('lfs', 'threshold', default=None, ) diff -r be4481d6222e -r 8e72f9152c4d hgext/lfs/blobstore.py --- a/hgext/lfs/blobstore.py Tue Dec 05 23:08:59 2017 -0500 +++ b/hgext/lfs/blobstore.py Wed Dec 06 22:56:15 2017 -0500 @@ -20,6 +20,8 @@ vfs as vfsmod, ) +from ..largefiles import lfutil + # 64 bytes for SHA256 _lfsre = re.compile(r'\A[a-f0-9]{64}\Z') @@ -68,20 +70,29 @@ def __init__(self, repo): fullpath = repo.svfs.join('lfs/objects') self.vfs = lfsvfs(fullpath) + usercache = lfutil._usercachedir(repo.ui, 'lfs') + self.cachevfs = lfsvfs(usercache) def write(self, oid, data): """Write blob to local blobstore.""" with self.vfs(oid, 'wb', atomictemp=True) as fp: fp.write(data) + # XXX: should we verify the content of the cache, and hardlink back to + # the local store on success, but truncate, write and link on failure? + if not self.cachevfs.exists(oid): + lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid)) + def read(self, oid): """Read blob from local blobstore.""" + if not self.vfs.exists(oid): + lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid)) return self.vfs.read(oid) def has(self, oid): """Returns True if the local blobstore contains the requested blob, False otherwise.""" - return self.vfs.exists(oid) + return self.cachevfs.exists(oid) or self.vfs.exists(oid) class _gitlfsremote(object): diff -r be4481d6222e -r 8e72f9152c4d tests/run-tests.py --- a/tests/run-tests.py Tue Dec 05 23:08:59 2017 -0500 +++ b/tests/run-tests.py Wed Dec 06 22:56:15 2017 -0500 @@ -1102,6 +1102,9 @@ hgrc.write(b'[largefiles]\n') hgrc.write(b'usercache = %s\n' % (os.path.join(self._testtmp, b'.cache/largefiles'))) + hgrc.write(b'[lfs]\n') + hgrc.write(b'usercache = %s\n' % + (os.path.join(self._testtmp, b'.cache/lfs'))) hgrc.write(b'[web]\n') hgrc.write(b'address = localhost\n') hgrc.write(b'ipv6 = %s\n' % str(self._useipv6).encode('ascii')) diff -r be4481d6222e -r 8e72f9152c4d tests/test-basic.t --- a/tests/test-basic.t Tue Dec 05 23:08:59 2017 -0500 +++ b/tests/test-basic.t Wed Dec 06 22:56:15 2017 -0500 @@ -5,6 +5,7 @@ devel.default-date=0 0 extensions.fsmonitor= (fsmonitor !) largefiles.usercache=$TESTTMP/.cache/largefiles (glob) + lfs.usercache=$TESTTMP/.cache/lfs (glob) ui.slash=True ui.interactive=False ui.mergemarkers=detailed diff -r be4481d6222e -r 8e72f9152c4d tests/test-commandserver.t --- a/tests/test-commandserver.t Tue Dec 05 23:08:59 2017 -0500 +++ b/tests/test-commandserver.t Wed Dec 06 22:56:15 2017 -0500 @@ -207,6 +207,7 @@ devel.default-date=0 0 extensions.fsmonitor= (fsmonitor !) largefiles.usercache=$TESTTMP/.cache/largefiles + lfs.usercache=$TESTTMP/.cache/lfs ui.slash=True ui.interactive=False ui.mergemarkers=detailed diff -r be4481d6222e -r 8e72f9152c4d tests/test-lfs-test-server.t --- a/tests/test-lfs-test-server.t Tue Dec 05 23:08:59 2017 -0500 +++ b/tests/test-lfs-test-server.t Wed Dec 06 22:56:15 2017 -0500 @@ -53,6 +53,8 @@ adding file changes added 1 changesets with 1 changes to 1 files +Clear the cache to force a download + $ rm -rf `hg config lfs.usercache` $ cd ../repo2 $ hg update tip -v resolving manifests @@ -79,6 +81,8 @@ adding file changes added 1 changesets with 3 changes to 3 files +Clear the cache to force a download + $ rm -rf `hg config lfs.usercache` $ hg --repo ../repo1 update tip -v resolving manifests getting b @@ -95,6 +99,7 @@ $ echo FFFFF >> b $ hg commit -m b b $ rm -rf .hg/store/lfs + $ rm -rf `hg config lfs.usercache` $ hg update -C '.^' abort: LFS server claims required objects do not exist: 8e6ea5f6c066b44a0efa43bcce86aea73f17e6e23f0663df0251e7524e140a13! @@ -118,6 +123,7 @@ size 6 x-is-binary 0 $ cd .. + $ rm -rf `hg config lfs.usercache` $ hg --config 'lfs.url=https://dewey-lfs.vip.facebook.com/lfs' clone test test2 updating to branch default abort: LFS server error. Remote object for file data/a.i not found:(.*)! (re) diff -r be4481d6222e -r 8e72f9152c4d tests/test-lfs.t --- a/tests/test-lfs.t Tue Dec 05 23:08:59 2017 -0500 +++ b/tests/test-lfs.t Wed Dec 06 22:56:15 2017 -0500 @@ -566,9 +566,9 @@ repo: repo9 repo: repo10 -TODO: repo12 doesn't have any cached lfs files. Figure out how to get the -unpushed files from repo12's source instead of the remote store, where they -don't exist. +repo12 doesn't have any cached lfs files and its source never pushed its +files. Therefore, the files don't exist in the remote store. Use the files in +the user cache. $ find $TESTTMP/repo12/.hg/store/lfs/objects -type f find: */repo12/.hg/store/lfs/objects': $ENOENT$ (glob) @@ -576,24 +576,28 @@ $ hg --config extensions.share= share repo12 repo13 updating working directory - abort: $TESTTMP/dummy-remote/09/66faba9a01f6c78082aa45899a4fef732002d0b26404e90093adf1e876ab8d: $ENOTDIR$ (glob) - [255] + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ hg -R repo13 -q verify + $ hg clone repo12 repo14 updating to branch default - abort: $TESTTMP/dummy-remote/09/66faba9a01f6c78082aa45899a4fef732002d0b26404e90093adf1e876ab8d: $ENOTDIR$ (glob) - [255] + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ hg -R repo14 -q verify -TODO: If the source repo doesn't have the blob (maybe it was pulled or cloned -with --noupdate), the blob should be accessible via the global cache to send to -the remote store. +If the source repo doesn't have the blob (maybe it was pulled or cloned with +--noupdate), the blob is still accessible via the global cache to send to the +remote store. $ rm -rf $TESTTMP/repo14/.hg/store/lfs $ hg init repo15 $ hg -R repo14 push repo15 pushing to repo15 searching for changes - abort: $TESTTMP/repo14/.hg/store/lfs/objects/1c/896a0adcf9262119f4a98216aaa5ca00a58b9a0ce848914a02f9cd876f65a3: $ENOTDIR$ (glob) - [255] + adding changesets + adding manifests + adding file changes + added 3 changesets with 2 changes to 1 files + $ hg -R repo14 -q verify lfs -> normal -> lfs round trip conversions are possible. The threshold for the lfs destination is specified here because it was originally listed in the local