changeset 35280:8e72f9152c4d

lfs: introduce a user-level cache for lfs files

This is the same mechanism that is in place for largefiles, and solves several problems working with multiple local repositories. The existing largefiles method is reused in place, because I suspect that there are other functions that can be shared. If we wait a bit to identify more before `hg cp lfutil.py ...`, the history will be easier to trace.

The push between repo14 and repo15 in test-lfs.t arguably shouldn't be uploading any files with a local push. Maybe we can revisit that when `hg push` without 'lfs.url' can upload files to the push destination. Then it would be consistent for blobs in a local push to be linked to the local destination's cache.

The cache property is added to run-tests.py, the same as the largefiles property, so that test-generated files don't pollute the real location. Having files available locally broke a couple of existing lfs-test-server tests, so the cache is cleared in a few places to force file download.
author Matt Harbison <matt_harbison@yahoo.com>
date Wed, 06 Dec 2017 22:56:15 -0500
parents be4481d6222e
children 010179e21e91
files hgext/lfs/__init__.py hgext/lfs/blobstore.py tests/run-tests.py tests/test-basic.t tests/test-commandserver.t tests/test-lfs-test-server.t tests/test-lfs.t
diffstat 7 files changed, 46 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/lfs/__init__.py	Tue Dec 05 23:08:59 2017 -0500
+++ b/hgext/lfs/__init__.py	Wed Dec 06 22:56:15 2017 -0500
@@ -24,6 +24,10 @@
 
     # how many times to retry before giving up on transferring an object
     retry = 5
+
+    # the local directory to store lfs files for sharing across local clones.
+    # If not set, the cache is located in an OS specific cache location.
+    usercache = /path/to/global/cache
 """
 
 from __future__ import absolute_import
@@ -62,6 +66,9 @@
 configitem('lfs', 'url',
     default=configitem.dynamicdefault,
 )
+configitem('lfs', 'usercache',
+    default=None,
+)
 configitem('lfs', 'threshold',
     default=None,
 )
--- a/hgext/lfs/blobstore.py	Tue Dec 05 23:08:59 2017 -0500
+++ b/hgext/lfs/blobstore.py	Wed Dec 06 22:56:15 2017 -0500
@@ -20,6 +20,8 @@
     vfs as vfsmod,
 )
 
+from ..largefiles import lfutil
+
 # 64 bytes for SHA256
 _lfsre = re.compile(r'\A[a-f0-9]{64}\Z')
 
@@ -68,20 +70,29 @@
     def __init__(self, repo):
         fullpath = repo.svfs.join('lfs/objects')
         self.vfs = lfsvfs(fullpath)
+        usercache = lfutil._usercachedir(repo.ui, 'lfs')
+        self.cachevfs = lfsvfs(usercache)
 
     def write(self, oid, data):
         """Write blob to local blobstore."""
         with self.vfs(oid, 'wb', atomictemp=True) as fp:
             fp.write(data)
 
+        # XXX: should we verify the content of the cache, and hardlink back to
+        # the local store on success, but truncate, write and link on failure?
+        if not self.cachevfs.exists(oid):
+            lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
+
     def read(self, oid):
         """Read blob from local blobstore."""
+        if not self.vfs.exists(oid):
+            lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
         return self.vfs.read(oid)
 
     def has(self, oid):
         """Returns True if the local blobstore contains the requested blob,
         False otherwise."""
-        return self.vfs.exists(oid)
+        return self.cachevfs.exists(oid) or self.vfs.exists(oid)
 
 class _gitlfsremote(object):
 
--- a/tests/run-tests.py	Tue Dec 05 23:08:59 2017 -0500
+++ b/tests/run-tests.py	Wed Dec 06 22:56:15 2017 -0500
@@ -1102,6 +1102,9 @@
         hgrc.write(b'[largefiles]\n')
         hgrc.write(b'usercache = %s\n' %
                    (os.path.join(self._testtmp, b'.cache/largefiles')))
+        hgrc.write(b'[lfs]\n')
+        hgrc.write(b'usercache = %s\n' %
+                   (os.path.join(self._testtmp, b'.cache/lfs')))
         hgrc.write(b'[web]\n')
         hgrc.write(b'address = localhost\n')
         hgrc.write(b'ipv6 = %s\n' % str(self._useipv6).encode('ascii'))
--- a/tests/test-basic.t	Tue Dec 05 23:08:59 2017 -0500
+++ b/tests/test-basic.t	Wed Dec 06 22:56:15 2017 -0500
@@ -5,6 +5,7 @@
   devel.default-date=0 0
   extensions.fsmonitor= (fsmonitor !)
   largefiles.usercache=$TESTTMP/.cache/largefiles (glob)
+  lfs.usercache=$TESTTMP/.cache/lfs (glob)
   ui.slash=True
   ui.interactive=False
   ui.mergemarkers=detailed
--- a/tests/test-commandserver.t	Tue Dec 05 23:08:59 2017 -0500
+++ b/tests/test-commandserver.t	Wed Dec 06 22:56:15 2017 -0500
@@ -207,6 +207,7 @@
   devel.default-date=0 0
   extensions.fsmonitor= (fsmonitor !)
   largefiles.usercache=$TESTTMP/.cache/largefiles
+  lfs.usercache=$TESTTMP/.cache/lfs
   ui.slash=True
   ui.interactive=False
   ui.mergemarkers=detailed
--- a/tests/test-lfs-test-server.t	Tue Dec 05 23:08:59 2017 -0500
+++ b/tests/test-lfs-test-server.t	Wed Dec 06 22:56:15 2017 -0500
@@ -53,6 +53,8 @@
   adding file changes
   added 1 changesets with 1 changes to 1 files
 
+Clear the cache to force a download
+  $ rm -rf `hg config lfs.usercache`
   $ cd ../repo2
   $ hg update tip -v
   resolving manifests
@@ -79,6 +81,8 @@
   adding file changes
   added 1 changesets with 3 changes to 3 files
 
+Clear the cache to force a download
+  $ rm -rf `hg config lfs.usercache`
   $ hg --repo ../repo1 update tip -v
   resolving manifests
   getting b
@@ -95,6 +99,7 @@
   $ echo FFFFF >> b
   $ hg commit -m b b
   $ rm -rf .hg/store/lfs
+  $ rm -rf `hg config lfs.usercache`
   $ hg update -C '.^'
   abort: LFS server claims required objects do not exist:
   8e6ea5f6c066b44a0efa43bcce86aea73f17e6e23f0663df0251e7524e140a13!
@@ -118,6 +123,7 @@
   size 6
   x-is-binary 0
   $ cd ..
+  $ rm -rf `hg config lfs.usercache`
   $ hg --config 'lfs.url=https://dewey-lfs.vip.facebook.com/lfs' clone test test2
   updating to branch default
   abort: LFS server error. Remote object for file data/a.i not found:(.*)! (re)
--- a/tests/test-lfs.t	Tue Dec 05 23:08:59 2017 -0500
+++ b/tests/test-lfs.t	Wed Dec 06 22:56:15 2017 -0500
@@ -566,9 +566,9 @@
   repo: repo9
   repo: repo10
 
-TODO: repo12 doesn't have any cached lfs files.  Figure out how to get the
-unpushed files from repo12's source instead of the remote store, where they
-don't exist.
+repo12 doesn't have any cached lfs files and its source never pushed its
+files.  Therefore, the files don't exist in the remote store.  Use the files in
+the user cache.
 
   $ find $TESTTMP/repo12/.hg/store/lfs/objects -type f
   find: */repo12/.hg/store/lfs/objects': $ENOENT$ (glob)
@@ -576,24 +576,28 @@
 
   $ hg --config extensions.share= share repo12 repo13
   updating working directory
-  abort: $TESTTMP/dummy-remote/09/66faba9a01f6c78082aa45899a4fef732002d0b26404e90093adf1e876ab8d: $ENOTDIR$ (glob)
-  [255]
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg -R repo13 -q verify
+
   $ hg clone repo12 repo14
   updating to branch default
-  abort: $TESTTMP/dummy-remote/09/66faba9a01f6c78082aa45899a4fef732002d0b26404e90093adf1e876ab8d: $ENOTDIR$ (glob)
-  [255]
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg -R repo14 -q verify
 
-TODO: If the source repo doesn't have the blob (maybe it was pulled or cloned
-with --noupdate), the blob should be accessible via the global cache to send to
-the remote store.
+If the source repo doesn't have the blob (maybe it was pulled or cloned with
+--noupdate), the blob is still accessible via the global cache to send to the
+remote store.
 
   $ rm -rf $TESTTMP/repo14/.hg/store/lfs
   $ hg init repo15
   $ hg -R repo14 push repo15
   pushing to repo15
   searching for changes
-  abort: $TESTTMP/repo14/.hg/store/lfs/objects/1c/896a0adcf9262119f4a98216aaa5ca00a58b9a0ce848914a02f9cd876f65a3: $ENOTDIR$ (glob)
-  [255]
+  adding changesets
+  adding manifests
+  adding file changes
+  added 3 changesets with 2 changes to 1 files
+  $ hg -R repo14 -q verify
 
 lfs -> normal -> lfs round trip conversions are possible.  The threshold for the
 lfs destination is specified here because it was originally listed in the local