lfs: migrate most file filtering from threshold to custom filter
authorMatt Harbison <matt_harbison@yahoo.com>
Sun, 31 Dec 2017 02:54:49 -0500
changeset 35618 c780e0649e41
parent 35617 b75ea116603d
child 35619 c751b9fdbc40
lfs: migrate most file filtering from threshold to custom filter Migrate `lfs.threshold` to the more powerful `lfs.filter` added by D4990618 so people can specify which files are stored in LFS with more flexibility. This patch was authored by Jun Wu for the fb-experimental repo, to avoid using matcher for efficiency[1]. All I've changed here is to register the new 'lfs.track' default so that the tests run cleanly, and adapt the subsequent language changes. Migrating the remaining uses of 'lfs.threshold' can be done separately since there's a fallback in place. [1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-December/109388.html
hgext/lfs/__init__.py
hgext/lfs/wrapper.py
tests/test-lfs-test-server.t
tests/test-lfs.t
--- a/hgext/lfs/__init__.py	Sun Dec 31 12:47:36 2017 -0500
+++ b/hgext/lfs/__init__.py	Sun Dec 31 02:54:49 2017 -0500
@@ -19,8 +19,23 @@
     # (default: unset)
     url = https://example.com/lfs
 
-    # size of a file to make it use LFS
-    threshold = 10M
+    # Which files to track in LFS.  Path tests are "**.extname" for file
+    # extensions, and "path:under/some/directory" for path prefix.  Both
+    # are relative to the repository root, and the latter must be quoted.
+    # File size can be tested with the "size()" fileset, and tests can be
+    # joined with fileset operators.  (See "hg help filesets.operators".)
+    #
+    # Some examples:
+    # - all()                       # everything
+    # - none()                      # nothing
+    # - size(">20MB")               # larger than 20MB
+    # - !**.txt                     # anything not a *.txt file
+    # - **.zip | **.tar.gz | **.7z  # some types of compressed files
+    # - "path:bin"                  # files under "bin" in the project root
+    # - (**.php & size(">2MB")) | (**.js & size(">5MB")) | **.tar.gz
+    #     | ("path:bin" & !"path:bin/README") | size(">1GB")
+    # (default: none())
+    track = size(">10M")
 
     # how many times to retry before giving up on transferring an object
     retry = 5
@@ -41,8 +56,10 @@
     exchange,
     extensions,
     filelog,
+    fileset,
     hg,
     localrepo,
+    minifileset,
     node,
     registrar,
     revlog,
@@ -76,9 +93,13 @@
 configitem('lfs', 'usercache',
     default=None,
 )
+# Deprecated
 configitem('lfs', 'threshold',
     default=None,
 )
+configitem('lfs', 'track',
+    default='none()',
+)
 configitem('lfs', 'retry',
     default=5,
 )
@@ -100,9 +121,15 @@
     if not repo.local():
         return
 
-    threshold = repo.ui.configbytes('lfs', 'threshold')
+    trackspec = repo.ui.config('lfs', 'track')
 
-    repo.svfs.options['lfsthreshold'] = threshold
+    # deprecated config: lfs.threshold
+    threshold = repo.ui.configbytes('lfs', 'threshold')
+    if threshold:
+        fileset.parse(trackspec)  # make sure syntax errors are confined
+        trackspec = "(%s) | size('>%d')" % (trackspec, threshold)
+
+    repo.svfs.options['lfstrack'] = minifileset.compile(trackspec)
     repo.svfs.lfslocalblobstore = blobstore.local(repo)
     repo.svfs.lfsremoteblobstore = blobstore.remote(repo)
 
--- a/hgext/lfs/wrapper.py	Sun Dec 31 12:47:36 2017 -0500
+++ b/hgext/lfs/wrapper.py	Sun Dec 31 02:54:49 2017 -0500
@@ -123,14 +123,14 @@
 def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
                        cachedelta=None, node=None,
                        flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
-    threshold = self.opener.options['lfsthreshold']
     textlen = len(text)
     # exclude hg rename meta from file size
     meta, offset = filelog.parsemeta(text)
     if offset:
         textlen -= offset
 
-    if threshold and textlen > threshold:
+    lfstrack = self.opener.options['lfstrack']
+    if lfstrack(self.filename, textlen):
         flags |= revlog.REVIDX_EXTSTORED
 
     return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
--- a/tests/test-lfs-test-server.t	Sun Dec 31 12:47:36 2017 -0500
+++ b/tests/test-lfs-test-server.t	Sun Dec 31 02:54:49 2017 -0500
@@ -30,7 +30,7 @@
   > lfs=
   > [lfs]
   > url=http://foo:bar@$LFS_HOST/
-  > threshold=1
+  > track=all()
   > EOF
 
   $ hg init repo1
--- a/tests/test-lfs.t	Sun Dec 31 12:47:36 2017 -0500
+++ b/tests/test-lfs.t	Sun Dec 31 02:54:49 2017 -0500
@@ -4,6 +4,7 @@
   > [extensions]
   > lfs=
   > [lfs]
+  > # Test deprecated config
   > threshold=1000B
   > EOF
 
@@ -140,7 +141,7 @@
   $ cd repo3
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=size(">10B")
   > EOF
 
   $ echo LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS > large
@@ -203,7 +204,7 @@
   $ cd repo6
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=30B
+  > track=size(">30B")
   > EOF
 
   $ echo LARGE-BECAUSE-IT-IS-MORE-THAN-30-BYTES > large
@@ -239,7 +240,7 @@
   $ cd repo8
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=size(">10B")
   > EOF
 
   $ echo THIS-IS-LFS-BECAUSE-10-BYTES > a1
@@ -320,7 +321,7 @@
   $ cd repo9
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=size(">10B")
   > [diff]
   > git=1
   > EOF
@@ -454,7 +455,7 @@
   > [extensions]
   > lfs=
   > [lfs]
-  > threshold=1
+  > track=all()
   > EOF
   $ $PYTHON <<'EOF'
   > def write(path, content):
@@ -542,6 +543,47 @@
 
   $ cd ..
 
+# Test filter
+
+  $ hg init repo11
+  $ cd repo11
+  $ cat >> .hg/hgrc << EOF
+  > [lfs]
+  > track=(**.a & size(">5B")) | (**.b & !size(">5B"))
+  >      | (**.c & "path:d" & !"path:d/c.c") | size(">10B")
+  > EOF
+
+  $ mkdir a
+  $ echo aaaaaa > a/1.a
+  $ echo a > a/2.a
+  $ echo aaaaaa > 1.b
+  $ echo a > 2.b
+  $ echo a > 1.c
+  $ mkdir d
+  $ echo a > d/c.c
+  $ echo a > d/d.c
+  $ echo aaaaaaaaaaaa > x
+  $ hg add . -q
+  $ hg commit -m files
+
+  $ for p in a/1.a a/2.a 1.b 2.b 1.c d/c.c d/d.c x; do
+  >   if hg debugdata $p 0 2>&1 | grep git-lfs >/dev/null; then
+  >     echo "${p}: is lfs"
+  >   else
+  >     echo "${p}: not lfs"
+  >   fi
+  > done
+  a/1.a: is lfs
+  a/2.a: not lfs
+  1.b: not lfs
+  2.b: is lfs
+  1.c: not lfs
+  d/c.c: not lfs
+  d/d.c: is lfs
+  x: is lfs
+
+  $ cd ..
+
 # Verify the repos
 
   $ cat > $TESTTMP/dumpflog.py << EOF