lfs: migrate most file filtering from threshold to custom filter
Migrate `lfs.threshold` to the more powerful `lfs.filter` added by D4990618
(registered as `lfs.track` in this patch), so that people can specify which
files are stored in LFS with more flexibility.
This patch was authored by Jun Wu for the fb-experimental repo, to avoid using
matcher for efficiency[1]. All I've changed here is to register the new
'lfs.track' default so that the tests run cleanly, and adapt to the subsequent
language changes. Migrating the remaining uses of 'lfs.threshold' can be done
separately since there's a fallback in place.
[1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-December/109388.html
--- a/hgext/lfs/__init__.py Sun Dec 31 12:47:36 2017 -0500
+++ b/hgext/lfs/__init__.py Sun Dec 31 02:54:49 2017 -0500
@@ -19,8 +19,23 @@
# (default: unset)
url = https://example.com/lfs
- # size of a file to make it use LFS
- threshold = 10M
+ # Which files to track in LFS. Path tests are "**.extname" for file
+ # extensions, and "path:under/some/directory" for path prefix. Both
+ # are relative to the repository root, and the latter must be quoted.
+ # File size can be tested with the "size()" fileset, and tests can be
+ # joined with fileset operators. (See "hg help filesets.operators".)
+ #
+ # Some examples:
+ # - all() # everything
+ # - none() # nothing
+ # - size(">20MB") # larger than 20MB
+ # - !**.txt # anything not a *.txt file
+ # - **.zip | **.tar.gz | **.7z # some types of compressed files
+ # - "path:bin" # files under "bin" in the project root
+ # - (**.php & size(">2MB")) | (**.js & size(">5MB")) | **.tar.gz
+ # | ("path:bin" & !"path:/bin/README") | size(">1GB")
+ # (default: none())
+ track = size(">10M")
# how many times to retry before giving up on transferring an object
retry = 5
@@ -41,8 +56,10 @@
exchange,
extensions,
filelog,
+ fileset,
hg,
localrepo,
+ minifileset,
node,
registrar,
revlog,
@@ -76,9 +93,13 @@
configitem('lfs', 'usercache',
default=None,
)
+# Deprecated
configitem('lfs', 'threshold',
default=None,
)
+configitem('lfs', 'track',
+ default='none()',
+)
configitem('lfs', 'retry',
default=5,
)
@@ -100,9 +121,15 @@
if not repo.local():
return
- threshold = repo.ui.configbytes('lfs', 'threshold')
+ trackspec = repo.ui.config('lfs', 'track')
- repo.svfs.options['lfsthreshold'] = threshold
+ # deprecated config: lfs.threshold
+ threshold = repo.ui.configbytes('lfs', 'threshold')
+ if threshold:
+ fileset.parse(trackspec) # make sure syntax errors are confined
+ trackspec = "(%s) | size('>%d')" % (trackspec, threshold)
+
+ repo.svfs.options['lfstrack'] = minifileset.compile(trackspec)
repo.svfs.lfslocalblobstore = blobstore.local(repo)
repo.svfs.lfsremoteblobstore = blobstore.remote(repo)
--- a/hgext/lfs/wrapper.py Sun Dec 31 12:47:36 2017 -0500
+++ b/hgext/lfs/wrapper.py Sun Dec 31 02:54:49 2017 -0500
@@ -123,14 +123,14 @@
def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
cachedelta=None, node=None,
flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
- threshold = self.opener.options['lfsthreshold']
textlen = len(text)
# exclude hg rename meta from file size
meta, offset = filelog.parsemeta(text)
if offset:
textlen -= offset
- if threshold and textlen > threshold:
+ lfstrack = self.opener.options['lfstrack']
+ if lfstrack(self.filename, textlen):
flags |= revlog.REVIDX_EXTSTORED
return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
--- a/tests/test-lfs-test-server.t Sun Dec 31 12:47:36 2017 -0500
+++ b/tests/test-lfs-test-server.t Sun Dec 31 02:54:49 2017 -0500
@@ -30,7 +30,7 @@
> lfs=
> [lfs]
> url=http://foo:bar@$LFS_HOST/
- > threshold=1
+ > track=all()
> EOF
$ hg init repo1
--- a/tests/test-lfs.t Sun Dec 31 12:47:36 2017 -0500
+++ b/tests/test-lfs.t Sun Dec 31 02:54:49 2017 -0500
@@ -4,6 +4,7 @@
> [extensions]
> lfs=
> [lfs]
+ > # Test deprecated config
> threshold=1000B
> EOF
@@ -140,7 +141,7 @@
$ cd repo3
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=10B
+ > track=size(">10B")
> EOF
$ echo LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS > large
@@ -203,7 +204,7 @@
$ cd repo6
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=30B
+ > track=size(">30B")
> EOF
$ echo LARGE-BECAUSE-IT-IS-MORE-THAN-30-BYTES > large
@@ -239,7 +240,7 @@
$ cd repo8
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=10B
+ > track=size(">10B")
> EOF
$ echo THIS-IS-LFS-BECAUSE-10-BYTES > a1
@@ -320,7 +321,7 @@
$ cd repo9
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=10B
+ > track=size(">10B")
> [diff]
> git=1
> EOF
@@ -454,7 +455,7 @@
> [extensions]
> lfs=
> [lfs]
- > threshold=1
+ > track=all()
> EOF
$ $PYTHON <<'EOF'
> def write(path, content):
@@ -542,6 +543,47 @@
$ cd ..
+# Test filter
+
+ $ hg init repo11
+ $ cd repo11
+ $ cat >> .hg/hgrc << EOF
+ > [lfs]
+ > track=(**.a & size(">5B")) | (**.b & !size(">5B"))
+ > | (**.c & "path:d" & !"path:d/c.c") | size(">10B")
+ > EOF
+
+ $ mkdir a
+ $ echo aaaaaa > a/1.a
+ $ echo a > a/2.a
+ $ echo aaaaaa > 1.b
+ $ echo a > 2.b
+ $ echo a > 1.c
+ $ mkdir d
+ $ echo a > d/c.c
+ $ echo a > d/d.c
+ $ echo aaaaaaaaaaaa > x
+ $ hg add . -q
+ $ hg commit -m files
+
+ $ for p in a/1.a a/2.a 1.b 2.b 1.c d/c.c d/d.c x; do
+ > if hg debugdata $p 0 2>&1 | grep git-lfs >/dev/null; then
+ > echo "${p}: is lfs"
+ > else
+ > echo "${p}: not lfs"
+ > fi
+ > done
+ a/1.a: is lfs
+ a/2.a: not lfs
+ 1.b: not lfs
+ 2.b: is lfs
+ 1.c: not lfs
+ d/c.c: not lfs
+ d/d.c: is lfs
+ x: is lfs
+
+ $ cd ..
+
# Verify the repos
$ cat > $TESTTMP/dumpflog.py << EOF