Mercurial > hg
changeset 25027:297ea0df75d0
pathencode: for long paths, strip first 5 chars, not first dir
When encoding long paths, the pure Python code strips the first
directory from the path, while the native code currently strips the
first 5 characters. This discrepancy has not been a problem so far,
since we have not stored anything in directories other than
data/. However, we will soon be storing submanifest revlogs in
metadata/, so the discrepancy will have to go [1]. Since file
collisions are avoided by the hashing alone (which is done on the full
unencoded path), it doesn't really matter whether we drop the first
dir, drop the first 5 characters, or special-case paths outside data/. To avoid
touching the C code, let's always strip the first 5 characters.
[1] Or maybe they will be stored elsewhere, but the discrepancy is ugly either way.
author | Martin von Zweigbergk <martinvonz@google.com> |
---|---|
date | Wed, 06 May 2015 15:58:14 -0700 |
parents | 80d28a88137f |
children | 62c2786b4327 |
files | mercurial/store.py tests/test-hybridencode.py tests/test-hybridencode.py.out |
diffstat | 3 files changed, 14 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/store.py Wed May 13 18:57:38 2015 +0200 +++ b/mercurial/store.py Wed May 06 15:58:14 2015 -0700 @@ -187,7 +187,7 @@ def _hashencode(path, dotencode): digest = _sha(path).hexdigest() - le = lowerencode(path).split('/')[1:] + le = lowerencode(path[5:]).split('/') parts = _auxencode(le, dotencode) basename = parts[-1] _root, ext = os.path.splitext(basename)
--- a/tests/test-hybridencode.py Wed May 13 18:57:38 2015 +0200 +++ b/tests/test-hybridencode.py Wed May 06 15:58:14 2015 -0700 @@ -460,3 +460,9 @@ 'VWXYZ-1234567890-xxxxxxxxx-xxxxxxxxx-xxxxxxxx-xxxx' 'xxxxx-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwww' 'wwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww.i') + +print "paths outside data/ can be encoded" +show('metadata/dir/00manifest.i') +show('metadata/12345678/12345678/12345678/12345678/12345678/' + '12345678/12345678/12345678/12345678/12345678/12345678/' + '12345678/12345678/00manifest.i')
--- a/tests/test-hybridencode.py.out Wed May 13 18:57:38 2015 +0200 +++ b/tests/test-hybridencode.py.out Wed May 06 15:58:14 2015 -0700 @@ -491,3 +491,10 @@ A = 'data/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12.3456789-12345-ABCDEFGHIJKLMNOPRSTUVWXYZ-abcdefghjiklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPRSTUVWXYZ-1234567890-xxxxxxxxx-xxxxxxxxx-xxxxxxxx-xxxxxxxxx-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww.i' B = 'dh/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/-xxxxx93352aa50377751d9e5ebdf52da1e6e69a6887a6.i' +paths outside data/ can be encoded +A = 'metadata/dir/00manifest.i' +B = 'metadata/dir/00manifest.i' + +A = 'metadata/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/00manifest.i' +B = 'dh/ata/12345678/12345678/12345678/12345678/12345678/12345678/12345678/00manife0a4da1f89aa2aa9eb0896eb451288419049781b4.i' +