pathencode: for long paths, strip first 5 chars, not first dir
When encoding long paths, the pure Python code strips the first
directory from the path, while the native code currently strips the
first 5 characters. This discrepancy has not been a problem so far,
since we have not stored anything in directories other than
data/. However, we will soon be storing submanifest revlogs in
metadata/, so the discrepancy will have to go [1]. Since file
collisions are avoided by the hashing alone (which is done on the full
unencoded path), it doesn't really matter whether we drop the first
dir, the first 5 characters, or special-case non-data/. To avoid
touching the C code, let's always strip the first 5 characters.
[1] Or maybe elsewhere, but the discrepancy is ugly either way.
--- a/mercurial/store.py Wed May 13 18:57:38 2015 +0200
+++ b/mercurial/store.py Wed May 06 15:58:14 2015 -0700
@@ -187,7 +187,7 @@
def _hashencode(path, dotencode):
digest = _sha(path).hexdigest()
- le = lowerencode(path).split('/')[1:]
+ le = lowerencode(path[5:]).split('/')
parts = _auxencode(le, dotencode)
basename = parts[-1]
_root, ext = os.path.splitext(basename)
--- a/tests/test-hybridencode.py Wed May 13 18:57:38 2015 +0200
+++ b/tests/test-hybridencode.py Wed May 06 15:58:14 2015 -0700
@@ -460,3 +460,9 @@
'VWXYZ-1234567890-xxxxxxxxx-xxxxxxxxx-xxxxxxxx-xxxx'
'xxxxx-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwww'
'wwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww.i')
+
+print "paths outside data/ can be encoded"
+show('metadata/dir/00manifest.i')
+show('metadata/12345678/12345678/12345678/12345678/12345678/'
+ '12345678/12345678/12345678/12345678/12345678/12345678/'
+ '12345678/12345678/00manifest.i')
--- a/tests/test-hybridencode.py.out Wed May 13 18:57:38 2015 +0200
+++ b/tests/test-hybridencode.py.out Wed May 06 15:58:14 2015 -0700
@@ -491,3 +491,10 @@
A = 'data/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12.3456789-12345-ABCDEFGHIJKLMNOPRSTUVWXYZ-abcdefghjiklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPRSTUVWXYZ-1234567890-xxxxxxxxx-xxxxxxxxx-xxxxxxxx-xxxxxxxxx-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww.i'
B = 'dh/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/-xxxxx93352aa50377751d9e5ebdf52da1e6e69a6887a6.i'
+paths outside data/ can be encoded
+A = 'metadata/dir/00manifest.i'
+B = 'metadata/dir/00manifest.i'
+
+A = 'metadata/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/00manifest.i'
+B = 'dh/ata/12345678/12345678/12345678/12345678/12345678/12345678/12345678/00manife0a4da1f89aa2aa9eb0896eb451288419049781b4.i'
+