changeset 25027:297ea0df75d0

pathencode: for long paths, strip first 5 chars, not first dir. When encoding long paths, the pure Python code strips the first directory from the path, while the native code currently strips the first 5 characters. This discrepancy has not been a problem so far, since we have not stored anything in directories other than data/. However, we will soon be storing submanifest revlogs in metadata/, so the discrepancy will have to go [1]. Since file collisions are avoided by the hashing alone (which is done on the full unencoded path), it doesn't really matter whether we drop the first dir, the first 5 characters, or special-case non-data/. To avoid touching the C code, let's always strip the first 5 characters. [1] Or maybe elsewhere, but the discrepancy is ugly either way.
author Martin von Zweigbergk <martinvonz@google.com>
date Wed, 06 May 2015 15:58:14 -0700
parents 80d28a88137f
children 62c2786b4327
files mercurial/store.py tests/test-hybridencode.py tests/test-hybridencode.py.out
diffstat 3 files changed, 14 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/store.py	Wed May 13 18:57:38 2015 +0200
+++ b/mercurial/store.py	Wed May 06 15:58:14 2015 -0700
@@ -187,7 +187,7 @@
 
 def _hashencode(path, dotencode):
     digest = _sha(path).hexdigest()
-    le = lowerencode(path).split('/')[1:]
+    le = lowerencode(path[5:]).split('/')
     parts = _auxencode(le, dotencode)
     basename = parts[-1]
     _root, ext = os.path.splitext(basename)
--- a/tests/test-hybridencode.py	Wed May 13 18:57:38 2015 +0200
+++ b/tests/test-hybridencode.py	Wed May 06 15:58:14 2015 -0700
@@ -460,3 +460,9 @@
           'VWXYZ-1234567890-xxxxxxxxx-xxxxxxxxx-xxxxxxxx-xxxx'
           'xxxxx-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwww'
           'wwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww.i')
+
+print "paths outside data/ can be encoded"
+show('metadata/dir/00manifest.i')
+show('metadata/12345678/12345678/12345678/12345678/12345678/'
+          '12345678/12345678/12345678/12345678/12345678/12345678/'
+          '12345678/12345678/00manifest.i')
--- a/tests/test-hybridencode.py.out	Wed May 13 18:57:38 2015 +0200
+++ b/tests/test-hybridencode.py.out	Wed May 06 15:58:14 2015 -0700
@@ -491,3 +491,10 @@
 A = 'data/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12.3456789-12345-ABCDEFGHIJKLMNOPRSTUVWXYZ-abcdefghjiklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPRSTUVWXYZ-1234567890-xxxxxxxxx-xxxxxxxxx-xxxxxxxx-xxxxxxxxx-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww.i'
 B = 'dh/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/-xxxxx93352aa50377751d9e5ebdf52da1e6e69a6887a6.i'
 
+paths outside data/ can be encoded
+A = 'metadata/dir/00manifest.i'
+B = 'metadata/dir/00manifest.i'
+
+A = 'metadata/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/00manifest.i'
+B = 'dh/ata/12345678/12345678/12345678/12345678/12345678/12345678/12345678/00manife0a4da1f89aa2aa9eb0896eb451288419049781b4.i'
+