introduce fncache repository layout
* adds a new entry 'fncache' to '.hg/requires' for new repos
* writes new file '.hg/store/fncache'
* hash-encodes filenames with long paths (issue839)
* encodes Windows reserved filenames (issue793)
--- a/mercurial/localrepo.py Thu Oct 16 19:40:09 2008 +0300
+++ b/mercurial/localrepo.py Sun Oct 19 19:12:07 2008 +0200
@@ -16,7 +16,7 @@
class localrepository(repo.repository):
capabilities = util.set(('lookup', 'changegroupsubset'))
- supported = ('revlogv1', 'store')
+ supported = ('revlogv1', 'store', 'fncache')
def __init__(self, parentui, path=None, create=0):
repo.repository.__init__(self)
@@ -35,6 +35,7 @@
if parentui.configbool('format', 'usestore', True):
os.mkdir(os.path.join(self.path, "store"))
requirements.append("store")
+ requirements.append("fncache")
# create an invalid changelog
self.opener("00changelog.i", "a").write(
'\0\0\0\2' # represents revlogv2
--- a/mercurial/store.py Thu Oct 16 19:40:09 2008 +0300
+++ b/mercurial/store.py Sun Oct 19 19:12:07 2008 +0200
@@ -5,8 +5,11 @@
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.
+from i18n import _
import os, stat, osutil, util
+_sha = util.sha1
+
def _buildencodefun():
e = '_'
win_reserved = [ord(x) for x in '\\:*?"<>|']
@@ -35,6 +38,93 @@
encodefilename, decodefilename = _buildencodefun()
+def _build_lower_encodefun():
+    '''build the lowercasing, character-wise filename encoder
+
+    The returned function translates a string one character at a time:
+    characters with a code below 32 or from 126 upwards, and the
+    characters listed in win_reserved, become '~xx' (two digit lowercase
+    hex code); 'A'-'Z' become lowercase; anything else is kept.
+    '''
+    win_reserved = [ord(c) for c in '\\:*?"<>|']
+    table = {}
+    for code in range(256):
+        ch = chr(code)
+        if code < 32 or code >= 126 or code in win_reserved:
+            table[ch] = "~%02x" % code
+        elif "A" <= ch <= "Z":
+            table[ch] = ch.lower()
+        else:
+            table[ch] = ch
+    def encode(s):
+        return "".join([table[c] for c in s])
+    return encode
+
+lowerencode = _build_lower_encodefun()
+
+_windows_reserved_filenames = (
+    'con prn aux nul '
+    'com1 com2 com3 com4 com5 com6 com7 com8 com9 '
+    'lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9').split()
+def auxencode(path):
+    '''mask Windows reserved filenames in a '/'-separated path
+
+    A path component is masked when the text before its first '.' is
+    one of _windows_reserved_filenames (compared case-sensitively): its
+    third character is replaced by '~xx', the two digit hex code of that
+    character ('aux' -> 'au~78'). All other components, including empty
+    ones, are passed through unchanged.
+    '''
+    encoded = []
+    for comp in path.split('/'):
+        stem = comp.split('.', 1)[0]
+        if stem in _windows_reserved_filenames:
+            # every reserved name has at least three characters
+            comp = comp[:2] + "~%02x" % ord(comp[2]) + comp[3:]
+        encoded.append(comp)
+    return '/'.join(encoded)
+
+MAX_PATH_LEN_IN_HGSTORE = 120
+DIR_PREFIX_LEN = 8
+_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
+def hybridencode(path):
+    '''encode a store path, falling back to a hash for long paths
+
+    Paths that do not start with 'data/' are returned unchanged.
+
+    Default encoding:
+
+    The part after 'data/' is passed through encodefilename and then
+    through auxencode, which masks path components named like Windows
+    reserved filenames ('aux' -> 'au~78'). If the result, including the
+    'data/' prefix, has at most MAX_PATH_LEN_IN_HGSTORE characters, it
+    is returned.
+
+    Hashed encoding (not reversible):
+
+    Otherwise the result is assembled from these pieces, in order:
+
+    - 'dh/' instead of 'data/'
+    - the first DIR_PREFIX_LEN characters of the directory levels of
+      the lowerencoded and auxencoded path, joined with '/'; levels are
+      taken from the left and the first one that would push the joined
+      prefixes past _MAX_SHORTENED_DIRS_LEN ends the list
+    - a filler: the leading characters of the basename (everything
+      after the last path separator) of that same encoded path, as many
+      as still fit into MAX_PATH_LEN_IN_HGSTORE (possibly none)
+    - the hex digest of _sha over the complete, unencoded path
+    - the extension of the encoded path (e.g. '.i' or '.d')
+    '''
+    if not path.startswith('data/'):
+        return path
+    tail = path[len('data/'):]
+    plain = 'data/' + auxencode(encodefilename(tail))
+    if len(plain) <= MAX_PATH_LEN_IN_HGSTORE:
+        return plain
+
+    digest = _sha(path).hexdigest()
+    lowered = auxencode(lowerencode(tail))
+    ext = os.path.splitext(lowered)[1]
+    components = lowered.split('/')
+    basename = components[-1]
+
+    kept = []
+    for comp in components[:-1]:
+        prefix = comp[:DIR_PREFIX_LEN]
+        # the separator is counted even in front of the first level
+        if len('/'.join(kept) + '/' + prefix) > _MAX_SHORTENED_DIRS_LEN:
+            break
+        kept.append(prefix)
+    dirs = '/'.join(kept)
+    if dirs:
+        dirs += '/'
+
+    hashed = 'dh/' + dirs + digest + ext
+    room = MAX_PATH_LEN_IN_HGSTORE - len(hashed)
+    if room > 0:
+        # pad with the start of the basename to keep names recognizable
+        hashed = 'dh/' + dirs + basename[:room] + digest + ext
+    return hashed
+
def _calcmode(path):
try:
# files in .hg/ will be created using this mode
@@ -120,8 +210,83 @@
return (['requires', '00changelog.i'] +
[self.pathjoiner('store', f) for f in _data.split()])
+def fncache(opener):
+    '''yields the entries in the fncache file
+
+    opener is called as opener('fncache', mode='rb'). If that raises
+    IOError the file is treated as absent and nothing is yielded.
+
+    Every line has to be a non-empty entry terminated by a newline; the
+    entry is yielded without the newline. Any other line raises
+    util.Abort naming the (1-based) line number.
+
+    The file is closed after the last entry and also before aborting on
+    an invalid one.
+    '''
+    try:
+        fp = opener('fncache', mode='rb')
+    except IOError:
+        # skip nonexistent file
+        return
+    for n, line in enumerate(fp):
+        if (len(line) < 2) or (line[-1] != '\n'):
+            # close explicitly so the handle does not stay open while the
+            # Abort propagates (a try/finally around the yield would need
+            # Python >= 2.5)
+            fp.close()
+            t = _('invalid entry in fncache, line %s') % (n + 1)
+            raise util.Abort(t)
+        yield line[:-1]
+    fp.close()
+
+class fncacheopener(object):
+    '''opener wrapper that records written data files in fncache
+
+    Calls are forwarded to the wrapped opener with the path passed
+    through hybridencode. Before that, a path starting with 'data/'
+    that is opened in any mode other than 'r' or 'rb' is appended to
+    the 'fncache' file unless it is already listed there.
+    '''
+    def __init__(self, opener):
+        self.opener = opener
+        # names known to be listed in fncache; None until first needed
+        self.entries = None
+
+    def loadfncache(self):
+        '''read the current fncache entries into self.entries'''
+        self.entries = {}
+        for f in fncache(self.opener):
+            self.entries[f] = True
+
+    def __call__(self, path, mode='r', *args, **kw):
+        '''open path (unencoded) through the wrapped opener
+
+        Extra positional and keyword arguments are handed to the wrapped
+        opener unchanged.
+        '''
+        if mode not in ('r', 'rb') and path.startswith('data/'):
+            if self.entries is None:
+                self.loadfncache()
+            if path not in self.entries:
+                fp = self.opener('fncache', 'ab')
+                try:
+                    fp.write(path + '\n')
+                finally:
+                    # do not rely on garbage collection to flush and
+                    # release the handle
+                    fp.close()
+                # fncache may contain non-existent files after rollback / strip
+                self.entries[path] = True
+        return self.opener(hybridencode(path), mode, *args, **kw)
+
+class fncachestore(basicstore):
+    '''store whose data files are named by hybridencode and listed in
+    the 'fncache' file inside the store directory'''
+    def __init__(self, path, opener, pathjoiner):
+        self.pathjoiner = pathjoiner
+        self.path = self.pathjoiner(path, 'store')
+        self.createmode = _calcmode(self.path)
+        # raw opener on the store directory, used for fncache itself
+        self._op = opener(self.path)
+        self._op.createmode = self.createmode
+        # encoding opener that also maintains fncache
+        self.opener = fncacheopener(self._op)
+
+    def join(self, f):
+        '''return the path of the hybridencoded f inside the store'''
+        return self.pathjoiner(self.path, hybridencode(f))
+
+    def datafiles(self):
+        '''yield (name, encoded name, size) for every fncache entry
+        whose file exists
+
+        If entries without a file were seen, fncache is rewritten with
+        only the existing entries once all entries have been visited.
+        '''
+        rewrite = False
+        existing = []
+        pjoin = self.pathjoiner
+        spath = self.path
+        for f in fncache(self._op):
+            ef = hybridencode(f)
+            # only a failing stat marks the entry as nonexistent
+            try:
+                st = os.stat(pjoin(spath, ef))
+            except OSError:
+                # nonexistent entry
+                rewrite = True
+                continue
+            yield f, ef, st.st_size
+            existing.append(f)
+        if rewrite:
+            # rewrite fncache to remove nonexistent entries
+            # (may be caused by rollback / strip)
+            fp = self._op('fncache', mode='wb')
+            try:
+                for p in existing:
+                    fp.write(p + '\n')
+            finally:
+                fp.close()
+
+    def copylist(self):
+        '''return the files and directories to copy: the basic list
+        plus the 'dh' directory and the 'fncache' file'''
+        d = _data + ' dh fncache'
+        return (['requires', '00changelog.i'] +
+                [self.pathjoiner('store', f) for f in d.split()])
+
def store(requirements, path, opener, pathjoiner=None):
pathjoiner = pathjoiner or os.path.join
if 'store' in requirements:
+ if 'fncache' in requirements:
+ return fncachestore(path, opener, pathjoiner)
return encodedstore(path, opener, pathjoiner)
return basicstore(path, opener, pathjoiner)
--- a/tests/test-dumprevlog Thu Oct 16 19:40:09 2008 +0300
+++ b/tests/test-dumprevlog Sun Oct 19 19:12:07 2008 +0200
@@ -2,6 +2,7 @@
CONTRIBDIR=$TESTDIR/../contrib
+echo % prepare repo-a
mkdir repo-a
cd repo-a
hg init
@@ -18,11 +19,13 @@
hg verify
-echo dumping revlog of file a to stdout:
+echo
+echo % dumping revlog of file a to stdout
python $CONTRIBDIR/dumprevlog .hg/store/data/a.i
-echo dumprevlog done
+echo % dumprevlog done
-# dump all revlogs to file repo.dump
+echo
+echo % dump all revlogs to file repo.dump
find .hg/store -name "*.i" | sort | xargs python $CONTRIBDIR/dumprevlog > ../repo.dump
cd ..
@@ -31,17 +34,28 @@
cd repo-b
hg init
-echo undumping:
+echo
+echo % undumping into repo-b
python $CONTRIBDIR/undumprevlog < ../repo.dump
-echo undumping done
+echo % undumping done
+
+cd ..
+echo
+echo % clone --pull repo-b repo-c to rebuild fncache
+hg clone --pull -U repo-b repo-c
+
+cd repo-c
+
+echo
+echo % verify repo-c
hg verify
cd ..
-echo comparing repos:
-hg -R repo-b incoming repo-a
-hg -R repo-a incoming repo-b
-echo comparing done
+echo
+echo % comparing repos
+hg -R repo-c incoming repo-a
+hg -R repo-a incoming repo-c
exit 0
--- a/tests/test-dumprevlog.out Thu Oct 16 19:40:09 2008 +0300
+++ b/tests/test-dumprevlog.out Sun Oct 19 19:12:07 2008 +0200
@@ -1,9 +1,11 @@
+% prepare repo-a
checking changesets
checking manifests
crosschecking files in changesets and manifests
checking files
1 files, 3 changesets, 3 total revisions
-dumping revlog of file a to stdout:
+
+% dumping revlog of file a to stdout
file: .hg/store/data/a.i
node: 183d2312b35066fb6b3b449b84efc370d50993d0
linkrev: 0
@@ -32,22 +34,34 @@
adding more to file a
-end-
-dumprevlog done
-undumping:
+% dumprevlog done
+
+% dump all revlogs to file repo.dump
+
+% undumping into repo-b
.hg/store/00changelog.i
.hg/store/00manifest.i
.hg/store/data/a.i
-undumping done
+% undumping done
+
+% clone --pull repo-b repo-c to rebuild fncache
+requesting all changes
+adding changesets
+adding manifests
+adding file changes
+added 3 changesets with 3 changes to 1 files
+
+% verify repo-c
checking changesets
checking manifests
crosschecking files in changesets and manifests
checking files
1 files, 3 changesets, 3 total revisions
-comparing repos:
+
+% comparing repos
comparing with repo-a
searching for changes
no changes found
-comparing with repo-b
+comparing with repo-c
searching for changes
no changes found
-comparing done
--- a/tests/test-inherit-mode.out Thu Oct 16 19:40:09 2008 +0300
+++ b/tests/test-inherit-mode.out Sun Oct 19 19:12:07 2008 +0200
@@ -22,6 +22,7 @@
00770 ./.hg/store/data/dir/
00660 ./.hg/store/data/dir/bar.i
00660 ./.hg/store/data/foo.i
+00660 ./.hg/store/fncache
00660 ./.hg/store/undo
00660 ./.hg/undo.branch
00660 ./.hg/undo.dirstate
@@ -49,6 +50,7 @@
00770 ../push/.hg/store/data/dir/
00660 ../push/.hg/store/data/dir/bar.i
00660 ../push/.hg/store/data/foo.i
+00660 ../push/.hg/store/fncache
00660 ../push/.hg/store/undo
00660 ../push/.hg/undo.branch
00660 ../push/.hg/undo.dirstate
--- a/tests/test-init.out Thu Oct 16 19:40:09 2008 +0300
+++ b/tests/test-init.out Sun Oct 19 19:12:07 2008 +0200
@@ -3,6 +3,7 @@
00changelog.i created
revlogv1
store
+fncache
adding foo
# creating repo with old format
revlogv1
--- a/tests/test-verify.out Thu Oct 16 19:40:09 2008 +0300
+++ b/tests/test-verify.out Sun Oct 19 19:12:07 2008 +0200
@@ -17,7 +17,6 @@
checking manifests
crosschecking files in changesets and manifests
checking files
- ?: cannot decode filename 'data/X_f_o_o.txt.i'
data/FOO.txt.i@0: missing revlog!
0: empty or missing FOO.txt
FOO.txt@0: f62022d3d590 in manifests not found
@@ -27,8 +26,6 @@
data/bar.txt.i@0: missing revlog!
0: empty or missing bar.txt
bar.txt@0: 256559129457 in manifests not found
-warning: orphan revlog 'data/xbar.txt.i'
3 files, 1 changesets, 0 total revisions
-1 warnings encountered!
-10 integrity errors encountered!
+9 integrity errors encountered!
(first damaged changeset appears to be 0)