Mercurial > hg
changeset 7229:7946503ec76e
introduce fncache repository layout
* adds a new entry 'fncache' to '.hg/requires' for new repos
* writes new file '.hg/store/fncache'
* hash-encodes filenames with long paths (issue839)
* encodes Windows reserved filenames (issue793)
author | Adrian Buehlmann <adrian@cadifra.com> |
---|---|
date | Sun, 19 Oct 2008 19:12:07 +0200 |
parents | 9b72c732ed2f |
children | 261a9f47b44b |
files | mercurial/localrepo.py mercurial/store.py tests/test-dumprevlog tests/test-dumprevlog.out tests/test-inherit-mode.out tests/test-init.out tests/test-verify.out |
diffstat | 7 files changed, 215 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/localrepo.py Thu Oct 16 19:40:09 2008 +0300 +++ b/mercurial/localrepo.py Sun Oct 19 19:12:07 2008 +0200 @@ -16,7 +16,7 @@ class localrepository(repo.repository): capabilities = util.set(('lookup', 'changegroupsubset')) - supported = ('revlogv1', 'store') + supported = ('revlogv1', 'store', 'fncache') def __init__(self, parentui, path=None, create=0): repo.repository.__init__(self) @@ -35,6 +35,7 @@ if parentui.configbool('format', 'usestore', True): os.mkdir(os.path.join(self.path, "store")) requirements.append("store") + requirements.append("fncache") # create an invalid changelog self.opener("00changelog.i", "a").write( '\0\0\0\2' # represents revlogv2
--- a/mercurial/store.py Thu Oct 16 19:40:09 2008 +0300 +++ b/mercurial/store.py Sun Oct 19 19:12:07 2008 +0200 @@ -5,8 +5,11 @@ # This software may be used and distributed according to the terms # of the GNU General Public License, incorporated herein by reference. +from i18n import _ import os, stat, osutil, util +_sha = util.sha1 + def _buildencodefun(): e = '_' win_reserved = [ord(x) for x in '\\:*?"<>|'] @@ -35,6 +38,93 @@ encodefilename, decodefilename = _buildencodefun() +def _build_lower_encodefun(): + win_reserved = [ord(x) for x in '\\:*?"<>|'] + cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ]) + for x in (range(32) + range(126, 256) + win_reserved): + cmap[chr(x)] = "~%02x" % x + for x in range(ord("A"), ord("Z")+1): + cmap[chr(x)] = chr(x).lower() + return lambda s: "".join([cmap[c] for c in s]) + +lowerencode = _build_lower_encodefun() + +_windows_reserved_filenames = '''con prn aux nul + com1 com2 com3 com4 com5 com6 com7 com8 com9 + lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split() +def auxencode(path): + res = [] + for n in path.split('/'): + if n: + base = n.split('.')[0] + if base and (base in _windows_reserved_filenames): + # encode third letter ('aux' -> 'au~78') + ec = "~%02x" % ord(n[2]) + n = n[0:2] + ec + n[3:] + res.append(n) + return '/'.join(res) + +MAX_PATH_LEN_IN_HGSTORE = 120 +DIR_PREFIX_LEN = 8 +_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4 +def hybridencode(path): + '''encodes path with a length limit + + Encodes all paths that begin with 'data/', according to the following. + + Default encoding (reversible): + + Encodes all uppercase letters 'X' as '_x'. All reserved or illegal + characters are encoded as '~xx', where xx is the two digit hex code + of the character (see encodefilename). + Relevant path components consisting of Windows reserved filenames are + masked by encoding the third character ('aux' -> 'au~78', see auxencode). 
+ + Hashed encoding (not reversible): + + If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a + non-reversible hybrid hashing of the path is done instead. + This encoding uses up to DIR_PREFIX_LEN characters of all directory + levels of the lowerencoded path, but not more levels than can fit into + _MAX_SHORTENED_DIRS_LEN. + Then follows the filler followed by the sha digest of the full path. + The filler is the beginning of the basename of the lowerencoded path + (the basename is everything after the last path separator). The filler + is as long as possible, filling in characters from the basename until + the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars + of the basename have been taken). + The extension (e.g. '.i' or '.d') is preserved. + + The string 'data/' at the beginning is replaced with 'dh/', if the hashed + encoding was used. + ''' + if not path.startswith('data/'): + return path + ndpath = path[len('data/'):] + res = 'data/' + auxencode(encodefilename(ndpath)) + if len(res) > MAX_PATH_LEN_IN_HGSTORE: + digest = _sha(path).hexdigest() + aep = auxencode(lowerencode(ndpath)) + _root, ext = os.path.splitext(aep) + parts = aep.split('/') + basename = parts[-1] + sdirs = [] + for p in parts[:-1]: + d = p[:DIR_PREFIX_LEN] + t = '/'.join(sdirs) + '/' + d + if len(t) > _MAX_SHORTENED_DIRS_LEN: + break + sdirs.append(d) + dirs = '/'.join(sdirs) + if len(dirs) > 0: + dirs += '/' + res = 'dh/' + dirs + digest + ext + space_left = MAX_PATH_LEN_IN_HGSTORE - len(res) + if space_left > 0: + filler = basename[:space_left] + res = 'dh/' + dirs + filler + digest + ext + return res + def _calcmode(path): try: # files in .hg/ will be created using this mode @@ -120,8 +210,83 @@ return (['requires', '00changelog.i'] + [self.pathjoiner('store', f) for f in _data.split()]) +def fncache(opener): + '''yields the entries in the fncache file''' + try: + fp = opener('fncache', mode='rb') + except IOError: + # skip nonexistent file + return + 
for n, line in enumerate(fp): + if (len(line) < 2) or (line[-1] != '\n'): + t = _('invalid entry in fncache, line %s') % (n + 1) + raise util.Abort(t) + yield line[:-1] + fp.close() + +class fncacheopener(object): + def __init__(self, opener): + self.opener = opener + self.entries = None + + def loadfncache(self): + self.entries = {} + for f in fncache(self.opener): + self.entries[f] = True + + def __call__(self, path, mode='r', *args, **kw): + if mode not in ('r', 'rb') and path.startswith('data/'): + if self.entries is None: + self.loadfncache() + if path not in self.entries: + self.opener('fncache', 'ab').write(path + '\n') + # fncache may contain non-existent files after rollback / strip + self.entries[path] = True + return self.opener(hybridencode(path), mode, *args, **kw) + +class fncachestore(basicstore): + def __init__(self, path, opener, pathjoiner): + self.pathjoiner = pathjoiner + self.path = self.pathjoiner(path, 'store') + self.createmode = _calcmode(self.path) + self._op = opener(self.path) + self._op.createmode = self.createmode + self.opener = fncacheopener(self._op) + + def join(self, f): + return self.pathjoiner(self.path, hybridencode(f)) + + def datafiles(self): + rewrite = False + existing = [] + pjoin = self.pathjoiner + spath = self.path + for f in fncache(self._op): + ef = hybridencode(f) + try: + st = os.stat(pjoin(spath, ef)) + yield f, ef, st.st_size + existing.append(f) + except OSError: + # nonexistent entry + rewrite = True + if rewrite: + # rewrite fncache to remove nonexistent entries + # (may be caused by rollback / strip) + fp = self._op('fncache', mode='wb') + for p in existing: + fp.write(p + '\n') + fp.close() + + def copylist(self): + d = _data + ' dh fncache' + return (['requires', '00changelog.i'] + + [self.pathjoiner('store', f) for f in d.split()]) + def store(requirements, path, opener, pathjoiner=None): pathjoiner = pathjoiner or os.path.join if 'store' in requirements: + if 'fncache' in requirements: + return 
fncachestore(path, opener, pathjoiner) return encodedstore(path, opener, pathjoiner) return basicstore(path, opener, pathjoiner)
--- a/tests/test-dumprevlog Thu Oct 16 19:40:09 2008 +0300 +++ b/tests/test-dumprevlog Sun Oct 19 19:12:07 2008 +0200 @@ -2,6 +2,7 @@ CONTRIBDIR=$TESTDIR/../contrib +echo % prepare repo-a mkdir repo-a cd repo-a hg init @@ -18,11 +19,13 @@ hg verify -echo dumping revlog of file a to stdout: +echo +echo % dumping revlog of file a to stdout python $CONTRIBDIR/dumprevlog .hg/store/data/a.i -echo dumprevlog done +echo % dumprevlog done -# dump all revlogs to file repo.dump +echo +echo % dump all revlogs to file repo.dump find .hg/store -name "*.i" | sort | xargs python $CONTRIBDIR/dumprevlog > ../repo.dump cd .. @@ -31,17 +34,28 @@ cd repo-b hg init -echo undumping: +echo +echo % undumping into repo-b python $CONTRIBDIR/undumprevlog < ../repo.dump -echo undumping done +echo % undumping done + +cd .. +echo +echo % clone --pull repo-b repo-c to rebuild fncache +hg clone --pull -U repo-b repo-c + +cd repo-c + +echo +echo % verify repo-c hg verify cd .. -echo comparing repos: -hg -R repo-b incoming repo-a -hg -R repo-a incoming repo-b -echo comparing done +echo +echo % comparing repos +hg -R repo-c incoming repo-a +hg -R repo-a incoming repo-c exit 0
--- a/tests/test-dumprevlog.out Thu Oct 16 19:40:09 2008 +0300 +++ b/tests/test-dumprevlog.out Sun Oct 19 19:12:07 2008 +0200 @@ -1,9 +1,11 @@ +% prepare repo-a checking changesets checking manifests crosschecking files in changesets and manifests checking files 1 files, 3 changesets, 3 total revisions -dumping revlog of file a to stdout: + +% dumping revlog of file a to stdout file: .hg/store/data/a.i node: 183d2312b35066fb6b3b449b84efc370d50993d0 linkrev: 0 @@ -32,22 +34,34 @@ adding more to file a -end- -dumprevlog done -undumping: +% dumprevlog done + +% dump all revlogs to file repo.dump + +% undumping into repo-b .hg/store/00changelog.i .hg/store/00manifest.i .hg/store/data/a.i -undumping done +% undumping done + +% clone --pull repo-b repo-c to rebuild fncache +requesting all changes +adding changesets +adding manifests +adding file changes +added 3 changesets with 3 changes to 1 files + +% verify repo-c checking changesets checking manifests crosschecking files in changesets and manifests checking files 1 files, 3 changesets, 3 total revisions -comparing repos: + +% comparing repos comparing with repo-a searching for changes no changes found -comparing with repo-b +comparing with repo-c searching for changes no changes found -comparing done
--- a/tests/test-inherit-mode.out Thu Oct 16 19:40:09 2008 +0300
+++ b/tests/test-inherit-mode.out Sun Oct 19 19:12:07 2008 +0200
@@ -22,6 +22,7 @@
 00770 ./.hg/store/data/dir/
 00660 ./.hg/store/data/dir/bar.i
 00660 ./.hg/store/data/foo.i
+00660 ./.hg/store/fncache
 00660 ./.hg/store/undo
 00660 ./.hg/undo.branch
 00660 ./.hg/undo.dirstate
@@ -49,6 +50,7 @@
 00770 ../push/.hg/store/data/dir/
 00660 ../push/.hg/store/data/dir/bar.i
 00660 ../push/.hg/store/data/foo.i
+00660 ../push/.hg/store/fncache
 00660 ../push/.hg/store/undo
 00660 ../push/.hg/undo.branch
 00660 ../push/.hg/undo.dirstate
--- a/tests/test-init.out Thu Oct 16 19:40:09 2008 +0300
+++ b/tests/test-init.out Sun Oct 19 19:12:07 2008 +0200
@@ -3,6 +3,7 @@
 00changelog.i created
 revlogv1
 store
+fncache
 adding foo
 # creating repo with old format
 revlogv1
--- a/tests/test-verify.out Thu Oct 16 19:40:09 2008 +0300 +++ b/tests/test-verify.out Sun Oct 19 19:12:07 2008 +0200 @@ -17,7 +17,6 @@ checking manifests crosschecking files in changesets and manifests checking files - ?: cannot decode filename 'data/X_f_o_o.txt.i' data/FOO.txt.i@0: missing revlog! 0: empty or missing FOO.txt FOO.txt@0: f62022d3d590 in manifests not found @@ -27,8 +26,6 @@ data/bar.txt.i@0: missing revlog! 0: empty or missing bar.txt bar.txt@0: 256559129457 in manifests not found -warning: orphan revlog 'data/xbar.txt.i' 3 files, 1 changesets, 0 total revisions -1 warnings encountered! -10 integrity errors encountered! +9 integrity errors encountered! (first damaged changeset appears to be 0)