Mercurial > hg
diff mercurial/store.py @ 7229:7946503ec76e
introduce fncache repository layout
* adds a new entry 'fncache' to '.hg/requires' for new repos
* writes new file '.hg/store/fncache'
* hash-encodes filenames with long paths (issue839)
* encodes Windows reserved filenames (issue793)
author | Adrian Buehlmann <adrian@cadifra.com> |
---|---|
date | Sun, 19 Oct 2008 19:12:07 +0200 |
parents | 32e68ffccbc5 |
children | db6fbb785800 |
line wrap: on
line diff
```diff
--- a/mercurial/store.py Thu Oct 16 19:40:09 2008 +0300
+++ b/mercurial/store.py Sun Oct 19 19:12:07 2008 +0200
@@ -5,8 +5,11 @@
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 
+from i18n import _
 import os, stat, osutil, util
 
+_sha = util.sha1
+
 def _buildencodefun():
     e = '_'
     win_reserved = [ord(x) for x in '\\:*?"<>|']
@@ -35,6 +38,93 @@
 
 encodefilename, decodefilename = _buildencodefun()
 
+def _build_lower_encodefun():
+    win_reserved = [ord(x) for x in '\\:*?"<>|']
+    cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
+    for x in (range(32) + range(126, 256) + win_reserved):
+        cmap[chr(x)] = "~%02x" % x
+    for x in range(ord("A"), ord("Z")+1):
+        cmap[chr(x)] = chr(x).lower()
+    return lambda s: "".join([cmap[c] for c in s])
+
+lowerencode = _build_lower_encodefun()
+
+_windows_reserved_filenames = '''con prn aux nul
+    com1 com2 com3 com4 com5 com6 com7 com8 com9
+    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
+def auxencode(path):
+    res = []
+    for n in path.split('/'):
+        if n:
+            base = n.split('.')[0]
+            if base and (base in _windows_reserved_filenames):
+                # encode third letter ('aux' -> 'au~78')
+                ec = "~%02x" % ord(n[2])
+                n = n[0:2] + ec + n[3:]
+        res.append(n)
+    return '/'.join(res)
+
+MAX_PATH_LEN_IN_HGSTORE = 120
+DIR_PREFIX_LEN = 8
+_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
+def hybridencode(path):
+    '''encodes path with a length limit
+
+    Encodes all paths that begin with 'data/', according to the following.
+
+    Default encoding (reversible):
+
+    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
+    characters are encoded as '~xx', where xx is the two digit hex code
+    of the character (see encodefilename).
+    Relevant path components consisting of Windows reserved filenames are
+    masked by encoding the third character ('aux' -> 'au~78', see auxencode).
+
+    Hashed encoding (not reversible):
+
+    If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
+    non-reversible hybrid hashing of the path is done instead.
+    This encoding uses up to DIR_PREFIX_LEN characters of all directory
+    levels of the lowerencoded path, but not more levels than can fit into
+    _MAX_SHORTENED_DIRS_LEN.
+    Then follows the filler followed by the sha digest of the full path.
+    The filler is the beginning of the basename of the lowerencoded path
+    (the basename is everything after the last path separator). The filler
+    is as long as possible, filling in characters from the basename until
+    the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
+    of the basename have been taken).
+    The extension (e.g. '.i' or '.d') is preserved.
+
+    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
+    encoding was used.
+    '''
+    if not path.startswith('data/'):
+        return path
+    ndpath = path[len('data/'):]
+    res = 'data/' + auxencode(encodefilename(ndpath))
+    if len(res) > MAX_PATH_LEN_IN_HGSTORE:
+        digest = _sha(path).hexdigest()
+        aep = auxencode(lowerencode(ndpath))
+        _root, ext = os.path.splitext(aep)
+        parts = aep.split('/')
+        basename = parts[-1]
+        sdirs = []
+        for p in parts[:-1]:
+            d = p[:DIR_PREFIX_LEN]
+            t = '/'.join(sdirs) + '/' + d
+            if len(t) > _MAX_SHORTENED_DIRS_LEN:
+                break
+            sdirs.append(d)
+        dirs = '/'.join(sdirs)
+        if len(dirs) > 0:
+            dirs += '/'
+        res = 'dh/' + dirs + digest + ext
+        space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
+        if space_left > 0:
+            filler = basename[:space_left]
+            res = 'dh/' + dirs + filler + digest + ext
+    return res
+
 def _calcmode(path):
     try:
         # files in .hg/ will be created using this mode
@@ -120,8 +210,83 @@
         return (['requires', '00changelog.i'] +
                 [self.pathjoiner('store', f) for f in _data.split()])
 
+def fncache(opener):
+    '''yields the entries in the fncache file'''
+    try:
+        fp = opener('fncache', mode='rb')
+    except IOError:
+        # skip nonexistent file
+        return
+    for n, line in enumerate(fp):
+        if (len(line) < 2) or (line[-1] != '\n'):
+            t = _('invalid entry in fncache, line %s') % (n + 1)
+            raise util.Abort(t)
+        yield line[:-1]
+    fp.close()
+
+class fncacheopener(object):
+    def __init__(self, opener):
+        self.opener = opener
+        self.entries = None
+
+    def loadfncache(self):
+        self.entries = {}
+        for f in fncache(self.opener):
+            self.entries[f] = True
+
+    def __call__(self, path, mode='r', *args, **kw):
+        if mode not in ('r', 'rb') and path.startswith('data/'):
+            if self.entries is None:
+                self.loadfncache()
+            if path not in self.entries:
+                self.opener('fncache', 'ab').write(path + '\n')
+                # fncache may contain non-existent files after rollback / strip
+                self.entries[path] = True
+        return self.opener(hybridencode(path), mode, *args, **kw)
+
+class fncachestore(basicstore):
+    def __init__(self, path, opener, pathjoiner):
+        self.pathjoiner = pathjoiner
+        self.path = self.pathjoiner(path, 'store')
+        self.createmode = _calcmode(self.path)
+        self._op = opener(self.path)
+        self._op.createmode = self.createmode
+        self.opener = fncacheopener(self._op)
+
+    def join(self, f):
+        return self.pathjoiner(self.path, hybridencode(f))
+
+    def datafiles(self):
+        rewrite = False
+        existing = []
+        pjoin = self.pathjoiner
+        spath = self.path
+        for f in fncache(self._op):
+            ef = hybridencode(f)
+            try:
+                st = os.stat(pjoin(spath, ef))
+                yield f, ef, st.st_size
+                existing.append(f)
+            except OSError:
+                # nonexistent entry
+                rewrite = True
+        if rewrite:
+            # rewrite fncache to remove nonexistent entries
+            # (may be caused by rollback / strip)
+            fp = self._op('fncache', mode='wb')
+            for p in existing:
+                fp.write(p + '\n')
+            fp.close()
+
+    def copylist(self):
+        d = _data + ' dh fncache'
+        return (['requires', '00changelog.i'] +
+                [self.pathjoiner('store', f) for f in d.split()])
+
 def store(requirements, path, opener, pathjoiner=None):
     pathjoiner = pathjoiner or os.path.join
     if 'store' in requirements:
+        if 'fncache' in requirements:
+            return fncachestore(path, opener, pathjoiner)
         return encodedstore(path, opener, pathjoiner)
     return basicstore(path, opener, pathjoiner)
```