Mercurial > hg
comparison mercurial/store.py @ 7229:7946503ec76e
introduce fncache repository layout
* adds a new entry 'fncache' to '.hg/requires' for new repos
* writes new file '.hg/store/fncache'
* hash-encodes filenames with long paths (issue839)
* encodes Windows reserved filenames (issue793)
author | Adrian Buehlmann <adrian@cadifra.com> |
---|---|
date | Sun, 19 Oct 2008 19:12:07 +0200 |
parents | 32e68ffccbc5 |
children | db6fbb785800 |
comparison
equal
deleted
inserted
replaced
7228:9b72c732ed2f | 7229:7946503ec76e |
---|---|
3 # Copyright 2008 Matt Mackall <mpm@selenic.com> | 3 # Copyright 2008 Matt Mackall <mpm@selenic.com> |
4 # | 4 # |
5 # This software may be used and distributed according to the terms | 5 # This software may be used and distributed according to the terms |
6 # of the GNU General Public License, incorporated herein by reference. | 6 # of the GNU General Public License, incorporated herein by reference. |
7 | 7 |
8 from i18n import _ | |
8 import os, stat, osutil, util | 9 import os, stat, osutil, util |
10 | |
11 _sha = util.sha1 | |
9 | 12 |
10 def _buildencodefun(): | 13 def _buildencodefun(): |
11 e = '_' | 14 e = '_' |
12 win_reserved = [ord(x) for x in '\\:*?"<>|'] | 15 win_reserved = [ord(x) for x in '\\:*?"<>|'] |
13 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ]) | 16 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ]) |
33 return (lambda s: "".join([cmap[c] for c in s]), | 36 return (lambda s: "".join([cmap[c] for c in s]), |
34 lambda s: "".join(list(decode(s)))) | 37 lambda s: "".join(list(decode(s)))) |
35 | 38 |
36 encodefilename, decodefilename = _buildencodefun() | 39 encodefilename, decodefilename = _buildencodefun() |
37 | 40 |
41 def _build_lower_encodefun(): | |
42 win_reserved = [ord(x) for x in '\\:*?"<>|'] | |
43 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ]) | |
44 for x in (range(32) + range(126, 256) + win_reserved): | |
45 cmap[chr(x)] = "~%02x" % x | |
46 for x in range(ord("A"), ord("Z")+1): | |
47 cmap[chr(x)] = chr(x).lower() | |
48 return lambda s: "".join([cmap[c] for c in s]) | |
49 | |
50 lowerencode = _build_lower_encodefun() | |
51 | |
52 _windows_reserved_filenames = '''con prn aux nul | |
53 com1 com2 com3 com4 com5 com6 com7 com8 com9 | |
54 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split() | |
55 def auxencode(path): | |
56 res = [] | |
57 for n in path.split('/'): | |
58 if n: | |
59 base = n.split('.')[0] | |
60 if base and (base in _windows_reserved_filenames): | |
61 # encode third letter ('aux' -> 'au~78') | |
62 ec = "~%02x" % ord(n[2]) | |
63 n = n[0:2] + ec + n[3:] | |
64 res.append(n) | |
65 return '/'.join(res) | |
66 | |
67 MAX_PATH_LEN_IN_HGSTORE = 120 | |
68 DIR_PREFIX_LEN = 8 | |
69 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4 | |
def hybridencode(path):
    '''encodes path for the store, keeping the result short

    Paths that do not start with 'data/' are returned unchanged.

    Default encoding (reversible):

    The part after 'data/' is passed through encodefilename and then
    auxencode, and 'data/' is put back in front.  If the result is not
    longer than MAX_PATH_LEN_IN_HGSTORE characters, it is returned.

    Hashed encoding (not reversible):

    Otherwise the result is built from these pieces, in this order:

    * the prefix 'dh/' (instead of 'data/')
    * the first DIR_PREFIX_LEN characters of each directory level of the
      lowerencoded and auxencoded path, taking leading levels only for
      as long as they fit into _MAX_SHORTENED_DIRS_LEN
    * the filler: the beginning of the basename (everything after the
      last '/') of that same path, as many characters as are needed to
      bring the result up to MAX_PATH_LEN_IN_HGSTORE, or the whole
      basename if it is shorter than that
    * the hex sha digest of the full, unencoded path
    * the extension of the path (e.g. '.i' or '.d')
    '''
    prefix = 'data/'
    if not path.startswith(prefix):
        return path

    tail = path[len(prefix):]
    default = prefix + auxencode(encodefilename(tail))
    if len(default) <= MAX_PATH_LEN_IN_HGSTORE:
        return default

    # too long: fall back to the hashed form
    sha = _sha(path).hexdigest()
    lowered = auxencode(lowerencode(tail))
    ext = os.path.splitext(lowered)[1]
    components = lowered.split('/')

    kept = []
    for comp in components[:-1]:
        short = comp[:DIR_PREFIX_LEN]
        # the candidate is measured with a leading '/', even for the
        # first level; this is part of the format and must not change
        candidate = '/'.join(kept) + '/' + short
        if len(candidate) > _MAX_SHORTENED_DIRS_LEN:
            break
        kept.append(short)

    dirpart = '/'.join(kept)
    if dirpart:
        dirpart += '/'

    hashed = 'dh/' + dirpart + sha + ext
    room = MAX_PATH_LEN_IN_HGSTORE - len(hashed)
    if room > 0:
        # use the remaining room for a readable piece of the basename
        hashed = 'dh/' + dirpart + components[-1][:room] + sha + ext
    return hashed
127 | |
38 def _calcmode(path): | 128 def _calcmode(path): |
39 try: | 129 try: |
40 # files in .hg/ will be created using this mode | 130 # files in .hg/ will be created using this mode |
41 mode = os.stat(path).st_mode | 131 mode = os.stat(path).st_mode |
42 # avoid some useless chmods | 132 # avoid some useless chmods |
118 | 208 |
119 def copylist(self): | 209 def copylist(self): |
120 return (['requires', '00changelog.i'] + | 210 return (['requires', '00changelog.i'] + |
121 [self.pathjoiner('store', f) for f in _data.split()]) | 211 [self.pathjoiner('store', f) for f in _data.split()]) |
122 | 212 |
def fncache(opener):
    '''yields the entries in the fncache file

    opener is called as opener('fncache', mode='rb'); the object it
    returns is iterated line by line and closed when done.

    Every line must consist of at least one character followed by a
    newline; the newline is stripped from the yielded entry.  If the
    opener raises IOError, the file is treated as nonexistent and
    nothing is yielded.

    Raises util.Abort on the first line that is empty or not newline
    terminated.  The file is closed before the abort is raised.
    '''
    try:
        fp = opener('fncache', mode='rb')
    except IOError:
        # skip nonexistent file
        return
    for n, line in enumerate(fp):
        if (len(line) < 2) or (line[-1] != '\n'):
            # Do not leak the handle on a corrupt file.  This is an
            # explicit close instead of try/finally around the yield,
            # because Python versions before 2.5 reject that construct.
            fp.close()
            t = _('invalid entry in fncache, line %s') % (n + 1)
            raise util.Abort(t)
        yield line[:-1]
    fp.close()
226 | |
class fncacheopener(object):
    '''opener wrapper that records written 'data/' paths in the fncache

    Wraps another opener.  Calls are forwarded to it with the path run
    through hybridencode.  Before that, any path starting with 'data/'
    that is opened in a mode other than 'r' or 'rb' is appended to the
    'fncache' file, unless it is already known.
    '''
    def __init__(self, opener):
        self.opener = opener
        # dict of known fncache entries (path -> True); None until the
        # fncache file has been read
        self.entries = None

    def loadfncache(self):
        '''(re)reads the fncache file into self.entries'''
        self.entries = {}
        for f in fncache(self.opener):
            self.entries[f] = True

    def __call__(self, path, mode='r', *args, **kw):
        '''opens path through the wrapped opener

        path is the unencoded store path.  mode and any further
        arguments are passed on to the wrapped opener unchanged.
        Returns whatever the wrapped opener returns.
        '''
        if mode not in ('r', 'rb') and path.startswith('data/'):
            if self.entries is None:
                self.loadfncache()
            if path not in self.entries:
                # close explicitly so the new entry is written out right
                # away instead of whenever the handle gets collected
                fp = self.opener('fncache', 'ab')
                try:
                    fp.write(path + '\n')
                finally:
                    fp.close()
                # fncache may contain non-existent files after rollback / strip
                self.entries[path] = True
        return self.opener(hybridencode(path), mode, *args, **kw)
246 | |
class fncachestore(basicstore):
    '''store that keeps a list of its data files in the 'fncache' file

    File names are mapped to on-disk names with hybridencode.
    '''
    def __init__(self, path, opener, pathjoiner):
        storepath = pathjoiner(path, 'store')
        mode = _calcmode(storepath)
        rawopener = opener(storepath)
        rawopener.createmode = mode

        self.pathjoiner = pathjoiner
        self.path = storepath
        self.createmode = mode
        # _op opens files by their on-disk name; opener takes unencoded
        # names and maintains the fncache
        self._op = rawopener
        self.opener = fncacheopener(rawopener)

    def join(self, f):
        '''returns the on-disk path of store file f'''
        encoded = hybridencode(f)
        return self.pathjoiner(self.path, encoded)

    def datafiles(self):
        '''yields (name, encoded name, size) for each existing fncache entry

        Entries whose file cannot be stat'ed are skipped.  If there was
        at least one such entry, the fncache file is rewritten with the
        remaining entries once all entries have been yielded.
        '''
        stale = False
        alive = []
        join = self.pathjoiner
        root = self.path
        for name in fncache(self._op):
            encoded = hybridencode(name)
            try:
                size = os.stat(join(root, encoded)).st_size
                yield name, encoded, size
                alive.append(name)
            except OSError:
                # nonexistent entry
                stale = True
        if stale:
            # drop entries without a file from the fncache
            # (may be caused by rollback / strip)
            out = self._op('fncache', mode='wb')
            for name in alive:
                out.write(name + '\n')
            out.close()

    def copylist(self):
        '''returns the paths to copy for this store, relative to .hg'''
        names = (_data + ' dh fncache').split()
        storefiles = [self.pathjoiner('store', f) for f in names]
        return ['requires', '00changelog.i'] + storefiles
285 | |
def store(requirements, path, opener, pathjoiner=None):
    '''returns the store object matching the repository requirements

    'store' together with 'fncache' selects fncachestore, 'store' alone
    selects encodedstore, and anything else selects basicstore.
    pathjoiner defaults to os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        return basicstore(path, opener, pathjoiner)
    if 'fncache' in requirements:
        return fncachestore(path, opener, pathjoiner)
    return encodedstore(path, opener, pathjoiner)