comparison mercurial/store.py @ 7229:7946503ec76e

introduce fncache repository layout * adds a new entry 'fncache' to '.hg/requires' for new repos * writes new file '.hg/store/fncache' * hash-encodes filenames with long paths (issue839) * encodes Windows reserved filenames (issue793)
author Adrian Buehlmann <adrian@cadifra.com>
date Sun, 19 Oct 2008 19:12:07 +0200
parents 32e68ffccbc5
children db6fbb785800
comparison
equal deleted inserted replaced
7228:9b72c732ed2f 7229:7946503ec76e
3 # Copyright 2008 Matt Mackall <mpm@selenic.com> 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # 4 #
5 # This software may be used and distributed according to the terms 5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference. 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 from i18n import _
8 import os, stat, osutil, util 9 import os, stat, osutil, util
10
11 _sha = util.sha1
9 12
10 def _buildencodefun(): 13 def _buildencodefun():
11 e = '_' 14 e = '_'
12 win_reserved = [ord(x) for x in '\\:*?"<>|'] 15 win_reserved = [ord(x) for x in '\\:*?"<>|']
13 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ]) 16 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
33 return (lambda s: "".join([cmap[c] for c in s]), 36 return (lambda s: "".join([cmap[c] for c in s]),
34 lambda s: "".join(list(decode(s)))) 37 lambda s: "".join(list(decode(s))))
35 38
36 encodefilename, decodefilename = _buildencodefun() 39 encodefilename, decodefilename = _buildencodefun()
37 40
def _build_lower_encodefun():
    '''build the lowercasing filename encoder

    The returned function maps a string character by character:

    - characters 0..31, 126..255 and the Windows reserved characters
      \\ : * ? " < > | become '~xx' (two digit lowercase hex code)
    - 'A'..'Z' become 'a'..'z'
    - every other character is kept as is

    Upper and lower case letters map to the same output, so the encoding
    cannot be reversed.
    '''
    reserved = [ord(c) for c in '\\:*?"<>|']
    escaped = list(range(32)) + list(range(126, 256)) + reserved

    table = {}
    # start with the identity mapping for the 7 bit range ...
    for code in range(127):
        table[chr(code)] = chr(code)
    # ... then override everything that has to be escaped ...
    for code in escaped:
        table[chr(code)] = "~%02x" % code
    # ... and finally fold upper case letters to lower case
    for code in range(ord("A"), ord("Z") + 1):
        table[chr(code)] = chr(code).lower()

    def encode(s):
        return "".join([table[c] for c in s])
    return encode

lowerencode = _build_lower_encodefun()
51
_windows_reserved_filenames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()

def auxencode(path):
    '''mask path components that are Windows reserved filenames

    path is split on '/'. A component whose part before the first '.' is
    listed in _windows_reserved_filenames gets its third character replaced
    by '~xx' (two digit hex code), e.g. 'aux' -> 'au~78' and
    'con.txt' -> 'co~6e.txt'. All other components, including empty ones,
    are passed through unchanged.

    The comparison is case sensitive and the list is lowercase only, so
    callers have to hand in an already case-encoded path.
    '''
    def mask(name):
        stem = name.split('.')[0]
        if stem and stem in _windows_reserved_filenames:
            # every reserved name has at least three characters, so
            # name[2] always exists here
            return "%s~%02x%s" % (name[:2], ord(name[2]), name[3:])
        return name

    return '/'.join([mask(name) for name in path.split('/')])
66
MAX_PATH_LEN_IN_HGSTORE = 120
DIR_PREFIX_LEN = 8
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4

def hybridencode(path):
    '''encode a store path, switching to a hashed form if it gets too long

    Paths that do not start with 'data/' are returned unchanged.

    Default encoding:

    The part after 'data/' is run through encodefilename and then through
    auxencode (which masks Windows reserved filenames, 'aux' -> 'au~78').
    If the result, including the 'data/' prefix, is not longer than
    MAX_PATH_LEN_IN_HGSTORE characters, it is returned.

    Hashed encoding (not reversible):

    Otherwise the result is built as

        'dh/' + <shortened dirs> + <filler> + <sha digest> + <extension>

    where everything except the digest is taken from the lowerencoded and
    auxencoded path:

    - shortened dirs: the first DIR_PREFIX_LEN characters of each directory
      level, taking levels from the left for as long as they fit into
      _MAX_SHORTENED_DIRS_LEN
    - filler: as many leading characters of the basename (everything after
      the last '/') as are needed to reach MAX_PATH_LEN_IN_HGSTORE
      characters in total, possibly none
    - sha digest: hex digest of the complete, unencoded path
    - extension: as found by os.path.splitext (e.g. '.i' or '.d')
    '''
    prefix = 'data/'
    if not path.startswith(prefix):
        return path

    tail = path[len(prefix):]
    plain = prefix + auxencode(encodefilename(tail))
    if len(plain) <= MAX_PATH_LEN_IN_HGSTORE:
        return plain

    # the default encoding is too long: fall back to the hashed layout
    digest = _sha(path).hexdigest()
    lowered = auxencode(lowerencode(tail))
    ext = os.path.splitext(lowered)[1]
    components = lowered.split('/')
    basename = components.pop()

    shortdirs = []
    for comp in components:
        short = comp[:DIR_PREFIX_LEN]
        # length check includes the separator in front of the new level
        candidate = '/'.join(shortdirs) + '/' + short
        if len(candidate) > _MAX_SHORTENED_DIRS_LEN:
            break
        shortdirs.append(short)

    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'

    # whatever room is left below the limit is filled from the basename
    space_left = MAX_PATH_LEN_IN_HGSTORE - len('dh/' + dirs + digest + ext)
    filler = basename[:max(space_left, 0)]
    return 'dh/' + dirs + filler + digest + ext
127
38 def _calcmode(path): 128 def _calcmode(path):
39 try: 129 try:
40 # files in .hg/ will be created using this mode 130 # files in .hg/ will be created using this mode
41 mode = os.stat(path).st_mode 131 mode = os.stat(path).st_mode
42 # avoid some useless chmods 132 # avoid some useless chmods
118 208
def copylist(self):
    '''return 'requires' and '00changelog.i' followed by every name
    listed in _data, joined below 'store' with self.pathjoiner'''
    join = self.pathjoiner
    stored = [join('store', name) for name in _data.split()]
    return ['requires', '00changelog.i'] + stored
122 212
def fncache(opener):
    '''yields the entries in the fncache file

    opener is called as opener('fncache', mode='rb'). If that raises
    IOError the file is treated as nonexistent and nothing is yielded.

    Every line has to consist of at least one character followed by a
    newline; the entry is yielded without the newline. Any other line
    raises util.Abort naming the (1-based) line number.

    The file is closed as soon as the generator finishes, no matter
    whether it was exhausted, aborted on an invalid entry or closed early
    by the caller.
    '''
    try:
        fp = opener('fncache', mode='rb')
    except IOError:
        # skip nonexistent file
        return
    try:
        for n, line in enumerate(fp):
            if (len(line) < 2) or (line[-1] != '\n'):
                t = _('invalid entry in fncache, line %s') % (n + 1)
                raise util.Abort(t)
            yield line[:-1]
    finally:
        # do not leave the handle open behind an abort or a caller that
        # stops iterating early
        fp.close()
226
class fncacheopener(object):
    '''opener wrapper that keeps the fncache file up to date

    Calling an instance opens hybridencode(path) through the wrapped
    opener. If a path starting with 'data/' is opened in any mode other
    than 'r' or 'rb' and is not yet listed in fncache, the unencoded path
    is appended to the fncache file first.
    '''
    def __init__(self, opener):
        self.opener = opener
        # paths known to be listed in fncache (used as a set);
        # None until the file has been loaded by loadfncache
        self.entries = None

    def loadfncache(self):
        '''(re)read the fncache file into self.entries'''
        self.entries = {}
        for f in fncache(self.opener):
            self.entries[f] = True

    def __call__(self, path, mode='r', *args, **kw):
        '''open path (unencoded store path) with the wrapped opener

        Extra positional and keyword arguments are handed through to the
        wrapped opener unchanged.
        '''
        if mode not in ('r', 'rb') and path.startswith('data/'):
            if self.entries is None:
                self.loadfncache()
            if path not in self.entries:
                fp = self.opener('fncache', 'ab')
                try:
                    fp.write(path + '\n')
                finally:
                    # close explicitly so the new entry is written out
                    # and the handle released before the data file is
                    # opened
                    fp.close()
                # fncache may contain non-existent files after rollback / strip
                self.entries[path] = True
        return self.opener(hybridencode(path), mode, *args, **kw)
246
class fncachestore(basicstore):
    '''store that encodes file names with hybridencode and lists its data
    files in the 'fncache' file inside the store directory'''

    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        # _op is the plain opener for the store directory; it is used for
        # the fncache file itself
        self._op = opener(self.path)
        self._op.createmode = self.createmode
        # opener encodes the names and records new data files in fncache
        self.opener = fncacheopener(self._op)

    def join(self, f):
        '''return the path of the hybridencoded f below the store'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yield (name, encoded name, size) for each fncache entry that
        exists on disk

        Entries whose file cannot be stat'ed are skipped. If there was
        at least one such entry, the fncache file is rewritten with the
        existing entries only once all entries have been yielded.
        '''
        rewrite = False
        existing = []
        pjoin = self.pathjoiner
        spath = self.path
        for f in fncache(self._op):
            ef = hybridencode(f)
            # only a failing stat marks the entry as nonexistent
            try:
                st = os.stat(pjoin(spath, ef))
            except OSError:
                # nonexistent entry
                rewrite = True
                continue
            yield f, ef, st.st_size
            existing.append(f)
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            fp = self._op('fncache', mode='wb')
            try:
                for p in existing:
                    fp.write(p + '\n')
            finally:
                # release the handle even if a write fails
                fp.close()

    def copylist(self):
        '''return 'requires' and '00changelog.i' followed by every name
        listed in _data plus 'dh' and 'fncache', joined below 'store'
        with self.pathjoiner'''
        d = _data + ' dh fncache'
        return (['requires', '00changelog.i'] +
                [self.pathjoiner('store', f) for f in d.split()])
285
def store(requirements, path, opener, pathjoiner=None):
    '''create the store object matching the repository requirements

    - no 'store' requirement:        basicstore
    - 'store' without 'fncache':     encodedstore
    - 'store' together with 'fncache': fncachestore

    pathjoiner defaults to os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        storecls = basicstore
    elif 'fncache' in requirements:
        storecls = fncachestore
    else:
        storecls = encodedstore
    return storecls(path, opener, pathjoiner)