|
1 # shallowutil.py -- remotefilelog utilities |
|
2 # |
|
3 # Copyright 2014 Facebook, Inc. |
|
4 # |
|
5 # This software may be used and distributed according to the terms of the |
|
6 # GNU General Public License version 2 or any later version. |
|
7 from __future__ import absolute_import |
|
8 |
|
9 import collections |
|
10 import errno |
|
11 import hashlib |
|
12 import os |
|
13 import stat |
|
14 import struct |
|
15 import tempfile |
|
16 |
|
17 from mercurial.i18n import _ |
|
18 from mercurial import ( |
|
19 error, |
|
20 pycompat, |
|
21 revlog, |
|
22 util, |
|
23 ) |
|
24 from mercurial.utils import ( |
|
25 storageutil, |
|
26 stringutil, |
|
27 ) |
|
28 from . import constants |
|
29 |
|
30 if not pycompat.iswindows: |
|
31 import grp |
|
32 |
|
def getcachekey(reponame, file, id):
    """Return the shared-cache key (a relative path) for a file revision.

    The key has the form ``<reponame>/<h[:2]>/<h[2:]>/<id>``, where ``h`` is
    the hexadecimal SHA-1 digest of ``file``.
    """
    digest = hashlib.sha1(file).hexdigest()
    fanout, remainder = digest[:2], digest[2:]
    return os.path.join(reponame, fanout, remainder, id)
|
36 |
|
def getlocalkey(file, id):
    """Return the local-store key (a relative path) for a file revision.

    The key has the form ``<h>/<id>``, where ``h`` is the hexadecimal SHA-1
    digest of ``file``.
    """
    digest = hashlib.sha1(file).hexdigest()
    return os.path.join(digest, id)
|
40 |
|
def getcachepath(ui, allowempty=False):
    """Return the expanded value of the ``remotefilelog.cachepath`` config.

    When the option is unset or empty, return None if ``allowempty`` is true
    and raise ``error.Abort`` otherwise.
    """
    configured = ui.config("remotefilelog", "cachepath")
    if configured:
        return util.expandpath(configured)
    if allowempty:
        return None
    raise error.Abort(_("could not find config option "
                        "remotefilelog.cachepath"))
|
50 |
|
def getcachepackpath(repo, category):
    """Return the pack directory for ``category`` inside the shared cache.

    Packs of ``constants.FILEPACK_CATEGORY`` live directly in
    ``<cachepath>/<repo.name>/packs``; every other category gets its own
    sub-directory below that.
    """
    root = getcachepath(repo.ui)
    if category == constants.FILEPACK_CATEGORY:
        return os.path.join(root, repo.name, 'packs')
    return os.path.join(root, repo.name, 'packs', category)
|
57 |
|
def getlocalpackpath(base, category):
    """Return ``<base>/packs/<category>``, the local pack directory."""
    packroot = os.path.join(base, 'packs')
    return os.path.join(packroot, category)
|
60 |
|
def createrevlogtext(text, copyfrom=None, copyrev=None):
    """returns a string that matches the revlog contents in a
    traditional revlog

    A metadata header is packed in front of ``text`` when ``copyfrom`` is
    given or when ``text`` itself starts with the metadata marker; the
    copy source and revision are recorded only when ``copyfrom`` is given.
    Otherwise ``text`` is returned unchanged.
    """
    needsmeta = copyfrom or text.startswith('\1\n')
    if not needsmeta:
        return text

    meta = {}
    if copyfrom:
        meta['copy'] = copyfrom
        meta['copyrev'] = copyrev
    return storageutil.packmeta(meta, text)
|
73 |
|
def parsemeta(text):
    """parse mercurial filelog metadata

    Returns ``(meta, text)``: ``meta`` is the dictionary produced by
    ``storageutil.parsemeta`` (an empty dict when that is falsy) and
    ``text`` has its leading metadata block, if any, stripped off.
    """
    marker = '\1\n'
    meta, unusedsize = storageutil.parsemeta(text)
    if text.startswith(marker):
        # the metadata block ends at the second marker
        closing = text.index(marker, 2)
        text = text[closing + 2:]
    return meta or {}, text
|
81 |
|
def sumdicts(*dicts):
    """Adds all the values of *dicts together into one dictionary. This assumes
    the values in *dicts are all summable.

    e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}

    The result is a ``collections.defaultdict`` whose missing keys read as 0.
    """
    result = collections.defaultdict(int)
    for d in dicts:
        # items() rather than iteritems(): works on both Python 2 and 3
        for k, v in d.items():
            result[k] += v
    return result
|
93 |
|
def prefixkeys(dict, prefix):
    """Returns a copy of ``dict`` with ``prefix`` prepended to all its keys.

    The input dictionary is not modified.
    """
    result = {}
    # items() rather than iteritems(): works on both Python 2 and 3
    for k, v in dict.items():
        result[prefix + k] = v
    return result
|
100 |
|
def reportpackmetrics(ui, prefix, *stores):
    """Log the summed metrics of ``stores`` through ``ui.log``.

    The metrics of every store are added together, each key is prefixed
    with ``<prefix>_`` and the result is passed as keyword arguments to the
    ``<prefix>_packsizes`` log event.
    """
    permetrics = [store.getmetrics() for store in stores]
    combined = sumdicts(*permetrics)
    logfields = prefixkeys(combined, prefix + '_')
    ui.log(prefix + "_packsizes", "", **logfields)
|
105 |
|
def _parsepackmeta(metabuf):
    """parse datapack meta, bytes (<metadata-list>) -> dict

    Each entry is a 1-byte key, a 2-byte big-endian length and that many
    bytes of value.

    The dict contains raw content - both keys and values are strings.
    Upper-level business may want to convert some of them to other types like
    integers, on their own.

    raise ValueError if the data is corrupted
    """
    metadict = {}
    offset = 0
    buflen = len(metabuf)
    while buflen - offset >= 3:
        # slice instead of indexing: indexing bytes yields an int on
        # Python 3, while a slice is a 1-byte string on both 2 and 3
        key = metabuf[offset:offset + 1]
        offset += 1
        metalen = struct.unpack_from('!H', metabuf, offset)[0]
        offset += 2
        if offset + metalen > buflen:
            raise ValueError('corrupted metadata: incomplete buffer')
        metadict[key] = metabuf[offset:offset + metalen]
        offset += metalen
    if offset != buflen:
        # fewer than 3 bytes are left over, too short for another entry
        raise ValueError('corrupted metadata: redundant data')
    return metadict
|
131 |
|
def _buildpackmeta(metadict):
    """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)

    Entries are emitted in sorted key order; ``None`` is treated like an
    empty dict.

    The dict contains raw content - both keys and values are strings.
    Upper-level business may want to serialize some of other types (like
    integers) to strings before calling this function.

    raise ProgrammingError when metadata key is illegal, or ValueError if
    length limit is exceeded
    """
    parts = []
    # items() rather than iteritems(): works on both Python 2 and 3
    for k, v in sorted((metadict or {}).items()):
        if len(k) != 1:
            raise error.ProgrammingError('packmeta: illegal key: %s' % k)
        if len(v) > 0xfffe:
            raise ValueError('metadata value is too long: 0x%x > 0xfffe'
                             % len(v))
        parts.append(k)
        parts.append(struct.pack('!H', len(v)))
        parts.append(v)
    # the joined length is guaranteed representable in 4 bytes, because there
    # are only 256 keys, and for each value, len(value) <= 0xfffe.
    # Joining with a bytes separator keeps the result a byte string on
    # Python 3 as well (struct.pack() returns bytes there).
    return b''.join(parts)
|
155 |
|
# Accepted Python types for metadata fields that carry integers.
# ``type(1 << 64)`` is ``long`` on Python 2 and ``int`` on Python 3, so the
# table no longer references the ``long`` builtin, which does not exist on
# Python 3.
_metaitemtypes = {
    constants.METAKEYFLAG: (int, type(1 << 64)),
    constants.METAKEYSIZE: (int, type(1 << 64)),
}
|
160 |
|
def buildpackmeta(metadict):
    """like _buildpackmeta, but typechecks metadict and normalize it.

    This means, METAKEYFLAG and METAKEYSIZE should have integers as values,
    and METAKEYFLAG will be dropped if its value is 0. Every other key must
    have a ``bytes`` value.

    raise ProgrammingError when a value has the wrong type.
    """
    newmeta = {}
    # items() rather than iteritems(): works on both Python 2 and 3
    for k, v in (metadict or {}).items():
        expectedtype = _metaitemtypes.get(k, (bytes,))
        if not isinstance(v, expectedtype):
            raise error.ProgrammingError('packmeta: wrong type of key %s' % k)
        # normalize int to binary buffer
        if int in expectedtype:
            # optimization: remove flag if it's 0 to save space
            if k == constants.METAKEYFLAG and v == 0:
                continue
            v = int2bin(v)
        newmeta[k] = v
    return _buildpackmeta(newmeta)
|
180 |
|
def parsepackmeta(metabuf):
    """like _parsepackmeta, but convert fields to desired types automatically.

    This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
    integers.

    raise ValueError if the data is corrupted (from _parsepackmeta).
    """
    metadict = _parsepackmeta(metabuf)
    # iterate over a snapshot because values are replaced inside the loop;
    # items() rather than iteritems() works on both Python 2 and 3
    for k, v in list(metadict.items()):
        if k in _metaitemtypes and int in _metaitemtypes[k]:
            metadict[k] = bin2int(v)
    return metadict
|
192 |
|
def int2bin(n):
    """convert a non-negative integer to raw binary buffer

    The result is big-endian without leading zero bytes, so 0 becomes an
    empty buffer.
    """
    digits = bytearray()
    # collect the bytes least-significant first, then flip the order
    while n > 0:
        digits.append(n & 0xff)
        n >>= 8
    digits.reverse()
    return bytes(digits)
|
200 |
|
def bin2int(buf):
    """the reverse of int2bin, convert a binary buffer to an integer

    The buffer is read as a big-endian unsigned number; an empty buffer
    gives 0.
    """
    value = 0
    for byte in bytearray(buf):
        value = (value << 8) | byte
    return value
|
208 |
|
def parsesizeflags(raw):
    """given a remotefilelog blob, return (headersize, rawtextsize, flags)

    The header is everything before the first NUL byte and ``headersize``
    includes that NUL. ``flags`` stays at ``revlog.REVIDX_DEFAULT_FLAGS``
    unless a v1 header carries a flag field.

    see remotefilelogserver.createfileblob for the format.
    raise RuntimeError if the content is illformed.
    """
    flags = revlog.REVIDX_DEFAULT_FLAGS
    size = None
    try:
        index = raw.index('\0')
        header = raw[:index]
        if not header.startswith('v'):
            # v0, str(int(size)) is the header
            size = int(header)
        elif header.startswith('v1\n'):
            # v1: newline-separated fields, each introduced by its meta key
            sizekey = constants.METAKEYSIZE
            flagkey = constants.METAKEYFLAG
            for field in header.split('\n'):
                if field.startswith(sizekey):
                    size = int(field[len(sizekey):])
                elif field.startswith(flagkey):
                    flags = int(field[len(flagkey):])
        else:
            # starts with 'v' but is not a version we know
            raise RuntimeError('unsupported remotefilelog header: %s'
                               % header)
    except ValueError:
        # missing NUL separator or a non-numeric size/flag field
        raise RuntimeError("unexpected remotefilelog header: illegal format")
    if size is None:
        raise RuntimeError("unexpected remotefilelog header: no size found")
    return index + 1, size, flags
|
239 |
|
def buildfileblobheader(size, flags, version=None):
    """return the header of a remotefilelog blob.

    see remotefilelogserver.createfileblob for the format.
    approximately the reverse of parsesizeflags.

    version could be 0 or 1, or None (auto decide).
    raise ProgrammingError for any other version, or when version 0 is
    requested together with non-empty flags.
    """
    if version is None:
        # choose v0 if flags is empty, otherwise v1
        version = 1 if flags else 0

    if version == 0:
        if flags:
            raise error.ProgrammingError('fileblob v0 does not support flag')
        return '%d' % size

    if version == 1:
        fields = (constants.METAKEYSIZE, size, constants.METAKEYFLAG, flags)
        return 'v1\n%s%d\n%s%d' % fields

    raise error.ProgrammingError('unknown fileblob version %d' % version)
|
262 |
|
def ancestormap(raw):
    """Parse the ancestor records that follow the text in a file blob.

    Each record is four 20-byte fields (node, p1, p2, linknode) followed by
    a NUL-terminated copyfrom string. Returns a dict that maps node to
    ``(p1, p2, linknode, copyfrom)``.
    """
    headersize, textsize, unusedflags = parsesizeflags(raw)

    mapping = {}
    total = len(raw)
    pos = headersize + textsize
    while pos < total:
        pathstart = pos + 80
        pathend = raw.index('\0', pathstart)

        node = raw[pos:pos + 20]
        parent1 = raw[pos + 20:pos + 40]
        parent2 = raw[pos + 40:pos + 60]
        link = raw[pos + 60:pathstart]
        source = raw[pathstart:pathend]

        mapping[node] = (parent1, parent2, link, source)
        # continue right after the NUL terminator
        pos = pathend + 1

    return mapping
|
281 |
|
def readfile(path):
    """Return the binary content of the file at ``path``.

    An empty file is treated as invalid: it is deleted and IOError is
    raised.
    """
    with open(path, 'rb') as f:
        result = f.read()

    # we should never have empty files
    if not result:
        # the handle is already closed here, so the removal also works on
        # Windows, where a file that is still open cannot be deleted
        os.remove(path)
        raise IOError("empty file: %s" % path)

    return result
|
295 |
|
def unlinkfile(filepath):
    """Delete ``filepath``, making it writable first on Windows."""
    needswritable = pycompat.iswindows
    if needswritable:
        # On Windows, os.unlink cannot delete readonly files
        os.chmod(filepath, stat.S_IWUSR)
    os.unlink(filepath)
|
301 |
|
def renamefile(source, destination):
    """Rename ``source`` to ``destination``.

    On Windows the source is made writable first and an existing
    destination file is removed before the rename.
    """
    if pycompat.iswindows:
        # On Windows, os.rename cannot rename readonly files
        # and cannot overwrite destination if it exists
        os.chmod(source, stat.S_IWUSR)
        destexists = os.path.isfile(destination)
        if destexists:
            os.chmod(destination, stat.S_IWUSR)
            os.unlink(destination)

    os.rename(source, destination)
|
312 |
|
def writefile(path, content, readonly=False):
    """Write ``content`` to ``path`` through a temporary file and a rename.

    The directory of ``path`` is created if it is missing. The data is
    written to a temporary file in that directory, which is then renamed
    over ``path``. The final mode is 0o444 when ``readonly`` is true,
    otherwise 0o666 restricted by the process umask.

    If anything fails, the temporary file is removed (best effort) and the
    original exception is re-raised.
    """
    dirname, filename = os.path.split(path)
    # a bare file name has an empty dirname, which os.makedirs() rejects
    if dirname and not os.path.exists(dirname):
        try:
            os.makedirs(dirname)
        except OSError as ex:
            # somebody else may have created it in the meantime
            if ex.errno != errno.EEXIST:
                raise

    fd, temp = tempfile.mkstemp(prefix='.%s-' % filename, dir=dirname)
    os.close(fd)

    try:
        f = util.posixfile(temp, 'wb')
        try:
            f.write(content)
        finally:
            # close the handle even when the write fails
            f.close()

        if readonly:
            mode = 0o444
        else:
            # tempfiles are created with 0o600, so we need to manually set the
            # mode.
            oldumask = os.umask(0)
            # there's no way to get the umask without modifying it, so set it
            # back
            os.umask(oldumask)
            # start from the regular file default (0o666); a bare ~oldumask
            # is negative and would also turn on the setuid, setgid, sticky
            # and execute bits
            mode = 0o666 & ~oldumask

        renamefile(temp, path)
        os.chmod(path, mode)
    except Exception:
        try:
            unlinkfile(temp)
        except OSError:
            # the temporary file may already be gone (e.g. after the rename)
            pass
        raise
|
349 |
|
def sortnodes(nodes, parentfunc):
    """Topologically sorts the nodes, using the parentfunc to find
    the parents of nodes.

    ``parentfunc(node)`` returns an iterable of parents; parents that are
    not in ``nodes`` are ignored. Returns a list in which every node comes
    after all of its parents.
    """
    nodes = set(nodes)
    childmap = {}
    parentmap = {}
    roots = []

    # Build a child and parent map
    for n in nodes:
        parents = [p for p in parentfunc(n) if p in nodes]
        parentmap[n] = set(parents)
        for p in parents:
            childmap.setdefault(p, set()).add(n)
        if not parents:
            roots.append(n)

    # A deque gives O(1) pops and pushes at the front, where a list would
    # shift every element on each pop(0) / insert(0, ...).
    pending = collections.deque(sorted(roots))

    # Process roots, adding children to the queue as they become roots
    results = []
    while pending:
        n = pending.popleft()
        results.append(n)
        for c in childmap.get(n, ()):
            childparents = parentmap[c]
            childparents.remove(n)
            if not childparents:
                # insert at the beginning, that way child nodes
                # are likely to be output immediately after their
                # parents. This gives better compression results.
                pending.appendleft(c)

    return results
|
385 |
|
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    data = stream.read(n)
    got = len(data)
    if got >= n:
        return data
    raise error.Abort(_("stream ended unexpectedly"
                        " (got %d bytes, expected %d)")
                      % (got, n))
|
394 |
|
def readunpack(stream, fmt):
    """Read exactly ``struct.calcsize(fmt)`` bytes from ``stream`` and return
    them unpacked according to ``fmt`` (a tuple, as from ``struct.unpack``).
    """
    size = struct.calcsize(fmt)
    return struct.unpack(fmt, readexactly(stream, size))
|
398 |
|
def readpath(stream):
    """Read one length-prefixed path from ``stream``.

    The length field is ``constants.FILENAMESIZE`` bytes long and is decoded
    with ``constants.FILENAMESTRUCT``; that many bytes are then read and
    returned.
    """
    lenfield = readexactly(stream, constants.FILENAMESIZE)
    decoded = struct.unpack(constants.FILENAMESTRUCT, lenfield)
    return readexactly(stream, decoded[0])
|
403 |
|
def readnodelist(stream):
    """Generate the nodes of a count-prefixed node list read from ``stream``.

    The count field is ``constants.NODECOUNTSIZE`` bytes long and is decoded
    with ``constants.NODECOUNTSTRUCT``; each node is
    ``constants.NODESIZE`` bytes. This is a generator, so nothing is read
    until iteration starts.
    """
    countfield = readexactly(stream, constants.NODECOUNTSIZE)
    remaining = struct.unpack(constants.NODECOUNTSTRUCT, countfield)[0]
    while remaining > 0:
        yield readexactly(stream, constants.NODESIZE)
        remaining -= 1
|
409 |
|
def readpathlist(stream):
    """Generate the paths of a count-prefixed path list read from ``stream``.

    The count field is ``constants.PATHCOUNTSIZE`` bytes long and is decoded
    with ``constants.PATHCOUNTSTRUCT``; each entry is read with readpath().
    This is a generator, so nothing is read until iteration starts.
    """
    countfield = readexactly(stream, constants.PATHCOUNTSIZE)
    remaining = struct.unpack(constants.PATHCOUNTSTRUCT, countfield)[0]
    while remaining > 0:
        yield readpath(stream)
        remaining -= 1
|
415 |
|
def getgid(groupname):
    """Return the numeric id of the group ``groupname``, or None when the
    group database has no such group."""
    try:
        entry = grp.getgrnam(groupname)
    except KeyError:
        return None
    return entry.gr_gid
|
422 |
|
def setstickygroupdir(path, gid, warn=None):
    """Give ``path`` the group ``gid`` and mode 0o2775 (setgid, group
    writable).

    Does nothing when ``gid`` is None. Failures are not raised; they are
    reported through ``warn`` when a callable is supplied.
    """
    if gid is not None:
        try:
            # -1 leaves the owner unchanged
            os.chown(path, -1, gid)
            os.chmod(path, 0o2775)
        except (IOError, OSError) as ex:
            if warn:
                warn(_('unable to chown/chmod on %s: %s\n') % (path, ex))
|
432 |
|
def mkstickygroupdir(ui, path):
    """Creates the given directory (if it doesn't exist) and give it a
    particular group with setgid enabled.

    The group comes from the ``remotefilelog.cachegroup`` config; a warning
    is printed when it cannot be resolved. An existing directory only has
    its group and mode fixed up when they differ. Otherwise the directory
    and all of its missing ancestors are created under umask 0o002 and then
    adjusted with setstickygroupdir().
    """
    groupname = ui.config("remotefilelog", "cachegroup")
    gid = getgid(groupname) if groupname else None
    if groupname and gid is None:
        ui.warn(_('unable to resolve group name: %s\n') % groupname)

    # we use a single stat syscall to test the existence and mode / group bit
    try:
        st = os.stat(path)
    except OSError:
        st = None

    if st is not None:
        # exists
        modeok = (st.st_mode & 0o2775) == 0o2775
        if not modeok or st.st_gid != gid:
            # permission needs to be fixed
            setstickygroupdir(path, gid, ui.warn)
        return

    oldumask = os.umask(0o002)
    try:
        # collect the target and every ancestor that is missing, deepest
        # first
        missingdirs = [path]
        parent = os.path.dirname(path)
        while parent and not os.path.exists(parent):
            missingdirs.append(parent)
            parent = os.path.dirname(parent)

        # create them from the outermost directory inwards
        for dirpath in reversed(missingdirs):
            try:
                os.mkdir(dirpath)
            except OSError as ex:
                if ex.errno != errno.EEXIST:
                    raise

        for dirpath in missingdirs:
            setstickygroupdir(dirpath, gid, ui.warn)
    finally:
        os.umask(oldumask)
|
476 |
|
def getusername(ui):
    """Return the short form of ``ui.username()``, or 'unknown' when it
    cannot be determined (any exception is swallowed)."""
    try:
        fullname = ui.username()
        return stringutil.shortuser(fullname)
    except Exception:
        return 'unknown'
|
482 |
|
def getreponame(ui):
    """Return the last path component of the ``paths.default`` config value,
    or "unknown" when that option is unset or empty."""
    default = ui.config('paths', 'default')
    if not default:
        return "unknown"
    return os.path.basename(default)