Mercurial > hg
comparison mercurial/dirstatemap.py @ 47501:8b7e47802deb
dirstate: split dirstatemap in its own file
The dirstate file is large enough and the dirstatemap is quite insulated logic
already.
Differential Revision: https://phab.mercurial-scm.org/D10934
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 02 Jul 2021 02:27:48 +0200 |
parents | mercurial/dirstate.py@4a7df782f416 |
children | eaae39894312 |
comparison
equal
deleted
inserted
replaced
47500:23f5ed6dbcb1 | 47501:8b7e47802deb |
---|---|
1 # dirstatemap.py | |
2 # | |
3 # This software may be used and distributed according to the terms of the | |
4 # GNU General Public License version 2 or any later version. | |
5 | |
6 from __future__ import absolute_import | |
7 | |
8 import errno | |
9 | |
10 from .i18n import _ | |
11 | |
12 from . import ( | |
13 error, | |
14 pathutil, | |
15 policy, | |
16 pycompat, | |
17 txnutil, | |
18 util, | |
19 ) | |
20 | |
21 parsers = policy.importmod('parsers') | |
22 rustmod = policy.importrust('dirstate') | |
23 | |
24 propertycache = util.propertycache | |
25 | |
26 dirstatetuple = parsers.dirstatetuple | |
27 | |
28 | |
29 # a special value used internally for `size` if the file comes from the other parent | |
30 FROM_P2 = -2 | |
31 | |
32 # a special value used internally for `size` if the file is modified/merged/added | |
33 NONNORMAL = -1 | |
34 | |
35 # a special value used internally for `time` if the time is ambiguous | |
36 AMBIGUOUS_TIME = -1 | |
37 | |
38 | |
39 class dirstatemap(object): | |
40 """Map encapsulating the dirstate's contents. | |
41 | |
42 The dirstate contains the following state: | |
43 | |
44 - `identity` is the identity of the dirstate file, which can be used to | |
45 detect when changes have occurred to the dirstate file. | |
46 | |
47 - `parents` is a pair containing the parents of the working copy. The | |
48 parents are updated by calling `setparents`. | |
49 | |
50 - the state map maps filenames to tuples of (state, mode, size, mtime), | |
51 where state is a single character representing 'normal', 'added', | |
52 'removed', or 'merged'. It is read by treating the dirstate as a | |
53 dict. File state is updated by calling the `addfile`, `removefile` and | |
54 `dropfile` methods. | |
55 | |
56 - `copymap` maps destination filenames to their source filename. | |
57 | |
58 The dirstate also provides the following views onto the state: | |
59 | |
60 - `nonnormalset` is a set of the filenames that have state other | |
61 than 'normal', or are normal but have an mtime of -1 ('normallookup'). | |
62 | |
63 - `otherparentset` is a set of the filenames that are marked as coming | |
64 from the second parent when the dirstate is currently being merged. | |
65 | |
66 - `filefoldmap` is a dict mapping normalized filenames to the denormalized | |
67 form that they appear as in the dirstate. | |
68 | |
69 - `dirfoldmap` is a dict mapping normalized directory names to the | |
70 denormalized form that they appear as in the dirstate. | |
71 """ | |
72 | |
73 def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2): | |
74 self._ui = ui | |
75 self._opener = opener | |
76 self._root = root | |
77 self._filename = b'dirstate' | |
78 self._nodelen = 20 | |
79 self._nodeconstants = nodeconstants | |
80 assert ( | |
81 not use_dirstate_v2 | |
82 ), "should have detected unsupported requirement" | |
83 | |
84 self._parents = None | |
85 self._dirtyparents = False | |
86 | |
87 # for consistent view between parents() and read() invocations | |
88 self._pendingmode = None | |
89 | |
90 @propertycache | |
91 def _map(self): | |
92 self._map = {} | |
93 self.read() | |
94 return self._map | |
95 | |
96 @propertycache | |
97 def copymap(self): | |
98 self.copymap = {} | |
99 self._map | |
100 return self.copymap | |
101 | |
102 def directories(self): | |
103 # Rust / dirstate-v2 only | |
104 return [] | |
105 | |
106 def clear(self): | |
107 self._map.clear() | |
108 self.copymap.clear() | |
109 self.setparents(self._nodeconstants.nullid, self._nodeconstants.nullid) | |
110 util.clearcachedproperty(self, b"_dirs") | |
111 util.clearcachedproperty(self, b"_alldirs") | |
112 util.clearcachedproperty(self, b"filefoldmap") | |
113 util.clearcachedproperty(self, b"dirfoldmap") | |
114 util.clearcachedproperty(self, b"nonnormalset") | |
115 util.clearcachedproperty(self, b"otherparentset") | |
116 | |
117 def items(self): | |
118 return pycompat.iteritems(self._map) | |
119 | |
120 # forward for python2,3 compat | |
121 iteritems = items | |
122 | |
123 def __len__(self): | |
124 return len(self._map) | |
125 | |
126 def __iter__(self): | |
127 return iter(self._map) | |
128 | |
129 def get(self, key, default=None): | |
130 return self._map.get(key, default) | |
131 | |
132 def __contains__(self, key): | |
133 return key in self._map | |
134 | |
135 def __getitem__(self, key): | |
136 return self._map[key] | |
137 | |
138 def keys(self): | |
139 return self._map.keys() | |
140 | |
141 def preload(self): | |
142 """Loads the underlying data, if it's not already loaded""" | |
143 self._map | |
144 | |
145 def addfile(self, f, oldstate, state, mode, size, mtime): | |
146 """Add a tracked file to the dirstate.""" | |
147 if oldstate in b"?r" and "_dirs" in self.__dict__: | |
148 self._dirs.addpath(f) | |
149 if oldstate == b"?" and "_alldirs" in self.__dict__: | |
150 self._alldirs.addpath(f) | |
151 self._map[f] = dirstatetuple(state, mode, size, mtime) | |
152 if state != b'n' or mtime == AMBIGUOUS_TIME: | |
153 self.nonnormalset.add(f) | |
154 if size == FROM_P2: | |
155 self.otherparentset.add(f) | |
156 | |
157 def removefile(self, f, oldstate, size): | |
158 """ | |
159 Mark a file as removed in the dirstate. | |
160 | |
161 The `size` parameter is used to store sentinel values that indicate | |
162 the file's previous state. In the future, we should refactor this | |
163 to be more explicit about what that state is. | |
164 """ | |
165 if oldstate not in b"?r" and "_dirs" in self.__dict__: | |
166 self._dirs.delpath(f) | |
167 if oldstate == b"?" and "_alldirs" in self.__dict__: | |
168 self._alldirs.addpath(f) | |
169 if "filefoldmap" in self.__dict__: | |
170 normed = util.normcase(f) | |
171 self.filefoldmap.pop(normed, None) | |
172 self._map[f] = dirstatetuple(b'r', 0, size, 0) | |
173 self.nonnormalset.add(f) | |
174 | |
175 def dropfile(self, f, oldstate): | |
176 """ | |
177 Remove a file from the dirstate. Returns True if the file was | |
178 previously recorded. | |
179 """ | |
180 exists = self._map.pop(f, None) is not None | |
181 if exists: | |
182 if oldstate != b"r" and "_dirs" in self.__dict__: | |
183 self._dirs.delpath(f) | |
184 if "_alldirs" in self.__dict__: | |
185 self._alldirs.delpath(f) | |
186 if "filefoldmap" in self.__dict__: | |
187 normed = util.normcase(f) | |
188 self.filefoldmap.pop(normed, None) | |
189 self.nonnormalset.discard(f) | |
190 return exists | |
191 | |
192 def clearambiguoustimes(self, files, now): | |
193 for f in files: | |
194 e = self.get(f) | |
195 if e is not None and e[0] == b'n' and e[3] == now: | |
196 self._map[f] = dirstatetuple(e[0], e[1], e[2], AMBIGUOUS_TIME) | |
197 self.nonnormalset.add(f) | |
198 | |
199 def nonnormalentries(self): | |
200 '''Compute the nonnormal dirstate entries from the dmap''' | |
201 try: | |
202 return parsers.nonnormalotherparententries(self._map) | |
203 except AttributeError: | |
204 nonnorm = set() | |
205 otherparent = set() | |
206 for fname, e in pycompat.iteritems(self._map): | |
207 if e[0] != b'n' or e[3] == AMBIGUOUS_TIME: | |
208 nonnorm.add(fname) | |
209 if e[0] == b'n' and e[2] == FROM_P2: | |
210 otherparent.add(fname) | |
211 return nonnorm, otherparent | |
212 | |
213 @propertycache | |
214 def filefoldmap(self): | |
215 """Returns a dictionary mapping normalized case paths to their | |
216 non-normalized versions. | |
217 """ | |
218 try: | |
219 makefilefoldmap = parsers.make_file_foldmap | |
220 except AttributeError: | |
221 pass | |
222 else: | |
223 return makefilefoldmap( | |
224 self._map, util.normcasespec, util.normcasefallback | |
225 ) | |
226 | |
227 f = {} | |
228 normcase = util.normcase | |
229 for name, s in pycompat.iteritems(self._map): | |
230 if s[0] != b'r': | |
231 f[normcase(name)] = name | |
232 f[b'.'] = b'.' # prevents useless util.fspath() invocation | |
233 return f | |
234 | |
235 def hastrackeddir(self, d): | |
236 """ | |
237 Returns True if the dirstate contains a tracked (not removed) file | |
238 in this directory. | |
239 """ | |
240 return d in self._dirs | |
241 | |
242 def hasdir(self, d): | |
243 """ | |
244 Returns True if the dirstate contains a file (tracked or removed) | |
245 in this directory. | |
246 """ | |
247 return d in self._alldirs | |
248 | |
249 @propertycache | |
250 def _dirs(self): | |
251 return pathutil.dirs(self._map, b'r') | |
252 | |
253 @propertycache | |
254 def _alldirs(self): | |
255 return pathutil.dirs(self._map) | |
256 | |
257 def _opendirstatefile(self): | |
258 fp, mode = txnutil.trypending(self._root, self._opener, self._filename) | |
259 if self._pendingmode is not None and self._pendingmode != mode: | |
260 fp.close() | |
261 raise error.Abort( | |
262 _(b'working directory state may be changed parallelly') | |
263 ) | |
264 self._pendingmode = mode | |
265 return fp | |
266 | |
267 def parents(self): | |
268 if not self._parents: | |
269 try: | |
270 fp = self._opendirstatefile() | |
271 st = fp.read(2 * self._nodelen) | |
272 fp.close() | |
273 except IOError as err: | |
274 if err.errno != errno.ENOENT: | |
275 raise | |
276 # File doesn't exist, so the current state is empty | |
277 st = b'' | |
278 | |
279 l = len(st) | |
280 if l == self._nodelen * 2: | |
281 self._parents = ( | |
282 st[: self._nodelen], | |
283 st[self._nodelen : 2 * self._nodelen], | |
284 ) | |
285 elif l == 0: | |
286 self._parents = ( | |
287 self._nodeconstants.nullid, | |
288 self._nodeconstants.nullid, | |
289 ) | |
290 else: | |
291 raise error.Abort( | |
292 _(b'working directory state appears damaged!') | |
293 ) | |
294 | |
295 return self._parents | |
296 | |
297 def setparents(self, p1, p2): | |
298 self._parents = (p1, p2) | |
299 self._dirtyparents = True | |
300 | |
301 def read(self): | |
302 # ignore HG_PENDING because identity is used only for writing | |
303 self.identity = util.filestat.frompath( | |
304 self._opener.join(self._filename) | |
305 ) | |
306 | |
307 try: | |
308 fp = self._opendirstatefile() | |
309 try: | |
310 st = fp.read() | |
311 finally: | |
312 fp.close() | |
313 except IOError as err: | |
314 if err.errno != errno.ENOENT: | |
315 raise | |
316 return | |
317 if not st: | |
318 return | |
319 | |
320 if util.safehasattr(parsers, b'dict_new_presized'): | |
321 # Make an estimate of the number of files in the dirstate based on | |
322 # its size. This trades wasting some memory for avoiding costly | |
323 # resizes. Each entry has a prefix of 17 bytes followed by one or | |
324 # two path names. Studies on various large-scale real-world repositories | |
325 # found 54 bytes to be a reasonable upper limit for the average path name. | |
326 # Copy entries are ignored for the sake of this estimate. | |
327 self._map = parsers.dict_new_presized(len(st) // 71) | |
328 | |
329 # Python's garbage collector triggers a GC each time a certain number | |
330 # of container objects (the number being defined by | |
331 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple | |
332 # for each file in the dirstate. The C version then immediately marks | |
333 # them as not to be tracked by the collector. However, this has no | |
334 # effect on when GCs are triggered, only on what objects the GC looks | |
335 # into. This means that O(number of files) GCs are unavoidable. | |
336 # Depending on when in the process's lifetime the dirstate is parsed, | |
337 # this can get very expensive. As a workaround, disable GC while | |
338 # parsing the dirstate. | |
339 # | |
340 # (we cannot decorate the function directly since it is in a C module) | |
341 parse_dirstate = util.nogc(parsers.parse_dirstate) | |
342 p = parse_dirstate(self._map, self.copymap, st) | |
343 if not self._dirtyparents: | |
344 self.setparents(*p) | |
345 | |
346 # Avoid excess attribute lookups by fast pathing certain checks | |
347 self.__contains__ = self._map.__contains__ | |
348 self.__getitem__ = self._map.__getitem__ | |
349 self.get = self._map.get | |
350 | |
351 def write(self, st, now): | |
352 st.write( | |
353 parsers.pack_dirstate(self._map, self.copymap, self.parents(), now) | |
354 ) | |
355 st.close() | |
356 self._dirtyparents = False | |
357 self.nonnormalset, self.otherparentset = self.nonnormalentries() | |
358 | |
359 @propertycache | |
360 def nonnormalset(self): | |
361 nonnorm, otherparents = self.nonnormalentries() | |
362 self.otherparentset = otherparents | |
363 return nonnorm | |
364 | |
365 @propertycache | |
366 def otherparentset(self): | |
367 nonnorm, otherparents = self.nonnormalentries() | |
368 self.nonnormalset = nonnorm | |
369 return otherparents | |
370 | |
371 def non_normal_or_other_parent_paths(self): | |
372 return self.nonnormalset.union(self.otherparentset) | |
373 | |
374 @propertycache | |
375 def identity(self): | |
376 self._map | |
377 return self.identity | |
378 | |
379 @propertycache | |
380 def dirfoldmap(self): | |
381 f = {} | |
382 normcase = util.normcase | |
383 for name in self._dirs: | |
384 f[normcase(name)] = name | |
385 return f | |
386 | |
387 | |
388 if rustmod is not None: | |
389 | |
390 class dirstatemap(object): | |
391 def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2): | |
392 self._use_dirstate_v2 = use_dirstate_v2 | |
393 self._nodeconstants = nodeconstants | |
394 self._ui = ui | |
395 self._opener = opener | |
396 self._root = root | |
397 self._filename = b'dirstate' | |
398 self._nodelen = 20 # Also update Rust code when changing this! | |
399 self._parents = None | |
400 self._dirtyparents = False | |
401 | |
402 # for consistent view between parents() and _rustmap invocations | |
403 self._pendingmode = None | |
404 | |
405 self._use_dirstate_tree = self._ui.configbool( | |
406 b"experimental", | |
407 b"dirstate-tree.in-memory", | |
408 False, | |
409 ) | |
410 | |
411 def addfile(self, *args, **kwargs): | |
412 return self._rustmap.addfile(*args, **kwargs) | |
413 | |
414 def removefile(self, *args, **kwargs): | |
415 return self._rustmap.removefile(*args, **kwargs) | |
416 | |
417 def dropfile(self, *args, **kwargs): | |
418 return self._rustmap.dropfile(*args, **kwargs) | |
419 | |
420 def clearambiguoustimes(self, *args, **kwargs): | |
421 return self._rustmap.clearambiguoustimes(*args, **kwargs) | |
422 | |
423 def nonnormalentries(self): | |
424 return self._rustmap.nonnormalentries() | |
425 | |
426 def get(self, *args, **kwargs): | |
427 return self._rustmap.get(*args, **kwargs) | |
428 | |
429 @property | |
430 def copymap(self): | |
431 return self._rustmap.copymap() | |
432 | |
433 def directories(self): | |
434 return self._rustmap.directories() | |
435 | |
436 def preload(self): | |
437 self._rustmap | |
438 | |
439 def clear(self): | |
440 self._rustmap.clear() | |
441 self.setparents( | |
442 self._nodeconstants.nullid, self._nodeconstants.nullid | |
443 ) | |
444 util.clearcachedproperty(self, b"_dirs") | |
445 util.clearcachedproperty(self, b"_alldirs") | |
446 util.clearcachedproperty(self, b"dirfoldmap") | |
447 | |
448 def items(self): | |
449 return self._rustmap.items() | |
450 | |
451 def keys(self): | |
452 return iter(self._rustmap) | |
453 | |
454 def __contains__(self, key): | |
455 return key in self._rustmap | |
456 | |
457 def __getitem__(self, item): | |
458 return self._rustmap[item] | |
459 | |
460 def __len__(self): | |
461 return len(self._rustmap) | |
462 | |
463 def __iter__(self): | |
464 return iter(self._rustmap) | |
465 | |
466 # forward for python2,3 compat | |
467 iteritems = items | |
468 | |
469 def _opendirstatefile(self): | |
470 fp, mode = txnutil.trypending( | |
471 self._root, self._opener, self._filename | |
472 ) | |
473 if self._pendingmode is not None and self._pendingmode != mode: | |
474 fp.close() | |
475 raise error.Abort( | |
476 _(b'working directory state may be changed parallelly') | |
477 ) | |
478 self._pendingmode = mode | |
479 return fp | |
480 | |
481 def setparents(self, p1, p2): | |
482 self._parents = (p1, p2) | |
483 self._dirtyparents = True | |
484 | |
485 def parents(self): | |
486 if not self._parents: | |
487 if self._use_dirstate_v2: | |
488 offset = len(rustmod.V2_FORMAT_MARKER) | |
489 else: | |
490 offset = 0 | |
491 read_len = offset + self._nodelen * 2 | |
492 try: | |
493 fp = self._opendirstatefile() | |
494 st = fp.read(read_len) | |
495 fp.close() | |
496 except IOError as err: | |
497 if err.errno != errno.ENOENT: | |
498 raise | |
499 # File doesn't exist, so the current state is empty | |
500 st = b'' | |
501 | |
502 l = len(st) | |
503 if l == read_len: | |
504 st = st[offset:] | |
505 self._parents = ( | |
506 st[: self._nodelen], | |
507 st[self._nodelen : 2 * self._nodelen], | |
508 ) | |
509 elif l == 0: | |
510 self._parents = ( | |
511 self._nodeconstants.nullid, | |
512 self._nodeconstants.nullid, | |
513 ) | |
514 else: | |
515 raise error.Abort( | |
516 _(b'working directory state appears damaged!') | |
517 ) | |
518 | |
519 return self._parents | |
520 | |
521 @propertycache | |
522 def _rustmap(self): | |
523 """ | |
524 Fills the Dirstatemap when called. | |
525 """ | |
526 # ignore HG_PENDING because identity is used only for writing | |
527 self.identity = util.filestat.frompath( | |
528 self._opener.join(self._filename) | |
529 ) | |
530 | |
531 try: | |
532 fp = self._opendirstatefile() | |
533 try: | |
534 st = fp.read() | |
535 finally: | |
536 fp.close() | |
537 except IOError as err: | |
538 if err.errno != errno.ENOENT: | |
539 raise | |
540 st = b'' | |
541 | |
542 self._rustmap, parents = rustmod.DirstateMap.new( | |
543 self._use_dirstate_tree, self._use_dirstate_v2, st | |
544 ) | |
545 | |
546 if parents and not self._dirtyparents: | |
547 self.setparents(*parents) | |
548 | |
549 self.__contains__ = self._rustmap.__contains__ | |
550 self.__getitem__ = self._rustmap.__getitem__ | |
551 self.get = self._rustmap.get | |
552 return self._rustmap | |
553 | |
554 def write(self, st, now): | |
555 parents = self.parents() | |
556 packed = self._rustmap.write( | |
557 self._use_dirstate_v2, parents[0], parents[1], now | |
558 ) | |
559 st.write(packed) | |
560 st.close() | |
561 self._dirtyparents = False | |
562 | |
563 @propertycache | |
564 def filefoldmap(self): | |
565 """Returns a dictionary mapping normalized case paths to their | |
566 non-normalized versions. | |
567 """ | |
568 return self._rustmap.filefoldmapasdict() | |
569 | |
570 def hastrackeddir(self, d): | |
571 return self._rustmap.hastrackeddir(d) | |
572 | |
573 def hasdir(self, d): | |
574 return self._rustmap.hasdir(d) | |
575 | |
576 @propertycache | |
577 def identity(self): | |
578 self._rustmap | |
579 return self.identity | |
580 | |
581 @property | |
582 def nonnormalset(self): | |
583 nonnorm = self._rustmap.non_normal_entries() | |
584 return nonnorm | |
585 | |
586 @propertycache | |
587 def otherparentset(self): | |
588 otherparents = self._rustmap.other_parent_entries() | |
589 return otherparents | |
590 | |
591 def non_normal_or_other_parent_paths(self): | |
592 return self._rustmap.non_normal_or_other_parent_paths() | |
593 | |
594 @propertycache | |
595 def dirfoldmap(self): | |
596 f = {} | |
597 normcase = util.normcase | |
598 for name, _pseudo_entry in self.directories(): | |
599 f[normcase(name)] = name | |
600 return f |