comparison mercurial/dirstatemap.py @ 47501:8b7e47802deb

dirstate: split dirstatemap in its own file The dirstate file is large enough and the dirstatemap is quite insulated logic already. Differential Revision: https://phab.mercurial-scm.org/D10934
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Fri, 02 Jul 2021 02:27:48 +0200
parents mercurial/dirstate.py@4a7df782f416
children eaae39894312
comparison
equal deleted inserted replaced
47500:23f5ed6dbcb1 47501:8b7e47802deb
1 # dirstatemap.py
2 #
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
5
6 from __future__ import absolute_import
7
8 import errno
9
10 from .i18n import _
11
12 from . import (
13 error,
14 pathutil,
15 policy,
16 pycompat,
17 txnutil,
18 util,
19 )
20
# `parsers` is loaded through the policy layer rather than imported directly.
parsers = policy.importmod('parsers')
# Optional Rust implementation; the Rust-backed dirstatemap further down in
# this file is only defined when this is not None.
rustmod = policy.importrust('dirstate')

propertycache = util.propertycache

dirstatetuple = parsers.dirstatetuple


# a special value used internally for `size` if the file comes from the other
# parent
FROM_P2 = -2

# a special value used internally for `size` if the file is
# modified/merged/added
NONNORMAL = -1

# a special value used internally for `time` if the time is ambiguous
AMBIGUOUS_TIME = -1
37
38
class dirstatemap(object):
    """Map encapsulating the dirstate's contents.

    The dirstate contains the following state:

    - `identity` is the identity of the dirstate file, which can be used to
      detect when changes have occurred to the dirstate file.

    - `parents` is a pair containing the parents of the working copy. The
      parents are updated by calling `setparents`.

    - the state map maps filenames to tuples of (state, mode, size, mtime),
      where state is a single character representing 'normal', 'added',
      'removed', or 'merged'. It is read by treating the dirstate as a
      dict.  File state is updated by calling the `addfile`, `removefile` and
      `dropfile` methods.

    - `copymap` maps destination filenames to their source filename.

    The dirstate also provides the following views onto the state:

    - `nonnormalset` is a set of the filenames that have state other
      than 'normal', or are normal but have an mtime of -1 ('normallookup').

    - `otherparentset` is a set of the filenames that are marked as coming
      from the second parent when the dirstate is currently being merged.

    - `filefoldmap` is a dict mapping normalized filenames to the denormalized
      form that they appear as in the dirstate.

    - `dirfoldmap` is a dict mapping normalized directory names to the
      denormalized form that they appear as in the dirstate.
    """

    def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
        """Record where the dirstate lives; nothing is read from disk here.

        `use_dirstate_v2` must be false for this implementation (asserted
        below); only the Rust-backed class in this file accepts it.
        """
        self._ui = ui
        self._opener = opener
        self._root = root
        self._filename = b'dirstate'
        self._nodelen = 20
        self._nodeconstants = nodeconstants
        assert (
            not use_dirstate_v2
        ), "should have detected unsupported requirement"

        self._parents = None
        self._dirtyparents = False

        # for consistent view between _pl() and _read() invocations
        self._pendingmode = None

    @propertycache
    def _map(self):
        """The filename -> entry mapping, loaded from disk on first access."""
        # Bind the empty dict before calling read(): read() fills (or
        # replaces) `self._map`, and must not re-enter this property.
        self._map = {}
        self.read()
        return self._map

    @propertycache
    def copymap(self):
        """The copy destination -> copy source mapping."""
        # Same pattern as `_map`: bind the dict first, then touch `_map` so
        # that read() populates it as part of parsing the dirstate.
        self.copymap = {}
        self._map
        return self.copymap

    def directories(self):
        """Return an empty list (only meaningful for Rust / dirstate-v2)."""
        # Rust / dirstate-v2 only
        return []

    def clear(self):
        """Empty the map, reset both parents to nullid, drop cached views."""
        self._map.clear()
        self.copymap.clear()
        self.setparents(self._nodeconstants.nullid, self._nodeconstants.nullid)
        util.clearcachedproperty(self, b"_dirs")
        util.clearcachedproperty(self, b"_alldirs")
        util.clearcachedproperty(self, b"filefoldmap")
        util.clearcachedproperty(self, b"dirfoldmap")
        util.clearcachedproperty(self, b"nonnormalset")
        util.clearcachedproperty(self, b"otherparentset")

    def items(self):
        """Iterate over (filename, entry) pairs of the state map."""
        return pycompat.iteritems(self._map)

    # forward for python2,3 compat
    iteritems = items

    def __len__(self):
        return len(self._map)

    def __iter__(self):
        return iter(self._map)

    def get(self, key, default=None):
        """Return the entry for `key`, or `default` when it is not present."""
        return self._map.get(key, default)

    def __contains__(self, key):
        return key in self._map

    def __getitem__(self, key):
        return self._map[key]

    def keys(self):
        """Return the filenames present in the state map."""
        return self._map.keys()

    def preload(self):
        """Loads the underlying data, if it's not already loaded"""
        self._map

    def addfile(self, f, oldstate, state, mode, size, mtime):
        """Add a tracked file to the dirstate.

        `oldstate` is the state `f` had before this call; it decides which
        of the already-computed directory caches need to learn about `f`.
        """
        # The directory caches are only updated when they have already been
        # computed (i.e. are present in the instance __dict__); otherwise
        # they will be built from the up-to-date map on first access.
        if oldstate in b"?r" and "_dirs" in self.__dict__:
            self._dirs.addpath(f)
        if oldstate == b"?" and "_alldirs" in self.__dict__:
            self._alldirs.addpath(f)
        self._map[f] = dirstatetuple(state, mode, size, mtime)
        if state != b'n' or mtime == AMBIGUOUS_TIME:
            self.nonnormalset.add(f)
        if size == FROM_P2:
            self.otherparentset.add(f)

    def removefile(self, f, oldstate, size):
        """
        Mark a file as removed in the dirstate.

        The `size` parameter is used to store sentinel values that indicate
        the file's previous state.  In the future, we should refactor this
        to be more explicit about what that state is.
        """
        if oldstate not in b"?r" and "_dirs" in self.__dict__:
            self._dirs.delpath(f)
        # A previously unknown file gains an entry (state 'r') below, so it
        # has to be added to the "all directories" view.
        if oldstate == b"?" and "_alldirs" in self.__dict__:
            self._alldirs.addpath(f)
        if "filefoldmap" in self.__dict__:
            normed = util.normcase(f)
            self.filefoldmap.pop(normed, None)
        self._map[f] = dirstatetuple(b'r', 0, size, 0)
        self.nonnormalset.add(f)

    def dropfile(self, f, oldstate):
        """
        Remove a file from the dirstate.  Returns True if the file was
        previously recorded.
        """
        exists = self._map.pop(f, None) is not None
        if exists:
            if oldstate != b"r" and "_dirs" in self.__dict__:
                self._dirs.delpath(f)
            if "_alldirs" in self.__dict__:
                self._alldirs.delpath(f)
        if "filefoldmap" in self.__dict__:
            normed = util.normcase(f)
            self.filefoldmap.pop(normed, None)
        self.nonnormalset.discard(f)
        return exists

    def clearambiguoustimes(self, files, now):
        """Replace the mtime of 'n' entries equal to `now` by AMBIGUOUS_TIME.

        Only the entries named in `files` are considered; each rewritten
        entry is also added to `nonnormalset`.
        """
        for f in files:
            e = self.get(f)
            if e is not None and e[0] == b'n' and e[3] == now:
                self._map[f] = dirstatetuple(e[0], e[1], e[2], AMBIGUOUS_TIME)
                self.nonnormalset.add(f)

    def nonnormalentries(self):
        '''Compute the nonnormal dirstate entries from the dmap

        Returns a `(nonnormal, otherparent)` pair of filename sets.
        '''
        # Only the attribute lookup is guarded, so that an AttributeError
        # raised *inside* the accelerated implementation is not mistaken for
        # "implementation unavailable" (same approach as `filefoldmap`).
        try:
            compute = parsers.nonnormalotherparententries
        except AttributeError:
            pass
        else:
            return compute(self._map)

        nonnorm = set()
        otherparent = set()
        for fname, e in pycompat.iteritems(self._map):
            if e[0] != b'n' or e[3] == AMBIGUOUS_TIME:
                nonnorm.add(fname)
            if e[0] == b'n' and e[2] == FROM_P2:
                otherparent.add(fname)
        return nonnorm, otherparent

    @propertycache
    def filefoldmap(self):
        """Returns a dictionary mapping normalized case paths to their
        non-normalized versions.
        """
        try:
            makefilefoldmap = parsers.make_file_foldmap
        except AttributeError:
            pass
        else:
            return makefilefoldmap(
                self._map, util.normcasespec, util.normcasefallback
            )

        f = {}
        normcase = util.normcase
        for name, s in pycompat.iteritems(self._map):
            # removed files are left out of the fold map
            if s[0] != b'r':
                f[normcase(name)] = name
        f[b'.'] = b'.'  # prevents useless util.fspath() invocation
        return f

    def hastrackeddir(self, d):
        """
        Returns True if the dirstate contains a tracked (not removed) file
        in this directory.
        """
        return d in self._dirs

    def hasdir(self, d):
        """
        Returns True if the dirstate contains a file (tracked or removed)
        in this directory.
        """
        return d in self._alldirs

    @propertycache
    def _dirs(self):
        # directories of the map's files, skipping entries in state 'r'
        return pathutil.dirs(self._map, b'r')

    @propertycache
    def _alldirs(self):
        # directories of every file in the map, whatever its state
        return pathutil.dirs(self._map)

    def _opendirstatefile(self):
        """Open the dirstate file (honouring pending data) for reading.

        Raises error.Abort if the "pending" mode reported by this open
        differs from the one seen by a previous open on this instance.
        """
        fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
        if self._pendingmode is not None and self._pendingmode != mode:
            fp.close()
            raise error.Abort(
                _(b'working directory state may be changed parallelly')
            )
        self._pendingmode = mode
        return fp

    def parents(self):
        """Return the (p1, p2) pair of working copy parents.

        The pair is read from the head of the dirstate file on first use and
        then kept in memory.  A missing or empty file yields a pair of
        nullid; any other unexpected length raises error.Abort.
        """
        if not self._parents:
            try:
                fp = self._opendirstatefile()
                # close the file even if the read fails (as read() does)
                try:
                    st = fp.read(2 * self._nodelen)
                finally:
                    fp.close()
            except IOError as err:
                if err.errno != errno.ENOENT:
                    raise
                # File doesn't exist, so the current state is empty
                st = b''

            l = len(st)
            if l == self._nodelen * 2:
                self._parents = (
                    st[: self._nodelen],
                    st[self._nodelen : 2 * self._nodelen],
                )
            elif l == 0:
                self._parents = (
                    self._nodeconstants.nullid,
                    self._nodeconstants.nullid,
                )
            else:
                raise error.Abort(
                    _(b'working directory state appears damaged!')
                )

        return self._parents

    def setparents(self, p1, p2):
        """Set the working copy parents in memory and mark them dirty."""
        self._parents = (p1, p2)
        self._dirtyparents = True

    def read(self):
        """Parse the on-disk dirstate into `_map` and `copymap`.

        A missing or empty file leaves the maps as they are.  The parents
        found in the file are adopted unless parents were already set
        explicitly (`_dirtyparents`).
        """
        # ignore HG_PENDING because identity is used only for writing
        self.identity = util.filestat.frompath(
            self._opener.join(self._filename)
        )

        try:
            fp = self._opendirstatefile()
            try:
                st = fp.read()
            finally:
                fp.close()
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            return
        if not st:
            return

        if util.safehasattr(parsers, b'dict_new_presized'):
            # Make an estimate of the number of files in the dirstate based on
            # its size. This trades wasting some memory for avoiding costly
            # resizes. Each entry has a prefix of 17 bytes followed by one or
            # two path names. Studies on various large-scale real-world
            # repositories found 54 bytes a reasonable upper limit for the
            # average path names. Copy entries are ignored for the sake of
            # this estimate.
            self._map = parsers.dict_new_presized(len(st) // 71)

        # Python's garbage collector triggers a GC each time a certain number
        # of container objects (the number being defined by
        # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
        # for each file in the dirstate. The C version then immediately marks
        # them as not to be tracked by the collector. However, this has no
        # effect on when GCs are triggered, only on what objects the GC looks
        # into. This means that O(number of files) GCs are unavoidable.
        # Depending on when in the process's lifetime the dirstate is parsed,
        # this can get very expensive. As a workaround, disable GC while
        # parsing the dirstate.
        #
        # (we cannot decorate the function directly since it is in a C module)
        parse_dirstate = util.nogc(parsers.parse_dirstate)
        p = parse_dirstate(self._map, self.copymap, st)
        if not self._dirtyparents:
            self.setparents(*p)

        # Avoid excess attribute lookups by fast pathing certain checks
        # NOTE(review): implicit special-method lookup (`key in obj`,
        # `obj[key]`) goes through the type, not the instance, so only
        # explicit attribute access and `.get` can benefit from these
        # instance-level bindings.
        self.__contains__ = self._map.__contains__
        self.__getitem__ = self._map.__getitem__
        self.get = self._map.get

    def write(self, st, now):
        """Serialize the map, copies and parents into `st` and close it."""
        st.write(
            parsers.pack_dirstate(self._map, self.copymap, self.parents(), now)
        )
        st.close()
        self._dirtyparents = False
        # Recompute both views from the map after packing (presumably
        # because packing may alter entries -- confirm against
        # parsers.pack_dirstate).
        self.nonnormalset, self.otherparentset = self.nonnormalentries()

    @propertycache
    def nonnormalset(self):
        # Both sets come out of a single pass; store the sibling as well so
        # the map is not walked a second time for it.
        nonnorm, otherparents = self.nonnormalentries()
        self.otherparentset = otherparents
        return nonnorm

    @propertycache
    def otherparentset(self):
        # See `nonnormalset`: one computation fills both cached views.
        nonnorm, otherparents = self.nonnormalentries()
        self.nonnormalset = nonnorm
        return otherparents

    def non_normal_or_other_parent_paths(self):
        """Return the union of `nonnormalset` and `otherparentset`."""
        return self.nonnormalset.union(self.otherparentset)

    @propertycache
    def identity(self):
        # Loading the map runs read(), which assigns `self.identity`; that
        # instance attribute is what gets returned (and cached) here.
        self._map
        return self.identity

    @propertycache
    def dirfoldmap(self):
        """Dict mapping case-normalized directory names to their real form."""
        f = {}
        normcase = util.normcase
        for name in self._dirs:
            f[normcase(name)] = name
        return f
386
387
if rustmod is not None:

    class dirstatemap(object):
        """Rust-backed variant of the dirstate map.

        Exposes the same methods and properties as the pure-Python class
        defined earlier in this module, but forwards most of the work to a
        `rustmod.DirstateMap` instance created lazily by `_rustmap`.
        """

        def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
            """Record where the dirstate lives; nothing is read from disk."""
            self._use_dirstate_v2 = use_dirstate_v2
            self._nodeconstants = nodeconstants
            self._ui = ui
            self._opener = opener
            self._root = root
            self._filename = b'dirstate'
            self._nodelen = 20  # Also update Rust code when changing this!
            self._parents = None
            self._dirtyparents = False

            # for consistent view between _pl() and _read() invocations
            self._pendingmode = None

            self._use_dirstate_tree = self._ui.configbool(
                b"experimental",
                b"dirstate-tree.in-memory",
                False,
            )

        # The methods below are thin forwards to the Rust map.

        def addfile(self, *args, **kwargs):
            return self._rustmap.addfile(*args, **kwargs)

        def removefile(self, *args, **kwargs):
            return self._rustmap.removefile(*args, **kwargs)

        def dropfile(self, *args, **kwargs):
            return self._rustmap.dropfile(*args, **kwargs)

        def clearambiguoustimes(self, *args, **kwargs):
            return self._rustmap.clearambiguoustimes(*args, **kwargs)

        def nonnormalentries(self):
            return self._rustmap.nonnormalentries()

        def get(self, *args, **kwargs):
            return self._rustmap.get(*args, **kwargs)

        @property
        def copymap(self):
            # not cached: asks the Rust map on every access
            return self._rustmap.copymap()

        def directories(self):
            return self._rustmap.directories()

        def preload(self):
            """Load the underlying Rust map, if it's not already loaded."""
            self._rustmap

        def clear(self):
            """Empty the map, reset both parents to nullid, drop caches."""
            self._rustmap.clear()
            self.setparents(
                self._nodeconstants.nullid, self._nodeconstants.nullid
            )
            util.clearcachedproperty(self, b"_dirs")
            util.clearcachedproperty(self, b"_alldirs")
            # `filefoldmap` and `otherparentset` are cached properties of
            # this class too; drop them so they cannot keep describing the
            # pre-clear content (the pure-Python class does the same).
            util.clearcachedproperty(self, b"filefoldmap")
            util.clearcachedproperty(self, b"dirfoldmap")
            util.clearcachedproperty(self, b"otherparentset")

        def items(self):
            return self._rustmap.items()

        def keys(self):
            return iter(self._rustmap)

        def __contains__(self, key):
            return key in self._rustmap

        def __getitem__(self, item):
            return self._rustmap[item]

        def __len__(self):
            return len(self._rustmap)

        def __iter__(self):
            return iter(self._rustmap)

        # forward for python2,3 compat
        iteritems = items

        def _opendirstatefile(self):
            """Open the dirstate file (honouring pending data) for reading.

            Raises error.Abort if the "pending" mode reported by this open
            differs from the one seen by a previous open on this instance.
            """
            fp, mode = txnutil.trypending(
                self._root, self._opener, self._filename
            )
            if self._pendingmode is not None and self._pendingmode != mode:
                fp.close()
                raise error.Abort(
                    _(b'working directory state may be changed parallelly')
                )
            self._pendingmode = mode
            return fp

        def setparents(self, p1, p2):
            """Set the working copy parents in memory and mark them dirty."""
            self._parents = (p1, p2)
            self._dirtyparents = True

        def parents(self):
            """Return the (p1, p2) pair of working copy parents.

            The pair is read from the head of the dirstate file on first use
            (after the format marker when dirstate-v2 is in use) and then
            kept in memory.  A missing or empty file yields a pair of
            nullid; any other unexpected length raises error.Abort.
            """
            if not self._parents:
                if self._use_dirstate_v2:
                    offset = len(rustmod.V2_FORMAT_MARKER)
                else:
                    offset = 0
                read_len = offset + self._nodelen * 2
                try:
                    fp = self._opendirstatefile()
                    # close the file even if the read fails (as `_rustmap`
                    # does)
                    try:
                        st = fp.read(read_len)
                    finally:
                        fp.close()
                except IOError as err:
                    if err.errno != errno.ENOENT:
                        raise
                    # File doesn't exist, so the current state is empty
                    st = b''

                l = len(st)
                if l == read_len:
                    st = st[offset:]
                    self._parents = (
                        st[: self._nodelen],
                        st[self._nodelen : 2 * self._nodelen],
                    )
                elif l == 0:
                    self._parents = (
                        self._nodeconstants.nullid,
                        self._nodeconstants.nullid,
                    )
                else:
                    raise error.Abort(
                        _(b'working directory state appears damaged!')
                    )

            return self._parents

        @propertycache
        def _rustmap(self):
            """
            Fills the Dirstatemap when called.
            """
            # ignore HG_PENDING because identity is used only for writing
            self.identity = util.filestat.frompath(
                self._opener.join(self._filename)
            )

            try:
                fp = self._opendirstatefile()
                try:
                    st = fp.read()
                finally:
                    fp.close()
            except IOError as err:
                if err.errno != errno.ENOENT:
                    raise
                # a missing file is handed to Rust as empty content
                st = b''

            self._rustmap, parents = rustmod.DirstateMap.new(
                self._use_dirstate_tree, self._use_dirstate_v2, st
            )

            # keep parents that were set explicitly before the map was loaded
            if parents and not self._dirtyparents:
                self.setparents(*parents)

            self.__contains__ = self._rustmap.__contains__
            self.__getitem__ = self._rustmap.__getitem__
            self.get = self._rustmap.get
            return self._rustmap

        def write(self, st, now):
            """Serialize the map and parents into `st` and close it."""
            parents = self.parents()
            packed = self._rustmap.write(
                self._use_dirstate_v2, parents[0], parents[1], now
            )
            st.write(packed)
            st.close()
            self._dirtyparents = False

        @propertycache
        def filefoldmap(self):
            """Returns a dictionary mapping normalized case paths to their
            non-normalized versions.
            """
            return self._rustmap.filefoldmapasdict()

        def hastrackeddir(self, d):
            return self._rustmap.hastrackeddir(d)

        def hasdir(self, d):
            return self._rustmap.hasdir(d)

        @propertycache
        def identity(self):
            # Loading the Rust map assigns `self.identity`; that instance
            # attribute is what gets returned (and cached) here.
            self._rustmap
            return self.identity

        @property
        def nonnormalset(self):
            # not cached: asks the Rust map on every access
            nonnorm = self._rustmap.non_normal_entries()
            return nonnorm

        @propertycache
        def otherparentset(self):
            # NOTE(review): unlike `nonnormalset`, this result is cached on
            # first access -- confirm whether the Rust side returns a live
            # view or a snapshot.
            otherparents = self._rustmap.other_parent_entries()
            return otherparents

        def non_normal_or_other_parent_paths(self):
            return self._rustmap.non_normal_or_other_parent_paths()

        @propertycache
        def dirfoldmap(self):
            """Dict mapping case-normalized directory names to real form."""
            f = {}
            normcase = util.normcase
            for name, _pseudo_entry in self.directories():
                f[normcase(name)] = name
            return f