Mercurial > hg
comparison mercurial/dirstate.py @ 34672:e159f217230e
dirstate: move _read into dirstatemap
As part of separating the dirstate business logic from the storage, let's move
the read code into the new dirstatemap class.
Differential Revision: https://phab.mercurial-scm.org/D977
author | Durham Goode <durham@fb.com> |
---|---|
date | Thu, 05 Oct 2017 11:34:41 -0700 |
parents | cbda631c1dde |
children | e2214632c3a2 |
comparison legend: equal | deleted | inserted | replaced
34671:16d73491416b | 34672:e159f217230e |
---|---|
384 except: # re-raises | 384 except: # re-raises |
385 f.discard() | 385 f.discard() |
386 raise | 386 raise |
387 | 387 |
388 def _read(self): | 388 def _read(self): |
389 self._map = dirstatemap(self._ui, self._opener, self._root) | |
390 | |
391 # ignore HG_PENDING because identity is used only for writing | 389 # ignore HG_PENDING because identity is used only for writing |
392 self._identity = util.filestat.frompath( | 390 self._identity = util.filestat.frompath( |
393 self._opener.join(self._filename)) | 391 self._opener.join(self._filename)) |
394 try: | 392 self._map = dirstatemap(self._ui, self._opener, self._root) |
395 fp = self._map._opendirstatefile() | 393 self._map.read() |
396 try: | |
397 st = fp.read() | |
398 finally: | |
399 fp.close() | |
400 except IOError as err: | |
401 if err.errno != errno.ENOENT: | |
402 raise | |
403 return | |
404 if not st: | |
405 return | |
406 | |
407 if util.safehasattr(parsers, 'dict_new_presized'): | |
408 # Make an estimate of the number of files in the dirstate based on | |
409 # its size. From a linear regression on a set of real-world repos, | |
410 # all over 10,000 files, the size of a dirstate entry is 85 | |
411 # bytes. The cost of resizing is significantly higher than the cost | |
412 # of filling in a larger presized dict, so subtract 20% from the | |
413 # size. | |
414 # | |
415 # This heuristic is imperfect in many ways, so in a future dirstate | |
416 # format update it makes sense to just record the number of entries | |
417 # on write. | |
418 self._map._map = parsers.dict_new_presized(len(st) / 71) | |
419 | |
420 # Python's garbage collector triggers a GC each time a certain number | |
421 # of container objects (the number being defined by | |
422 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple | |
423 # for each file in the dirstate. The C version then immediately marks | |
424 # them as not to be tracked by the collector. However, this has no | |
425 # effect on when GCs are triggered, only on what objects the GC looks | |
426 # into. This means that O(number of files) GCs are unavoidable. | |
427 # Depending on when in the process's lifetime the dirstate is parsed, | |
428 # this can get very expensive. As a workaround, disable GC while | |
429 # parsing the dirstate. | |
430 # | |
431 # (we cannot decorate the function directly since it is in a C module) | |
432 parse_dirstate = util.nogc(parsers.parse_dirstate) | |
433 p = parse_dirstate(self._map._map, self._map.copymap, st) | |
434 if not self._map._dirtyparents: | |
435 self._map.setparents(*p) | |
436 | 394 |
437 def invalidate(self): | 395 def invalidate(self): |
438 '''Causes the next access to reread the dirstate. | 396 '''Causes the next access to reread the dirstate. |
439 | 397 |
440 This is different from localrepo.invalidatedirstate() because it always | 398 This is different from localrepo.invalidatedirstate() because it always |
1397 return self._parents | 1355 return self._parents |
1398 | 1356 |
1399 def setparents(self, p1, p2): | 1357 def setparents(self, p1, p2): |
1400 self._parents = (p1, p2) | 1358 self._parents = (p1, p2) |
1401 self._dirtyparents = True | 1359 self._dirtyparents = True |
1360 | |
1361 def read(self): | |
1362 try: | |
1363 fp = self._opendirstatefile() | |
1364 try: | |
1365 st = fp.read() | |
1366 finally: | |
1367 fp.close() | |
1368 except IOError as err: | |
1369 if err.errno != errno.ENOENT: | |
1370 raise | |
1371 return | |
1372 if not st: | |
1373 return | |
1374 | |
1375 if util.safehasattr(parsers, 'dict_new_presized'): | |
1376 # Make an estimate of the number of files in the dirstate based on | |
1377 # its size. From a linear regression on a set of real-world repos, | |
1378 # all over 10,000 files, the size of a dirstate entry is 85 | |
1379 # bytes. The cost of resizing is significantly higher than the cost | |
1380 # of filling in a larger presized dict, so subtract 20% from the | |
1381 # size. | |
1382 # | |
1383 # This heuristic is imperfect in many ways, so in a future dirstate | |
1384 # format update it makes sense to just record the number of entries | |
1385 # on write. | |
1386 self._map = parsers.dict_new_presized(len(st) / 71) | |
1387 | |
1388 # Python's garbage collector triggers a GC each time a certain number | |
1389 # of container objects (the number being defined by | |
1390 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple | |
1391 # for each file in the dirstate. The C version then immediately marks | |
1392 # them as not to be tracked by the collector. However, this has no | |
1393 # effect on when GCs are triggered, only on what objects the GC looks | |
1394 # into. This means that O(number of files) GCs are unavoidable. | |
1395 # Depending on when in the process's lifetime the dirstate is parsed, | |
1396 # this can get very expensive. As a workaround, disable GC while | |
1397 # parsing the dirstate. | |
1398 # | |
1399 # (we cannot decorate the function directly since it is in a C module) | |
1400 parse_dirstate = util.nogc(parsers.parse_dirstate) | |
1401 p = parse_dirstate(self._map, self.copymap, st) | |
1402 if not self._dirtyparents: | |
1403 self.setparents(*p) |