comparison mercurial/dirstate.py @ 34672:e159f217230e

dirstate: move _read into dirstatemap

As part of separating the dirstate business logic from the storage, let's move the read code into the new dirstatemap class.

Differential Revision: https://phab.mercurial-scm.org/D977
author Durham Goode <durham@fb.com>
date Thu, 05 Oct 2017 11:34:41 -0700
parents cbda631c1dde
children e2214632c3a2
comparison
equal deleted inserted replaced
34671:16d73491416b 34672:e159f217230e
384 except: # re-raises 384 except: # re-raises
385 f.discard() 385 f.discard()
386 raise 386 raise
387 387
388 def _read(self): 388 def _read(self):
389 self._map = dirstatemap(self._ui, self._opener, self._root)
390
391 # ignore HG_PENDING because identity is used only for writing 389 # ignore HG_PENDING because identity is used only for writing
392 self._identity = util.filestat.frompath( 390 self._identity = util.filestat.frompath(
393 self._opener.join(self._filename)) 391 self._opener.join(self._filename))
394 try: 392 self._map = dirstatemap(self._ui, self._opener, self._root)
395 fp = self._map._opendirstatefile() 393 self._map.read()
396 try:
397 st = fp.read()
398 finally:
399 fp.close()
400 except IOError as err:
401 if err.errno != errno.ENOENT:
402 raise
403 return
404 if not st:
405 return
406
407 if util.safehasattr(parsers, 'dict_new_presized'):
408 # Make an estimate of the number of files in the dirstate based on
409 # its size. From a linear regression on a set of real-world repos,
410 # all over 10,000 files, the size of a dirstate entry is 85
411 # bytes. The cost of resizing is significantly higher than the cost
412 # of filling in a larger presized dict, so subtract 20% from the
413 # size.
414 #
415 # This heuristic is imperfect in many ways, so in a future dirstate
416 # format update it makes sense to just record the number of entries
417 # on write.
418 self._map._map = parsers.dict_new_presized(len(st) / 71)
419
420 # Python's garbage collector triggers a GC each time a certain number
421 # of container objects (the number being defined by
422 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
423 # for each file in the dirstate. The C version then immediately marks
424 # them as not to be tracked by the collector. However, this has no
425 # effect on when GCs are triggered, only on what objects the GC looks
426 # into. This means that O(number of files) GCs are unavoidable.
427 # Depending on when in the process's lifetime the dirstate is parsed,
428 # this can get very expensive. As a workaround, disable GC while
429 # parsing the dirstate.
430 #
431 # (we cannot decorate the function directly since it is in a C module)
432 parse_dirstate = util.nogc(parsers.parse_dirstate)
433 p = parse_dirstate(self._map._map, self._map.copymap, st)
434 if not self._map._dirtyparents:
435 self._map.setparents(*p)
436 394
437 def invalidate(self): 395 def invalidate(self):
438 '''Causes the next access to reread the dirstate. 396 '''Causes the next access to reread the dirstate.
439 397
440 This is different from localrepo.invalidatedirstate() because it always 398 This is different from localrepo.invalidatedirstate() because it always
1397 return self._parents 1355 return self._parents
1398 1356
1399 def setparents(self, p1, p2): 1357 def setparents(self, p1, p2):
1400 self._parents = (p1, p2) 1358 self._parents = (p1, p2)
1401 self._dirtyparents = True 1359 self._dirtyparents = True
1360
1361 def read(self):
1362 try:
1363 fp = self._opendirstatefile()
1364 try:
1365 st = fp.read()
1366 finally:
1367 fp.close()
1368 except IOError as err:
1369 if err.errno != errno.ENOENT:
1370 raise
1371 return
1372 if not st:
1373 return
1374
1375 if util.safehasattr(parsers, 'dict_new_presized'):
1376 # Make an estimate of the number of files in the dirstate based on
1377 # its size. From a linear regression on a set of real-world repos,
1378 # all over 10,000 files, the size of a dirstate entry is 85
1379 # bytes. The cost of resizing is significantly higher than the cost
1380 # of filling in a larger presized dict, so subtract 20% from the
1381 # size.
1382 #
1383 # This heuristic is imperfect in many ways, so in a future dirstate
1384 # format update it makes sense to just record the number of entries
1385 # on write.
1386 self._map = parsers.dict_new_presized(len(st) / 71)
1387
1388 # Python's garbage collector triggers a GC each time a certain number
1389 # of container objects (the number being defined by
1390 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
1391 # for each file in the dirstate. The C version then immediately marks
1392 # them as not to be tracked by the collector. However, this has no
1393 # effect on when GCs are triggered, only on what objects the GC looks
1394 # into. This means that O(number of files) GCs are unavoidable.
1395 # Depending on when in the process's lifetime the dirstate is parsed,
1396 # this can get very expensive. As a workaround, disable GC while
1397 # parsing the dirstate.
1398 #
1399 # (we cannot decorate the function directly since it is in a C module)
1400 parse_dirstate = util.nogc(parsers.parse_dirstate)
1401 p = parse_dirstate(self._map, self.copymap, st)
1402 if not self._dirtyparents:
1403 self.setparents(*p)