Mercurial > hg
comparison mercurial/dirstate.py @ 13704:a464763e99f1
dirstate: avoid a race with multiple commits in the same process
(issue2264, issue2516)
The race happens when two commits in a row change the same file
without changing its size, *if* those two commits happen in the same
second in the same process while holding the same repo lock. For
example:
commit 1:
M a
M b
commit 2: # same process, same second, same repo lock
M b # modify b without changing its size
M c
This first manifested in transplant, which is the most common way to
do multiple commits in the same process. But it can manifest in any
script or extension that does multiple commits under the same repo
lock. (Thus, the test script tests both transplant and a custom script.)
The problem was that dirstate.status() failed to notice the change to
b when localrepo is about to do the second commit, meaning that change
gets left in the working directory. In the context of transplant, that
means either a crash ("RuntimeError: nothing committed after
transplant") or a silently inaccurate transplant, depending on whether
any other files were modified by the second transplanted changeset.
The fix is to make status() work a little harder when we have
previously marked files as clean (state 'normal') in the same process.
Specifically, dirstate.normal() adds files to self._lastnormal, and
other state-changing methods remove them. Then dirstate.status() puts
any files in self._lastnormal into state 'lookup', which will make
localrepository.status() read file contents to see if it has really
changed. So we pay a small performance penalty for the second (and
subsequent) commits in the same process, without affecting the common
case. Anything that does lots of status updates and checks in the
same process could suffer a performance hit.
Incidentally, there is a simpler fix: call dirstate.normallookup() on
every file updated by commit() at the end of the commit. The trouble
with that solution is that it imposes a performance penalty on the
common case: it means the next status-dependent hg command after every
"hg commit" will be a little bit slower. The patch here is more
complex, but only affects performance for the uncommon case.
author | Greg Ward <greg@gerg.ca> |
---|---|
date | Sun, 20 Mar 2011 17:41:09 -0400 |
parents | 14f3795a5ed7 |
children | 15d1db2abfcb |
comparison
equal
deleted
inserted
replaced
13703:48d606d7192b | 13704:a464763e99f1 |
---|---|
47 self._validate = validate | 47 self._validate = validate |
48 self._root = root | 48 self._root = root |
49 self._rootdir = os.path.join(root, '') | 49 self._rootdir = os.path.join(root, '') |
50 self._dirty = False | 50 self._dirty = False |
51 self._dirtypl = False | 51 self._dirtypl = False |
52 self._lastnormal = set() # files believed to be normal | |
52 self._ui = ui | 53 self._ui = ui |
53 | 54 |
54 @propertycache | 55 @propertycache |
55 def _map(self): | 56 def _map(self): |
56 '''Return the dirstate contents as a map from filename to | 57 '''Return the dirstate contents as a map from filename to |
283 s = os.lstat(self._join(f)) | 284 s = os.lstat(self._join(f)) |
284 self._map[f] = ('n', s.st_mode, s.st_size, int(s.st_mtime)) | 285 self._map[f] = ('n', s.st_mode, s.st_size, int(s.st_mtime)) |
285 if f in self._copymap: | 286 if f in self._copymap: |
286 del self._copymap[f] | 287 del self._copymap[f] |
287 | 288 |
289 # Right now, this file is clean: but if some code in this | |
290 # process modifies it without changing its size before the clock | |
291 # ticks over to the next second, then it won't be clean anymore. | |
292 # So make sure that status() will look harder at it. | |
293 self._lastnormal.add(f) | |
294 | |
288 def normallookup(self, f): | 295 def normallookup(self, f): |
289 '''Mark a file normal, but possibly dirty.''' | 296 '''Mark a file normal, but possibly dirty.''' |
290 if self._pl[1] != nullid and f in self._map: | 297 if self._pl[1] != nullid and f in self._map: |
291 # if there is a merge going on and the file was either | 298 # if there is a merge going on and the file was either |
292 # in state 'm' (-1) or coming from other parent (-2) before | 299 # in state 'm' (-1) or coming from other parent (-2) before |
306 self._dirty = True | 313 self._dirty = True |
307 self._addpath(f) | 314 self._addpath(f) |
308 self._map[f] = ('n', 0, -1, -1) | 315 self._map[f] = ('n', 0, -1, -1) |
309 if f in self._copymap: | 316 if f in self._copymap: |
310 del self._copymap[f] | 317 del self._copymap[f] |
318 self._lastnormal.discard(f) | |
311 | 319 |
312 def otherparent(self, f): | 320 def otherparent(self, f): |
313 '''Mark as coming from the other parent, always dirty.''' | 321 '''Mark as coming from the other parent, always dirty.''' |
314 if self._pl[1] == nullid: | 322 if self._pl[1] == nullid: |
315 raise util.Abort(_("setting %r to other parent " | 323 raise util.Abort(_("setting %r to other parent " |
317 self._dirty = True | 325 self._dirty = True |
318 self._addpath(f) | 326 self._addpath(f) |
319 self._map[f] = ('n', 0, -2, -1) | 327 self._map[f] = ('n', 0, -2, -1) |
320 if f in self._copymap: | 328 if f in self._copymap: |
321 del self._copymap[f] | 329 del self._copymap[f] |
330 self._lastnormal.discard(f) | |
322 | 331 |
323 def add(self, f): | 332 def add(self, f): |
324 '''Mark a file added.''' | 333 '''Mark a file added.''' |
325 self._dirty = True | 334 self._dirty = True |
326 self._addpath(f, True) | 335 self._addpath(f, True) |
327 self._map[f] = ('a', 0, -1, -1) | 336 self._map[f] = ('a', 0, -1, -1) |
328 if f in self._copymap: | 337 if f in self._copymap: |
329 del self._copymap[f] | 338 del self._copymap[f] |
339 self._lastnormal.discard(f) | |
330 | 340 |
331 def remove(self, f): | 341 def remove(self, f): |
332 '''Mark a file removed.''' | 342 '''Mark a file removed.''' |
333 self._dirty = True | 343 self._dirty = True |
334 self._droppath(f) | 344 self._droppath(f) |
341 elif entry[0] == 'n' and entry[2] == -2: # other parent | 351 elif entry[0] == 'n' and entry[2] == -2: # other parent |
342 size = -2 | 352 size = -2 |
343 self._map[f] = ('r', 0, size, 0) | 353 self._map[f] = ('r', 0, size, 0) |
344 if size == 0 and f in self._copymap: | 354 if size == 0 and f in self._copymap: |
345 del self._copymap[f] | 355 del self._copymap[f] |
356 self._lastnormal.discard(f) | |
346 | 357 |
347 def merge(self, f): | 358 def merge(self, f): |
348 '''Mark a file merged.''' | 359 '''Mark a file merged.''' |
349 self._dirty = True | 360 self._dirty = True |
350 s = os.lstat(self._join(f)) | 361 s = os.lstat(self._join(f)) |
351 self._addpath(f) | 362 self._addpath(f) |
352 self._map[f] = ('m', s.st_mode, s.st_size, int(s.st_mtime)) | 363 self._map[f] = ('m', s.st_mode, s.st_size, int(s.st_mtime)) |
353 if f in self._copymap: | 364 if f in self._copymap: |
354 del self._copymap[f] | 365 del self._copymap[f] |
366 self._lastnormal.discard(f) | |
355 | 367 |
356 def forget(self, f): | 368 def forget(self, f): |
357 '''Forget a file.''' | 369 '''Forget a file.''' |
358 self._dirty = True | 370 self._dirty = True |
359 try: | 371 try: |
360 self._droppath(f) | 372 self._droppath(f) |
361 del self._map[f] | 373 del self._map[f] |
362 except KeyError: | 374 except KeyError: |
363 self._ui.warn(_("not in dirstate: %s\n") % f) | 375 self._ui.warn(_("not in dirstate: %s\n") % f) |
376 self._lastnormal.discard(f) | |
364 | 377 |
365 def _normalize(self, path, knownpath): | 378 def _normalize(self, path, knownpath): |
366 norm_path = os.path.normcase(path) | 379 norm_path = os.path.normcase(path) |
367 fold_path = self._foldmap.get(norm_path, None) | 380 fold_path = self._foldmap.get(norm_path, None) |
368 if fold_path is None: | 381 if fold_path is None: |
638 uadd = unknown.append | 651 uadd = unknown.append |
639 iadd = ignored.append | 652 iadd = ignored.append |
640 radd = removed.append | 653 radd = removed.append |
641 dadd = deleted.append | 654 dadd = deleted.append |
642 cadd = clean.append | 655 cadd = clean.append |
656 lastnormal = self._lastnormal.__contains__ | |
643 | 657 |
644 lnkkind = stat.S_IFLNK | 658 lnkkind = stat.S_IFLNK |
645 | 659 |
646 for fn, st in self.walk(match, subrepos, listunknown, | 660 for fn, st in self.walk(match, subrepos, listunknown, |
647 listignored).iteritems(): | 661 listignored).iteritems(): |
670 or fn in self._copymap): | 684 or fn in self._copymap): |
671 madd(fn) | 685 madd(fn) |
672 elif (time != int(st.st_mtime) | 686 elif (time != int(st.st_mtime) |
673 and (mode & lnkkind != lnkkind or self._checklink)): | 687 and (mode & lnkkind != lnkkind or self._checklink)): |
674 ladd(fn) | 688 ladd(fn) |
689 elif lastnormal(fn): | |
690 # If previously in this process we recorded that | |
691 # this file is clean, think twice: intervening code | |
692 # may have modified the file in the same second | |
693 # without changing its size. So force caller to | |
694 # check file contents. Because we're not updating | |
695 # self._map, this only affects the current process. | |
696 # That should be OK because this mainly affects | |
697 # multiple commits in the same process, and each | |
698 # commit by definition makes the committed files | |
699 # clean. | |
700 ladd(fn) | |
675 elif listclean: | 701 elif listclean: |
676 cadd(fn) | 702 cadd(fn) |
677 elif state == 'm': | 703 elif state == 'm': |
678 madd(fn) | 704 madd(fn) |
679 elif state == 'a': | 705 elif state == 'a': |