comparison mercurial/dirstate.py @ 13704:a464763e99f1

dirstate: avoid a race with multiple commits in the same process (issue2264, issue2516) The race happens when two commits in a row change the same file without changing its size, *if* those two commits happen in the same second in the same process while holding the same repo lock. For example: commit 1: M a M b commit 2: # same process, same second, same repo lock M b # modify b without changing its size M c This first manifested in transplant, which is the most common way to do multiple commits in the same process. But it can manifest in any script or extension that does multiple commits under the same repo lock. (Thus, the test script tests both transplant and a custom script.) The problem was that dirstate.status() failed to notice the change to b when localrepo is about to do the second commit, meaning that change gets left in the working directory. In the context of transplant, that means either a crash ("RuntimeError: nothing committed after transplant") or a silently inaccurate transplant, depending on whether any other files were modified by the second transplanted changeset. The fix is to make status() work a little harder when we have previously marked files as clean (state 'normal') in the same process. Specifically, dirstate.normal() adds files to self._lastnormal, and other state-changing methods remove them. Then dirstate.status() puts any files in self._lastnormal into state 'lookup', which will make localrepository.status() read file contents to see if it has really changed. So we pay a small performance penalty for the second (and subsequent) commits in the same process, without affecting the common case. Anything that does lots of status updates and checks in the same process could suffer a performance hit. Incidentally, there is a simpler fix: call dirstate.normallookup() on every file updated by commit() at the end of the commit. The trouble with that solution is that it imposes a performance penalty on the common case: it means the next status-dependent hg command after every "hg commit" will be a little bit slower. The patch here is more complex, but only affects performance for the uncommon case.
author Greg Ward <greg@gerg.ca>
date Sun, 20 Mar 2011 17:41:09 -0400
parents 14f3795a5ed7
children 15d1db2abfcb
comparison
equal deleted inserted replaced
13703:48d606d7192b 13704:a464763e99f1
47 self._validate = validate 47 self._validate = validate
48 self._root = root 48 self._root = root
49 self._rootdir = os.path.join(root, '') 49 self._rootdir = os.path.join(root, '')
50 self._dirty = False 50 self._dirty = False
51 self._dirtypl = False 51 self._dirtypl = False
52 self._lastnormal = set() # files believed to be normal
52 self._ui = ui 53 self._ui = ui
53 54
54 @propertycache 55 @propertycache
55 def _map(self): 56 def _map(self):
56 '''Return the dirstate contents as a map from filename to 57 '''Return the dirstate contents as a map from filename to
283 s = os.lstat(self._join(f)) 284 s = os.lstat(self._join(f))
284 self._map[f] = ('n', s.st_mode, s.st_size, int(s.st_mtime)) 285 self._map[f] = ('n', s.st_mode, s.st_size, int(s.st_mtime))
285 if f in self._copymap: 286 if f in self._copymap:
286 del self._copymap[f] 287 del self._copymap[f]
287 288
289 # Right now, this file is clean: but if some code in this
290 # process modifies it without changing its size before the clock
291 # ticks over to the next second, then it won't be clean anymore.
292 # So make sure that status() will look harder at it.
293 self._lastnormal.add(f)
294
288 def normallookup(self, f): 295 def normallookup(self, f):
289 '''Mark a file normal, but possibly dirty.''' 296 '''Mark a file normal, but possibly dirty.'''
290 if self._pl[1] != nullid and f in self._map: 297 if self._pl[1] != nullid and f in self._map:
291 # if there is a merge going on and the file was either 298 # if there is a merge going on and the file was either
292 # in state 'm' (-1) or coming from other parent (-2) before 299 # in state 'm' (-1) or coming from other parent (-2) before
306 self._dirty = True 313 self._dirty = True
307 self._addpath(f) 314 self._addpath(f)
308 self._map[f] = ('n', 0, -1, -1) 315 self._map[f] = ('n', 0, -1, -1)
309 if f in self._copymap: 316 if f in self._copymap:
310 del self._copymap[f] 317 del self._copymap[f]
318 self._lastnormal.discard(f)
311 319
312 def otherparent(self, f): 320 def otherparent(self, f):
313 '''Mark as coming from the other parent, always dirty.''' 321 '''Mark as coming from the other parent, always dirty.'''
314 if self._pl[1] == nullid: 322 if self._pl[1] == nullid:
315 raise util.Abort(_("setting %r to other parent " 323 raise util.Abort(_("setting %r to other parent "
317 self._dirty = True 325 self._dirty = True
318 self._addpath(f) 326 self._addpath(f)
319 self._map[f] = ('n', 0, -2, -1) 327 self._map[f] = ('n', 0, -2, -1)
320 if f in self._copymap: 328 if f in self._copymap:
321 del self._copymap[f] 329 del self._copymap[f]
330 self._lastnormal.discard(f)
322 331
323 def add(self, f): 332 def add(self, f):
324 '''Mark a file added.''' 333 '''Mark a file added.'''
325 self._dirty = True 334 self._dirty = True
326 self._addpath(f, True) 335 self._addpath(f, True)
327 self._map[f] = ('a', 0, -1, -1) 336 self._map[f] = ('a', 0, -1, -1)
328 if f in self._copymap: 337 if f in self._copymap:
329 del self._copymap[f] 338 del self._copymap[f]
339 self._lastnormal.discard(f)
330 340
331 def remove(self, f): 341 def remove(self, f):
332 '''Mark a file removed.''' 342 '''Mark a file removed.'''
333 self._dirty = True 343 self._dirty = True
334 self._droppath(f) 344 self._droppath(f)
341 elif entry[0] == 'n' and entry[2] == -2: # other parent 351 elif entry[0] == 'n' and entry[2] == -2: # other parent
342 size = -2 352 size = -2
343 self._map[f] = ('r', 0, size, 0) 353 self._map[f] = ('r', 0, size, 0)
344 if size == 0 and f in self._copymap: 354 if size == 0 and f in self._copymap:
345 del self._copymap[f] 355 del self._copymap[f]
356 self._lastnormal.discard(f)
346 357
347 def merge(self, f): 358 def merge(self, f):
348 '''Mark a file merged.''' 359 '''Mark a file merged.'''
349 self._dirty = True 360 self._dirty = True
350 s = os.lstat(self._join(f)) 361 s = os.lstat(self._join(f))
351 self._addpath(f) 362 self._addpath(f)
352 self._map[f] = ('m', s.st_mode, s.st_size, int(s.st_mtime)) 363 self._map[f] = ('m', s.st_mode, s.st_size, int(s.st_mtime))
353 if f in self._copymap: 364 if f in self._copymap:
354 del self._copymap[f] 365 del self._copymap[f]
366 self._lastnormal.discard(f)
355 367
356 def forget(self, f): 368 def forget(self, f):
357 '''Forget a file.''' 369 '''Forget a file.'''
358 self._dirty = True 370 self._dirty = True
359 try: 371 try:
360 self._droppath(f) 372 self._droppath(f)
361 del self._map[f] 373 del self._map[f]
362 except KeyError: 374 except KeyError:
363 self._ui.warn(_("not in dirstate: %s\n") % f) 375 self._ui.warn(_("not in dirstate: %s\n") % f)
376 self._lastnormal.discard(f)
364 377
365 def _normalize(self, path, knownpath): 378 def _normalize(self, path, knownpath):
366 norm_path = os.path.normcase(path) 379 norm_path = os.path.normcase(path)
367 fold_path = self._foldmap.get(norm_path, None) 380 fold_path = self._foldmap.get(norm_path, None)
368 if fold_path is None: 381 if fold_path is None:
638 uadd = unknown.append 651 uadd = unknown.append
639 iadd = ignored.append 652 iadd = ignored.append
640 radd = removed.append 653 radd = removed.append
641 dadd = deleted.append 654 dadd = deleted.append
642 cadd = clean.append 655 cadd = clean.append
656 lastnormal = self._lastnormal.__contains__
643 657
644 lnkkind = stat.S_IFLNK 658 lnkkind = stat.S_IFLNK
645 659
646 for fn, st in self.walk(match, subrepos, listunknown, 660 for fn, st in self.walk(match, subrepos, listunknown,
647 listignored).iteritems(): 661 listignored).iteritems():
670 or fn in self._copymap): 684 or fn in self._copymap):
671 madd(fn) 685 madd(fn)
672 elif (time != int(st.st_mtime) 686 elif (time != int(st.st_mtime)
673 and (mode & lnkkind != lnkkind or self._checklink)): 687 and (mode & lnkkind != lnkkind or self._checklink)):
674 ladd(fn) 688 ladd(fn)
689 elif lastnormal(fn):
690 # If previously in this process we recorded that
691 # this file is clean, think twice: intervening code
692 # may have modified the file in the same second
693 # without changing its size. So force caller to
694 # check file contents. Because we're not updating
695 # self._map, this only affects the current process.
696 # That should be OK because this mainly affects
697 # multiple commits in the same process, and each
698 # commit by definition makes the committed files
699 # clean.
700 ladd(fn)
675 elif listclean: 701 elif listclean:
676 cadd(fn) 702 cadd(fn)
677 elif state == 'm': 703 elif state == 'm':
678 madd(fn) 704 madd(fn)
679 elif state == 'a': 705 elif state == 'a':