merge: fix race that could cause wrong size in dirstate
The problem is that hg merge/update/etc work the following way:
1. figure out what files to update
2. apply the update to disk
3. apply the update to in-memory dirstate
4. write dirstate
where step3 looks at the filesystem and assumes it sees the result of
step2. If a file is changed between step2 and step3, step3 will record
incorrect information in the dirstate.
I avoid this by passing the size step3 needs directly from step2, for
the common path (not implemented for change/delete conflicts for
instance).
I didn't fix the same race for the exec bit for now, because it's less
likely to be problematic and I had trouble due to the fact that the
dirstate stores the permissions differently from the manifest (st_mode
vs '' 'l' 'x'), in combination with tests that pretend that symlinks
are not supported.
However, I moved the lstat from step3 to step2, which should tighten
the race window markedly, both for the exec bit and for the mtime.
Differential Revision: https://phab.mercurial-scm.org/D6475
--- a/hgext/largefiles/overrides.py Wed Jun 12 13:10:52 2019 -0400
+++ b/hgext/largefiles/overrides.py Mon May 27 16:55:46 2019 -0400
@@ -515,7 +515,7 @@
return actions, diverge, renamedelete
@eh.wrapfunction(merge, 'recordupdates')
-def mergerecordupdates(orig, repo, actions, branchmerge):
+def mergerecordupdates(orig, repo, actions, branchmerge, getfiledata):
if 'lfmr' in actions:
lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
for lfile, args, msg in actions['lfmr']:
@@ -526,7 +526,7 @@
lfdirstate.add(lfile)
lfdirstate.write()
- return orig(repo, actions, branchmerge)
+ return orig(repo, actions, branchmerge, getfiledata)
# Override filemerge to prompt the user about how they wish to merge
# largefiles. This will handle identical edits without prompting the user.
--- a/hgext/narrow/narrowdirstate.py Wed Jun 12 13:10:52 2019 -0400
+++ b/hgext/narrow/narrowdirstate.py Mon May 27 16:55:46 2019 -0400
@@ -16,21 +16,21 @@
"""Add narrow spec dirstate ignore, block changes outside narrow spec."""
def _editfunc(fn):
- def _wrapper(self, *args):
+ def _wrapper(self, *args, **kwargs):
narrowmatch = repo.narrowmatch()
for f in args:
if f is not None and not narrowmatch(f) and f not in self:
raise error.Abort(_("cannot track '%s' - it is outside " +
"the narrow clone") % f)
- return fn(self, *args)
+ return fn(self, *args, **kwargs)
return _wrapper
class narrowdirstate(dirstate.__class__):
# Prevent adding/editing/copying/deleting files that are outside the
# sparse checkout
@_editfunc
- def normal(self, *args):
- return super(narrowdirstate, self).normal(*args)
+ def normal(self, *args, **kwargs):
+ return super(narrowdirstate, self).normal(*args, **kwargs)
@_editfunc
def add(self, *args):
--- a/hgext/remotefilelog/__init__.py Wed Jun 12 13:10:52 2019 -0400
+++ b/hgext/remotefilelog/__init__.py Mon May 27 16:55:46 2019 -0400
@@ -442,7 +442,8 @@
return s
# prefetch files before update
-def applyupdates(orig, repo, actions, wctx, mctx, overwrite, labels=None):
+def applyupdates(orig, repo, actions, wctx, mctx, overwrite, wantfiledata,
+ labels=None):
if isenabled(repo):
manifest = mctx.manifest()
files = []
@@ -450,7 +451,8 @@
files.append((f, hex(manifest[f])))
# batch fetch the needed files from the server
repo.fileservice.prefetch(files)
- return orig(repo, actions, wctx, mctx, overwrite, labels=labels)
+ return orig(repo, actions, wctx, mctx, overwrite, wantfiledata,
+ labels=labels)
# Prefetch merge checkunknownfiles
def checkunknownfiles(orig, repo, wctx, mctx, force, actions,
--- a/hgext/sparse.py Wed Jun 12 13:10:52 2019 -0400
+++ b/hgext/sparse.py Mon May 27 16:55:46 2019 -0400
@@ -228,7 +228,7 @@
hint = _('include file with `hg debugsparse --include <pattern>` or use ' +
'`hg add -s <file>` to include file directory while adding')
for func in editfuncs:
- def _wrapper(orig, self, *args):
+ def _wrapper(orig, self, *args, **kwargs):
sparsematch = self._sparsematcher
if not sparsematch.always():
for f in args:
@@ -237,7 +237,7 @@
raise error.Abort(_("cannot add '%s' - it is outside "
"the sparse checkout") % f,
hint=hint)
- return orig(self, *args)
+ return orig(self, *args, **kwargs)
extensions.wrapfunction(dirstate.dirstate, func, _wrapper)
@command('debugsparse', [
--- a/mercurial/context.py Wed Jun 12 13:10:52 2019 -0400
+++ b/mercurial/context.py Mon May 27 16:55:46 2019 -0400
@@ -1766,6 +1766,8 @@
def size(self):
return self._repo.wvfs.lstat(self._path).st_size
+ def lstat(self):
+ return self._repo.wvfs.lstat(self._path)
def date(self):
t, tz = self._changectx.date()
try:
@@ -1801,9 +1803,9 @@
def write(self, data, flags, backgroundclose=False, **kwargs):
"""wraps repo.wwrite"""
- self._repo.wwrite(self._path, data, flags,
- backgroundclose=backgroundclose,
- **kwargs)
+ return self._repo.wwrite(self._path, data, flags,
+ backgroundclose=backgroundclose,
+ **kwargs)
def markcopied(self, src):
"""marks this file a copy of `src`"""
--- a/mercurial/dirstate.py Wed Jun 12 13:10:52 2019 -0400
+++ b/mercurial/dirstate.py Mon May 27 16:55:46 2019 -0400
@@ -391,12 +391,24 @@
self._updatedfiles.add(f)
self._map.addfile(f, oldstate, state, mode, size, mtime)
- def normal(self, f):
- '''Mark a file normal and clean.'''
- s = os.lstat(self._join(f))
- mtime = s[stat.ST_MTIME]
- self._addpath(f, 'n', s.st_mode,
- s.st_size & _rangemask, mtime & _rangemask)
+ def normal(self, f, parentfiledata=None):
+ '''Mark a file normal and clean.
+
+ parentfiledata: (mode, size, mtime) of the clean file
+
+ parentfiledata should be computed from memory (for mode,
+ size), as or close as possible from the point where we
+ determined the file was clean, to limit the risk of the
+ file having been changed by an external process between the
+ moment where the file was determined to be clean and now.'''
+ if parentfiledata:
+ (mode, size, mtime) = parentfiledata
+ else:
+ s = os.lstat(self._join(f))
+ mode = s.st_mode
+ size = s.st_size
+ mtime = s[stat.ST_MTIME]
+ self._addpath(f, 'n', mode, size & _rangemask, mtime & _rangemask)
self._map.copymap.pop(f, None)
if f in self._map.nonnormalset:
self._map.nonnormalset.remove(f)
--- a/mercurial/merge.py Wed Jun 12 13:10:52 2019 -0400
+++ b/mercurial/merge.py Mon May 27 16:55:46 2019 -0400
@@ -10,6 +10,7 @@
import errno
import hashlib
import shutil
+import stat
import struct
from .i18n import _
@@ -683,7 +684,7 @@
def recordactions(self):
"""record remove/add/get actions in the dirstate"""
branchmerge = self._repo.dirstate.p2() != nullid
- recordupdates(self._repo, self.actions(), branchmerge)
+ recordupdates(self._repo, self.actions(), branchmerge, None)
def queueremove(self, f):
"""queues a file to be removed from the dirstate
@@ -1464,13 +1465,17 @@
repo.ui.warn(_("current directory was removed\n"
"(consider changing to repo root: %s)\n") % repo.root)
-def batchget(repo, mctx, wctx, actions):
+def batchget(repo, mctx, wctx, wantfiledata, actions):
"""apply gets to the working directory
mctx is the context to get from
- yields tuples for progress updates
+ Yields arbitrarily many (False, tuple) for progress updates, followed by
+ exactly one (True, filedata). When wantfiledata is false, filedata is an
+ empty list. When wantfiledata is true, filedata[i] is a triple (mode, size,
+ mtime) of the file written for action[i].
"""
+ filedata = []
verbose = repo.ui.verbose
fctx = mctx.filectx
ui = repo.ui
@@ -1494,16 +1499,24 @@
if repo.wvfs.lexists(conflicting):
orig = scmutil.backuppath(ui, repo, conflicting)
util.rename(repo.wjoin(conflicting), orig)
- wctx[f].clearunknown()
+ wfctx = wctx[f]
+ wfctx.clearunknown()
atomictemp = ui.configbool("experimental", "update.atomic-file")
- wctx[f].write(fctx(f).data(), flags, backgroundclose=True,
- atomictemp=atomictemp)
+ size = wfctx.write(fctx(f).data(), flags,
+ backgroundclose=True,
+ atomictemp=atomictemp)
+ if wantfiledata:
+ s = wfctx.lstat()
+ mode = s.st_mode
+ mtime = s[stat.ST_MTIME]
+ filedata.append((mode, size, mtime)) # for dirstate.normal
if i == 100:
- yield i, f
+ yield False, (i, f)
i = 0
i += 1
if i > 0:
- yield i, f
+ yield False, (i, f)
+ yield True, filedata
def _prefetchfiles(repo, ctx, actions):
"""Invoke ``scmutil.prefetchfiles()`` for the files relevant to the dict
@@ -1550,14 +1563,17 @@
ACTION_PATH_CONFLICT,
ACTION_PATH_CONFLICT_RESOLVE))
-def applyupdates(repo, actions, wctx, mctx, overwrite, labels=None):
+def applyupdates(repo, actions, wctx, mctx, overwrite, wantfiledata,
+ labels=None):
"""apply the merge action list to the working directory
wctx is the working copy context
mctx is the context to be merged into the working copy
- Return a tuple of counts (updated, merged, removed, unresolved) that
- describes how many files were affected by the update.
+ Return a tuple of (counts, filedata), where counts is a tuple
+ (updated, merged, removed, unresolved) that describes how many
+ files were affected by the update, and filedata is as described in
+ batchget.
"""
_prefetchfiles(repo, mctx, actions)
@@ -1649,11 +1665,18 @@
# get in parallel.
threadsafe = repo.ui.configbool('experimental',
'worker.wdir-get-thread-safe')
- prog = worker.worker(repo.ui, cost, batchget, (repo, mctx, wctx),
+ prog = worker.worker(repo.ui, cost, batchget,
+ (repo, mctx, wctx, wantfiledata),
actions[ACTION_GET],
- threadsafe=threadsafe)
- for i, item in prog:
- progress.increment(step=i, item=item)
+ threadsafe=threadsafe,
+ hasretval=True)
+ getfiledata = []
+ for final, res in prog:
+ if final:
+ getfiledata = res
+ else:
+ i, item = res
+ progress.increment(step=i, item=item)
updated = len(actions[ACTION_GET])
if [a for a in actions[ACTION_GET] if a[0] == '.hgsubstate']:
@@ -1778,6 +1801,9 @@
mfiles = set(a[0] for a in actions[ACTION_MERGE])
for k, acts in extraactions.iteritems():
actions[k].extend(acts)
+ if k == ACTION_GET and wantfiledata:
+ # no filedata until mergestate is updated to provide it
+ getfiledata.extend([None] * len(acts))
# Remove these files from actions[ACTION_MERGE] as well. This is
# important because in recordupdates, files in actions[ACTION_MERGE]
# are processed after files in other actions, and the merge driver
@@ -1800,9 +1826,10 @@
if a[0] in mfiles]
progress.complete()
- return updateresult(updated, merged, removed, unresolved)
+ assert len(getfiledata) == (len(actions[ACTION_GET]) if wantfiledata else 0)
+ return updateresult(updated, merged, removed, unresolved), getfiledata
-def recordupdates(repo, actions, branchmerge):
+def recordupdates(repo, actions, branchmerge, getfiledata):
"record merge actions to the dirstate"
# remove (must come first)
for f, args, msg in actions.get(ACTION_REMOVE, []):
@@ -1846,11 +1873,12 @@
pass
# get
- for f, args, msg in actions.get(ACTION_GET, []):
+ for i, (f, args, msg) in enumerate(actions.get(ACTION_GET, [])):
if branchmerge:
repo.dirstate.otherparent(f)
else:
- repo.dirstate.normal(f)
+ parentfiledata = getfiledata[i] if getfiledata else None
+ repo.dirstate.normal(f, parentfiledata=parentfiledata)
# merge
for f, args, msg in actions.get(ACTION_MERGE, []):
@@ -2166,12 +2194,15 @@
'fsmonitor enabled; enable fsmonitor to improve performance; '
'see "hg help -e fsmonitor")\n'))
- stats = applyupdates(repo, actions, wc, p2, overwrite, labels=labels)
+ updatedirstate = not partial and not wc.isinmemory()
+ wantfiledata = updatedirstate and not branchmerge
+ stats, getfiledata = applyupdates(repo, actions, wc, p2, overwrite,
+ wantfiledata, labels=labels)
- if not partial and not wc.isinmemory():
+ if updatedirstate:
with repo.dirstate.parentchange():
repo.setparents(fp1, fp2)
- recordupdates(repo, actions, branchmerge)
+ recordupdates(repo, actions, branchmerge, getfiledata)
# update completed, clear state
util.unlink(repo.vfs.join('updatestate'))
--- a/mercurial/narrowspec.py Wed Jun 12 13:10:52 2019 -0400
+++ b/mercurial/narrowspec.py Mon May 27 16:55:46 2019 -0400
@@ -259,7 +259,7 @@
if not repo.wvfs.exists(f):
addgaction((f, (mf.flags(f), False), "narrowspec updated"))
merge.applyupdates(repo, actions, wctx=repo[None],
- mctx=repo['.'], overwrite=False)
+ mctx=repo['.'], overwrite=False, wantfiledata=False)
def checkworkingcopynarrowspec(repo):
# Avoid infinite recursion when updating the working copy
--- a/mercurial/sparse.py Wed Jun 12 13:10:52 2019 -0400
+++ b/mercurial/sparse.py Mon May 27 16:55:46 2019 -0400
@@ -248,7 +248,8 @@
typeactions = mergemod.emptyactions()
typeactions['r'] = actions
- mergemod.applyupdates(repo, typeactions, repo[None], repo['.'], False)
+ mergemod.applyupdates(repo, typeactions, repo[None], repo['.'], False,
+ wantfiledata=False)
# Fix dirstate
for file in dropped:
@@ -382,7 +383,7 @@
typeactions = mergemod.emptyactions()
typeactions['g'] = actions
mergemod.applyupdates(repo, typeactions, repo[None], repo['.'],
- False)
+ False, wantfiledata=False)
dirstate = repo.dirstate
for file, flags, msg in actions:
@@ -486,7 +487,8 @@
for f, (m, args, msg) in actions.iteritems():
typeactions[m].append((f, args, msg))
- mergemod.applyupdates(repo, typeactions, repo[None], repo['.'], False)
+ mergemod.applyupdates(repo, typeactions, repo[None], repo['.'], False,
+ wantfiledata=False)
# Fix dirstate
for file in added:
--- a/tests/test-dirstate-race2.t Wed Jun 12 13:10:52 2019 -0400
+++ b/tests/test-dirstate-race2.t Mon May 27 16:55:46 2019 -0400
@@ -27,13 +27,11 @@
> EOF
Do an update where file 'a' is changed between hg writing it to disk
-and hg writing the dirstate. It results in a corrupted dirstate, which
-stores the wrong size, and thus hg status shows spuriously modified
-files.
+and hg writing the dirstate. The dirstate is correct nonetheless, and
+so hg status correctly shows a as clean.
$ hg up -r 0 --config extensions.race=$TESTTMP/dirstaterace.py
1 files updated, 0 files merged, 0 files removed, 0 files unresolved
$ hg debugdirstate --no-dates
- n 644 0 (set |unset) a (re)
+ n 644 2 (set |unset) a (re)
$ echo a > a; hg status; hg diff
- M a
--- a/tests/test-dirstate.t Wed Jun 12 13:10:52 2019 -0400
+++ b/tests/test-dirstate.t Mon May 27 16:55:46 2019 -0400
@@ -73,7 +73,7 @@
> merge,
> )
>
- > def wraprecordupdates(orig, repo, actions, branchmerge):
+ > def wraprecordupdates(*args):
> raise error.Abort("simulated error while recording dirstateupdates")
>
> def reposetup(ui, repo):