Mercurial > hg
changeset 14452:ee574cfd0c32
patch: use temporary files to handle intermediate copies
git patches may require copies to be handled out-of-order. For instance, take
the following sequence:
* modify a
* copy a into b
Here, we have to generate b from a before its modification. To do so,
applydiff() was scanning for copy metadata and performing the copies before
processing the other changes in-order. While smart and efficient, this approach
complicates things by handling file copies and file creations at different
places and times. While a new file must not exist before being patched a copied
file already exists before applying the first hunk.
Instead of copying the files at their final destination before patching, we
store them in a temporary file location and retrieve them when patching. The
filestore always stores file content in real files but nothing prevents adding
a cache layer. The filestore class was kept separate from fsbackend for at
least two reasons:
- This class is likely to be reused as a temporary result store for a future
repository patching call (entries just have to be extended to contain copy
sources).
- Delegating this role to backends might be more efficient in a repository
backend case: the source files are already available in the repository itself
and do not need to be copied again. It also means that third-parties backend
would have to implement two other methods. If we ever decide to merge the
filestore feature into backend, a minimalistic approach would be to compose
with filestore directly. Keep in mind this copy overhead only applies for
copy/rename sources, and may even be reduced to copy sources which have to
handled ahead of time.
author | Patrick Mezard <pmezard@gmail.com> |
---|---|
date | Fri, 27 May 2011 21:50:10 +0200 |
parents | c78d41db6f88 |
children | ea3d548132cc |
files | hgext/keyword.py mercurial/patch.py tests/test-git-import.t tests/test-import.t tests/test-mq-qrefresh.t tests/test-mq-symlinks.t |
diffstat | 6 files changed, 126 insertions(+), 91 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/keyword.py Fri May 27 21:50:09 2011 +0200 +++ b/hgext/keyword.py Fri May 27 21:50:10 2011 +0200 @@ -595,11 +595,12 @@ wlock.release() # monkeypatches - def kwpatchfile_init(orig, self, ui, fname, backend, mode, create, remove, - missing=False, eolmode=None): + def kwpatchfile_init(orig, self, ui, fname, backend, store, mode, create, + remove, eolmode=None, copysource=None): '''Monkeypatch/wrap patch.patchfile.__init__ to avoid rejects or conflicts due to expanded keywords in working dir.''' - orig(self, ui, fname, backend, mode, create, remove, missing, eolmode) + orig(self, ui, fname, backend, store, mode, create, remove, + eolmode, copysource) # shrink keywords read from working dir self.lines = kwt.shrinklines(self.fname, self.lines)
--- a/mercurial/patch.py Fri May 27 21:50:09 2011 +0200 +++ b/mercurial/patch.py Fri May 27 21:50:10 2011 +0200 @@ -7,7 +7,7 @@ # GNU General Public License version 2 or any later version. import cStringIO, email.Parser, os, errno, re -import tempfile, zlib +import tempfile, zlib, shutil from i18n import _ from node import hex, nullid, short @@ -362,10 +362,11 @@ """ raise NotImplementedError - def setfile(self, fname, data, mode): + def setfile(self, fname, data, mode, copysource): """Write data to target file fname and set its mode. mode is a (islink, isexec) tuple. If data is None, the file content should - be left unchanged. + be left unchanged. If the file is modified after being copied, + copysource is set to the original file name. """ raise NotImplementedError @@ -380,13 +381,6 @@ """ pass - def copy(self, src, dst): - """Copy src file into dst file. Create intermediate directories if - necessary. Files are specified relatively to the patching base - directory. - """ - raise NotImplementedError - def exists(self, fname): raise NotImplementedError @@ -411,7 +405,7 @@ raise return (self.opener.read(fname), (islink, isexec)) - def setfile(self, fname, data, mode): + def setfile(self, fname, data, mode, copysource): islink, isexec = mode if data is None: util.setflags(self._join(fname), islink, isexec) @@ -439,23 +433,6 @@ fp.writelines(lines) fp.close() - def copy(self, src, dst): - basedir = self.opener.base - abssrc, absdst = [scmutil.canonpath(basedir, basedir, x) - for x in [src, dst]] - if os.path.lexists(absdst): - raise util.Abort(_("cannot create %s: destination already exists") - % dst) - dstdir = os.path.dirname(absdst) - if dstdir and not os.path.isdir(dstdir): - try: - os.makedirs(dstdir) - except IOError: - raise util.Abort( - _("cannot create %s: unable to create destination directory") - % dst) - util.copyfile(abssrc, absdst) - def exists(self, fname): return os.path.lexists(self._join(fname)) @@ -468,8 +445,10 @@ self.changed = set() self.copied = [] - def setfile(self, fname, data, mode): - super(workingbackend, self).setfile(fname, data, mode) + def setfile(self, fname, data, mode, copysource): + super(workingbackend, self).setfile(fname, data, mode, copysource) + if copysource is not None: + self.copied.append((copysource, fname)) self.changed.add(fname) def unlink(self, fname): @@ -477,11 +456,6 @@ self.removed.add(fname) self.changed.add(fname) - def copy(self, src, dst): - super(workingbackend, self).copy(src, dst) - self.copied.append((src, dst)) - self.changed.add(dst) - def close(self): wctx = self.repo[None] addremoved = set(self.changed) @@ -498,14 +472,40 @@ scmutil.addremove(self.repo, addremoved, similarity=self.similarity) return sorted(self.changed) +class filestore(object): + def __init__(self): + self.opener = None + self.files = {} + self.created = 0 + + def setfile(self, fname, data, mode): + if self.opener is None: + root = tempfile.mkdtemp(prefix='hg-patch-') + self.opener = scmutil.opener(root) + # Avoid filename issues with these simple names + fn = str(self.created) + self.opener.write(fn, data) + self.created += 1 + self.files[fname] = (fn, mode) + + def getfile(self, fname): + if fname not in self.files: + raise IOError() + fn, mode = self.files[fname] + return self.opener.read(fn), mode + + def close(self): + if self.opener: + shutil.rmtree(self.opener.base) + # @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1 unidesc = re.compile('@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@') contextdesc = re.compile('(---|\*\*\*) (\d+)(,(\d+))? (---|\*\*\*)') eolmodes = ['strict', 'crlf', 'lf', 'auto'] class patchfile(object): - def __init__(self, ui, fname, backend, mode, create, remove, missing=False, - eolmode='strict'): + def __init__(self, ui, fname, backend, store, mode, create, remove, + eolmode='strict', copysource=None): self.fname = fname self.eolmode = eolmode self.eol = None @@ -513,36 +513,43 @@ self.ui = ui self.lines = [] self.exists = False - self.missing = missing + self.missing = True self.mode = mode + self.copysource = copysource self.create = create self.remove = remove - if not missing: - try: - data, mode = self.backend.getfile(fname) - if data: - self.lines = data.splitlines(True) - if self.mode is None: - self.mode = mode - if self.lines: - # Normalize line endings - if self.lines[0].endswith('\r\n'): - self.eol = '\r\n' - elif self.lines[0].endswith('\n'): - self.eol = '\n' - if eolmode != 'strict': - nlines = [] - for l in self.lines: - if l.endswith('\r\n'): - l = l[:-2] + '\n' - nlines.append(l) - self.lines = nlines + try: + if copysource is None: + data, mode = backend.getfile(fname) self.exists = True - except IOError: - if self.mode is None: - self.mode = (False, False) - else: - self.ui.warn(_("unable to find '%s' for patching\n") % self.fname) + else: + data, mode = store.getfile(copysource) + self.exists = backend.exists(fname) + self.missing = False + if data: + self.lines = data.splitlines(True) + if self.mode is None: + self.mode = mode + if self.lines: + # Normalize line endings + if self.lines[0].endswith('\r\n'): + self.eol = '\r\n' + elif self.lines[0].endswith('\n'): + self.eol = '\n' + if eolmode != 'strict': + nlines = [] + for l in self.lines: + if l.endswith('\r\n'): + l = l[:-2] + '\n' + nlines.append(l) + self.lines = nlines + except IOError: + if create: + self.missing = False + if self.mode is None: + self.mode = (False, False) + if self.missing: + self.ui.warn(_("unable to find '%s' for patching\n") % self.fname) self.hash = {} self.dirty = 0 @@ -569,7 +576,7 @@ rawlines.append(l) lines = rawlines - self.backend.setfile(fname, ''.join(lines), mode) + self.backend.setfile(fname, ''.join(lines), mode, self.copysource) def printfile(self, warn): if self.fileprinted: @@ -623,7 +630,11 @@ return -1 if self.exists and self.create: - self.ui.warn(_("file %s already exists\n") % self.fname) + if self.copysource: + self.ui.warn(_("cannot create %s: destination already " + "exists\n" % self.fname)) + else: + self.ui.warn(_("file %s already exists\n") % self.fname) self.rej.append(h) return -1 @@ -1005,10 +1016,10 @@ # Git patches do not play games. Excluding copies from the # following heuristic avoids a lot of confusion fname = pathstrip(gp.path, strip - 1)[1] - create = gp.op == 'ADD' + create = gp.op in ('ADD', 'COPY', 'RENAME') remove = gp.op == 'DELETE' missing = not create and not backend.exists(fname) - return fname, missing, create, remove + return fname, create, remove nulla = afile_orig == "/dev/null" nullb = bfile_orig == "/dev/null" create = nulla and hunk.starta == 0 and hunk.lena == 0 @@ -1050,7 +1061,7 @@ else: raise PatchError(_("undefined source and destination files")) - return fname, missing, create, remove + return fname, create, remove def scangitpatch(lr, firstline): """ @@ -1177,7 +1188,7 @@ gp = gitpatches.pop()[1] yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp) -def applydiff(ui, fp, changed, backend, strip=1, eolmode='strict'): +def applydiff(ui, fp, changed, backend, store, strip=1, eolmode='strict'): """Reads a patch from fp and tries to apply it. The dict 'changed' is filled in with all of the filenames changed @@ -1188,10 +1199,11 @@ read in binary mode. Otherwise, line endings are ignored when patching then normalized according to 'eolmode'. """ - return _applydiff(ui, fp, patchfile, backend, changed, strip=strip, + return _applydiff(ui, fp, patchfile, backend, store, changed, strip=strip, eolmode=eolmode) -def _applydiff(ui, fp, patcher, backend, changed, strip=1, eolmode='strict'): +def _applydiff(ui, fp, patcher, backend, store, changed, strip=1, + eolmode='strict'): def pstrip(p): return pathstrip(p, strip - 1)[1] @@ -1214,30 +1226,42 @@ rejects += current_file.close() current_file = None afile, bfile, first_hunk, gp = values + copysource = None if gp: path = pstrip(gp.path) + if gp.oldpath: + copysource = pstrip(gp.oldpath) changed[path] = gp if gp.op == 'DELETE': backend.unlink(path) continue if gp.op == 'RENAME': - backend.unlink(pstrip(gp.oldpath)) - if gp.mode and not first_hunk: - data = None - if gp.op == 'ADD': - # Added files without content have no hunk and - # must be created - data = '' - backend.setfile(path, data, gp.mode) + backend.unlink(copysource) + if not first_hunk: + data, mode = None, None + if gp.op in ('RENAME', 'COPY'): + data, mode = store.getfile(copysource) + if gp.mode: + mode = gp.mode + if gp.op == 'ADD': + # Added files without content have no hunk and + # must be created + data = '' + if data or mode: + if (gp.op in ('ADD', 'RENAME', 'COPY') + and backend.exists(path)): + raise PatchError(_("cannot create %s: destination " + "already exists") % path) + backend.setfile(path, data, mode, copysource) if not first_hunk: continue try: mode = gp and gp.mode or None - current_file, missing, create, remove = selectfile( + current_file, create, remove = selectfile( backend, afile, bfile, first_hunk, strip, gp) - current_file = patcher(ui, current_file, backend, mode, - create, remove, missing=missing, - eolmode=eolmode) + current_file = patcher(ui, current_file, backend, store, mode, + create, remove, eolmode=eolmode, + copysource=copysource) except PatchError, inst: ui.warn(str(inst) + '\n') current_file = None @@ -1245,7 +1269,9 @@ continue elif state == 'git': for gp in values: - backend.copy(pstrip(gp.oldpath), pstrip(gp.path)) + path = pstrip(gp.oldpath) + data, mode = backend.getfile(path) + store.setfile(path, data, mode) else: raise util.Abort(_('unsupported parser state: %s') % state) @@ -1316,17 +1342,20 @@ raise util.Abort(_('unsupported line endings type: %s') % eolmode) eolmode = eolmode.lower() + store = filestore() backend = workingbackend(ui, repo, similarity) try: fp = open(patchobj, 'rb') except TypeError: fp = patchobj try: - ret = applydiff(ui, fp, files, backend, strip=strip, eolmode=eolmode) + ret = applydiff(ui, fp, files, backend, store, strip=strip, + eolmode=eolmode) finally: if fp != patchobj: fp.close() files.update(dict.fromkeys(backend.close())) + store.close() if ret < 0: raise PatchError(_('patch failed to apply')) return ret > 0 @@ -1370,7 +1399,7 @@ changed.add(pathstrip(gp.oldpath, strip - 1)[1]) if not first_hunk: continue - current_file, missing, create, remove = selectfile( + current_file, create, remove = selectfile( backend, afile, bfile, first_hunk, strip, gp) changed.add(current_file) elif state not in ('hunk', 'git'):
--- a/tests/test-git-import.t Fri May 27 21:50:09 2011 +0200 +++ b/tests/test-git-import.t Fri May 27 21:50:10 2011 +0200 @@ -436,7 +436,9 @@ > +b > EOF applying patch from stdin - abort: cannot create b: destination already exists + cannot create b: destination already exists + 1 out of 1 hunks FAILED -- saving rejects to file b.rej + abort: patch failed to apply [255] $ cat b b
--- a/tests/test-import.t Fri May 27 21:50:09 2011 +0200 +++ b/tests/test-import.t Fri May 27 21:50:10 2011 +0200 @@ -617,7 +617,7 @@ > rename to bar > EOF applying patch from stdin - abort: ../outside/foo not under root + abort: path contains illegal component: ../outside/foo [255] $ cd ..
--- a/tests/test-mq-qrefresh.t Fri May 27 21:50:09 2011 +0200 +++ b/tests/test-mq-qrefresh.t Fri May 27 21:50:10 2011 +0200 @@ -502,6 +502,7 @@ refresh interrupted while patch was popped! (revert --all, qpush to recover) abort: username 'foo\nbar' contains a newline! [255] + $ rm a $ cat .hg/patches/a # HG changeset patch # Parent 0000000000000000000000000000000000000000
--- a/tests/test-mq-symlinks.t Fri May 27 21:50:09 2011 +0200 +++ b/tests/test-mq-symlinks.t Fri May 27 21:50:10 2011 +0200 @@ -115,6 +115,8 @@ $ ln -s linkbb linkb $ hg qpush applying movelink + cannot create linkb: destination already exists + 1 out of 1 hunks FAILED -- saving rejects to file linkb.rej patch failed, unable to continue (try -v) patch failed, rejects left in working dir errors during apply, please fix and refresh movelink