patch: use temporary files to handle intermediate copies
authorPatrick Mezard <pmezard@gmail.com>
Fri, 27 May 2011 21:50:10 +0200
changeset 14452 ee574cfd0c32
parent 14451 c78d41db6f88
child 14453 ea3d548132cc
patch: use temporary files to handle intermediate copies git patches may require copies to be handled out-of-order. For instance, take the following sequence: * modify a * copy a into b Here, we have to generate b from a before its modification. To do so, applydiff() was scanning for copy metadata and performing the copies before processing the other changes in-order. While smart and efficient, this approach complicates things by handling file copies and file creations at different places and times. While a new file must not exist before being patched a copied file already exists before applying the first hunk. Instead of copying the files at their final destination before patching, we store them in a temporary file location and retrieve them when patching. The filestore always stores file content in real files but nothing prevents adding a cache layer. The filestore class was kept separate from fsbackend for at least two reasons: - This class is likely to be reused as a temporary result store for a future repository patching call (entries just have to be extended to contain copy sources). - Delegating this role to backends might be more efficient in a repository backend case: the source files are already available in the repository itself and do not need to be copied again. It also means that third-parties backend would have to implement two other methods. If we ever decide to merge the filestore feature into backend, a minimalistic approach would be to compose with filestore directly. Keep in mind this copy overhead only applies for copy/rename sources, and may even be reduced to copy sources which have to handled ahead of time.
hgext/keyword.py
mercurial/patch.py
tests/test-git-import.t
tests/test-import.t
tests/test-mq-qrefresh.t
tests/test-mq-symlinks.t
--- a/hgext/keyword.py	Fri May 27 21:50:09 2011 +0200
+++ b/hgext/keyword.py	Fri May 27 21:50:10 2011 +0200
@@ -595,11 +595,12 @@
                 wlock.release()
 
     # monkeypatches
-    def kwpatchfile_init(orig, self, ui, fname, backend, mode, create, remove,
-                         missing=False, eolmode=None):
+    def kwpatchfile_init(orig, self, ui, fname, backend, store, mode, create,
+                         remove, eolmode=None, copysource=None):
         '''Monkeypatch/wrap patch.patchfile.__init__ to avoid
         rejects or conflicts due to expanded keywords in working dir.'''
-        orig(self, ui, fname, backend, mode, create, remove, missing, eolmode)
+        orig(self, ui, fname, backend, store, mode, create, remove,
+             eolmode, copysource)
         # shrink keywords read from working dir
         self.lines = kwt.shrinklines(self.fname, self.lines)
 
--- a/mercurial/patch.py	Fri May 27 21:50:09 2011 +0200
+++ b/mercurial/patch.py	Fri May 27 21:50:10 2011 +0200
@@ -7,7 +7,7 @@
 # GNU General Public License version 2 or any later version.
 
 import cStringIO, email.Parser, os, errno, re
-import tempfile, zlib
+import tempfile, zlib, shutil
 
 from i18n import _
 from node import hex, nullid, short
@@ -362,10 +362,11 @@
         """
         raise NotImplementedError
 
-    def setfile(self, fname, data, mode):
+    def setfile(self, fname, data, mode, copysource):
         """Write data to target file fname and set its mode. mode is a
         (islink, isexec) tuple. If data is None, the file content should
-        be left unchanged.
+        be left unchanged. If the file is modified after being copied,
+        copysource is set to the original file name.
         """
         raise NotImplementedError
 
@@ -380,13 +381,6 @@
         """
         pass
 
-    def copy(self, src, dst):
-        """Copy src file into dst file. Create intermediate directories if
-        necessary. Files are specified relatively to the patching base
-        directory.
-        """
-        raise NotImplementedError
-
     def exists(self, fname):
         raise NotImplementedError
 
@@ -411,7 +405,7 @@
                 raise
         return (self.opener.read(fname), (islink, isexec))
 
-    def setfile(self, fname, data, mode):
+    def setfile(self, fname, data, mode, copysource):
         islink, isexec = mode
         if data is None:
             util.setflags(self._join(fname), islink, isexec)
@@ -439,23 +433,6 @@
         fp.writelines(lines)
         fp.close()
 
-    def copy(self, src, dst):
-        basedir = self.opener.base
-        abssrc, absdst = [scmutil.canonpath(basedir, basedir, x)
-                          for x in [src, dst]]
-        if os.path.lexists(absdst):
-            raise util.Abort(_("cannot create %s: destination already exists")
-                             % dst)
-        dstdir = os.path.dirname(absdst)
-        if dstdir and not os.path.isdir(dstdir):
-            try:
-                os.makedirs(dstdir)
-            except IOError:
-                raise util.Abort(
-                    _("cannot create %s: unable to create destination directory")
-                    % dst)
-        util.copyfile(abssrc, absdst)
-
     def exists(self, fname):
         return os.path.lexists(self._join(fname))
 
@@ -468,8 +445,10 @@
         self.changed = set()
         self.copied = []
 
-    def setfile(self, fname, data, mode):
-        super(workingbackend, self).setfile(fname, data, mode)
+    def setfile(self, fname, data, mode, copysource):
+        super(workingbackend, self).setfile(fname, data, mode, copysource)
+        if copysource is not None:
+            self.copied.append((copysource, fname))
         self.changed.add(fname)
 
     def unlink(self, fname):
@@ -477,11 +456,6 @@
         self.removed.add(fname)
         self.changed.add(fname)
 
-    def copy(self, src, dst):
-        super(workingbackend, self).copy(src, dst)
-        self.copied.append((src, dst))
-        self.changed.add(dst)
-
     def close(self):
         wctx = self.repo[None]
         addremoved = set(self.changed)
@@ -498,14 +472,40 @@
             scmutil.addremove(self.repo, addremoved, similarity=self.similarity)
         return sorted(self.changed)
 
+class filestore(object):
+    def __init__(self):
+        self.opener = None
+        self.files = {}
+        self.created = 0
+
+    def setfile(self, fname, data, mode):
+        if self.opener is None:
+            root = tempfile.mkdtemp(prefix='hg-patch-')
+            self.opener = scmutil.opener(root)
+        # Avoid filename issues with these simple names
+        fn = str(self.created)
+        self.opener.write(fn, data)
+        self.created += 1
+        self.files[fname] = (fn, mode)
+
+    def getfile(self, fname):
+        if fname not in self.files:
+            raise IOError()
+        fn, mode = self.files[fname]
+        return self.opener.read(fn), mode
+
+    def close(self):
+        if self.opener:
+            shutil.rmtree(self.opener.base)
+
 # @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
 unidesc = re.compile('@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@')
 contextdesc = re.compile('(---|\*\*\*) (\d+)(,(\d+))? (---|\*\*\*)')
 eolmodes = ['strict', 'crlf', 'lf', 'auto']
 
 class patchfile(object):
-    def __init__(self, ui, fname, backend, mode, create, remove, missing=False,
-                 eolmode='strict'):
+    def __init__(self, ui, fname, backend, store, mode, create, remove,
+                 eolmode='strict', copysource=None):
         self.fname = fname
         self.eolmode = eolmode
         self.eol = None
@@ -513,36 +513,43 @@
         self.ui = ui
         self.lines = []
         self.exists = False
-        self.missing = missing
+        self.missing = True
         self.mode = mode
+        self.copysource = copysource
         self.create = create
         self.remove = remove
-        if not missing:
-            try:
-                data, mode = self.backend.getfile(fname)
-                if data:
-                    self.lines = data.splitlines(True)
-                if self.mode is None:
-                    self.mode = mode
-                if self.lines:
-                    # Normalize line endings
-                    if self.lines[0].endswith('\r\n'):
-                        self.eol = '\r\n'
-                    elif self.lines[0].endswith('\n'):
-                        self.eol = '\n'
-                    if eolmode != 'strict':
-                        nlines = []
-                        for l in self.lines:
-                            if l.endswith('\r\n'):
-                                l = l[:-2] + '\n'
-                            nlines.append(l)
-                        self.lines = nlines
+        try:
+            if copysource is None:
+                data, mode = backend.getfile(fname)
                 self.exists = True
-            except IOError:
-                if self.mode is None:
-                    self.mode = (False, False)
-        else:
-            self.ui.warn(_("unable to find '%s' for patching\n") % self.fname)
+            else:
+                data, mode = store.getfile(copysource)
+                self.exists = backend.exists(fname)
+            self.missing = False
+            if data:
+                self.lines = data.splitlines(True)
+            if self.mode is None:
+                self.mode = mode
+            if self.lines:
+                # Normalize line endings
+                if self.lines[0].endswith('\r\n'):
+                    self.eol = '\r\n'
+                elif self.lines[0].endswith('\n'):
+                    self.eol = '\n'
+                if eolmode != 'strict':
+                    nlines = []
+                    for l in self.lines:
+                        if l.endswith('\r\n'):
+                            l = l[:-2] + '\n'
+                        nlines.append(l)
+                    self.lines = nlines
+        except IOError:
+            if create:
+                self.missing = False
+            if self.mode is None:
+                self.mode = (False, False)
+        if self.missing:
+             self.ui.warn(_("unable to find '%s' for patching\n") % self.fname)
 
         self.hash = {}
         self.dirty = 0
@@ -569,7 +576,7 @@
                 rawlines.append(l)
             lines = rawlines
 
-        self.backend.setfile(fname, ''.join(lines), mode)
+        self.backend.setfile(fname, ''.join(lines), mode, self.copysource)
 
     def printfile(self, warn):
         if self.fileprinted:
@@ -623,7 +630,11 @@
             return -1
 
         if self.exists and self.create:
-            self.ui.warn(_("file %s already exists\n") % self.fname)
+            if self.copysource:
+                self.ui.warn(_("cannot create %s: destination already "
+                               "exists\n" % self.fname))
+            else:
+                self.ui.warn(_("file %s already exists\n") % self.fname)
             self.rej.append(h)
             return -1
 
@@ -1005,10 +1016,10 @@
         # Git patches do not play games. Excluding copies from the
         # following heuristic avoids a lot of confusion
         fname = pathstrip(gp.path, strip - 1)[1]
-        create = gp.op == 'ADD'
+        create = gp.op in ('ADD', 'COPY', 'RENAME')
         remove = gp.op == 'DELETE'
         missing = not create and not backend.exists(fname)
-        return fname, missing, create, remove
+        return fname, create, remove
     nulla = afile_orig == "/dev/null"
     nullb = bfile_orig == "/dev/null"
     create = nulla and hunk.starta == 0 and hunk.lena == 0
@@ -1050,7 +1061,7 @@
         else:
             raise PatchError(_("undefined source and destination files"))
 
-    return fname, missing, create, remove
+    return fname, create, remove
 
 def scangitpatch(lr, firstline):
     """
@@ -1177,7 +1188,7 @@
         gp = gitpatches.pop()[1]
         yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp)
 
-def applydiff(ui, fp, changed, backend, strip=1, eolmode='strict'):
+def applydiff(ui, fp, changed, backend, store, strip=1, eolmode='strict'):
     """Reads a patch from fp and tries to apply it.
 
     The dict 'changed' is filled in with all of the filenames changed
@@ -1188,10 +1199,11 @@
     read in binary mode. Otherwise, line endings are ignored when
     patching then normalized according to 'eolmode'.
     """
-    return _applydiff(ui, fp, patchfile, backend, changed, strip=strip,
+    return _applydiff(ui, fp, patchfile, backend, store, changed, strip=strip,
                       eolmode=eolmode)
 
-def _applydiff(ui, fp, patcher, backend, changed, strip=1, eolmode='strict'):
+def _applydiff(ui, fp, patcher, backend, store, changed, strip=1,
+               eolmode='strict'):
 
     def pstrip(p):
         return pathstrip(p, strip - 1)[1]
@@ -1214,30 +1226,42 @@
                 rejects += current_file.close()
                 current_file = None
             afile, bfile, first_hunk, gp = values
+            copysource = None
             if gp:
                 path = pstrip(gp.path)
+                if gp.oldpath:
+                    copysource = pstrip(gp.oldpath)
                 changed[path] = gp
                 if gp.op == 'DELETE':
                     backend.unlink(path)
                     continue
                 if gp.op == 'RENAME':
-                    backend.unlink(pstrip(gp.oldpath))
-                if gp.mode and not first_hunk:
-                    data = None
-                    if gp.op == 'ADD':
-                        # Added files without content have no hunk and
-                        # must be created
-                        data = ''
-                    backend.setfile(path, data, gp.mode)
+                    backend.unlink(copysource)
+                if not first_hunk:
+                    data, mode = None, None
+                    if gp.op in ('RENAME', 'COPY'):
+                        data, mode = store.getfile(copysource)
+                    if gp.mode:
+                        mode = gp.mode
+                        if gp.op == 'ADD':
+                            # Added files without content have no hunk and
+                            # must be created
+                            data = ''
+                    if data or mode:
+                        if (gp.op in ('ADD', 'RENAME', 'COPY')
+                            and backend.exists(path)):
+                            raise PatchError(_("cannot create %s: destination "
+                                               "already exists") % path)
+                        backend.setfile(path, data, mode, copysource)
             if not first_hunk:
                 continue
             try:
                 mode = gp and gp.mode or None
-                current_file, missing, create, remove = selectfile(
+                current_file, create, remove = selectfile(
                     backend, afile, bfile, first_hunk, strip, gp)
-                current_file = patcher(ui, current_file, backend, mode,
-                                       create, remove, missing=missing,
-                                       eolmode=eolmode)
+                current_file = patcher(ui, current_file, backend, store, mode,
+                                       create, remove, eolmode=eolmode,
+                                       copysource=copysource)
             except PatchError, inst:
                 ui.warn(str(inst) + '\n')
                 current_file = None
@@ -1245,7 +1269,9 @@
                 continue
         elif state == 'git':
             for gp in values:
-                backend.copy(pstrip(gp.oldpath), pstrip(gp.path))
+                path = pstrip(gp.oldpath)
+                data, mode = backend.getfile(path)
+                store.setfile(path, data, mode)
         else:
             raise util.Abort(_('unsupported parser state: %s') % state)
 
@@ -1316,17 +1342,20 @@
         raise util.Abort(_('unsupported line endings type: %s') % eolmode)
     eolmode = eolmode.lower()
 
+    store = filestore()
     backend = workingbackend(ui, repo, similarity)
     try:
         fp = open(patchobj, 'rb')
     except TypeError:
         fp = patchobj
     try:
-        ret = applydiff(ui, fp, files, backend, strip=strip, eolmode=eolmode)
+        ret = applydiff(ui, fp, files, backend, store, strip=strip,
+                        eolmode=eolmode)
     finally:
         if fp != patchobj:
             fp.close()
         files.update(dict.fromkeys(backend.close()))
+        store.close()
     if ret < 0:
         raise PatchError(_('patch failed to apply'))
     return ret > 0
@@ -1370,7 +1399,7 @@
                         changed.add(pathstrip(gp.oldpath, strip - 1)[1])
                 if not first_hunk:
                     continue
-                current_file, missing, create, remove = selectfile(
+                current_file, create, remove = selectfile(
                     backend, afile, bfile, first_hunk, strip, gp)
                 changed.add(current_file)
             elif state not in ('hunk', 'git'):
--- a/tests/test-git-import.t	Fri May 27 21:50:09 2011 +0200
+++ b/tests/test-git-import.t	Fri May 27 21:50:10 2011 +0200
@@ -436,7 +436,9 @@
   > +b
   > EOF
   applying patch from stdin
-  abort: cannot create b: destination already exists
+  cannot create b: destination already exists
+  1 out of 1 hunks FAILED -- saving rejects to file b.rej
+  abort: patch failed to apply
   [255]
   $ cat b
   b
--- a/tests/test-import.t	Fri May 27 21:50:09 2011 +0200
+++ b/tests/test-import.t	Fri May 27 21:50:10 2011 +0200
@@ -617,7 +617,7 @@
   > rename to bar
   > EOF
   applying patch from stdin
-  abort: ../outside/foo not under root
+  abort: path contains illegal component: ../outside/foo
   [255]
   $ cd ..
 
--- a/tests/test-mq-qrefresh.t	Fri May 27 21:50:09 2011 +0200
+++ b/tests/test-mq-qrefresh.t	Fri May 27 21:50:10 2011 +0200
@@ -502,6 +502,7 @@
   refresh interrupted while patch was popped! (revert --all, qpush to recover)
   abort: username 'foo\nbar' contains a newline!
   [255]
+  $ rm a
   $ cat .hg/patches/a
   # HG changeset patch
   # Parent 0000000000000000000000000000000000000000
--- a/tests/test-mq-symlinks.t	Fri May 27 21:50:09 2011 +0200
+++ b/tests/test-mq-symlinks.t	Fri May 27 21:50:10 2011 +0200
@@ -115,6 +115,8 @@
   $ ln -s linkbb linkb
   $ hg qpush
   applying movelink
+  cannot create linkb: destination already exists
+  1 out of 1 hunks FAILED -- saving rejects to file linkb.rej
   patch failed, unable to continue (try -v)
   patch failed, rejects left in working dir
   errors during apply, please fix and refresh movelink