changeset 10189:e451e599fbcf

patch: support diff data loss detection and upgrade In worst case, generating diff in upgrade mode can be two times more expensive than generating it in git mode directly: we may have to regenerate the whole diff again whenever a git feature is detected. Also, the first diff attempt is completely buffered instead of being streamed. That said, even without having profiled it yet, I am convinced we can fast-path the upgrade mode if necessary were it to be used in regular diff commands, and not only in mq where avoiding data loss is worth the price.
author Patrick Mezard <pmezard@gmail.com>
date Fri, 01 Jan 2010 20:54:05 +0100
parents fd6e9c7cd98c
children 9c2c94934f0d
files mercurial/mdiff.py mercurial/patch.py tests/autodiff.py tests/test-diff-upgrade tests/test-diff-upgrade.out
diffstat 5 files changed, 398 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/mdiff.py	Fri Jan 01 20:52:05 2010 +0100
+++ b/mercurial/mdiff.py	Fri Jan 01 20:54:05 2010 +0100
@@ -27,7 +27,9 @@
     nodates removes dates from diff headers
     ignorews ignores all whitespace changes in the diff
     ignorewsamount ignores changes in the amount of whitespace
-    ignoreblanklines ignores changes whose lines are all blank'''
+    ignoreblanklines ignores changes whose lines are all blank
+    upgrade generates git diffs to avoid data loss
+    '''
 
     defaults = {
         'context': 3,
@@ -38,6 +40,7 @@
         'ignorews': False,
         'ignorewsamount': False,
         'ignoreblanklines': False,
+        'upgrade': False,
         }
 
     __slots__ = defaults.keys()
--- a/mercurial/patch.py	Fri Jan 01 20:52:05 2010 +0100
+++ b/mercurial/patch.py	Fri Jan 01 20:54:05 2010 +0100
@@ -1246,17 +1246,25 @@
     ret.append('\n')
     return ''.join(ret)
 
-def _addmodehdr(header, omode, nmode):
-    if omode != nmode:
-        header.append('old mode %s\n' % omode)
-        header.append('new mode %s\n' % nmode)
+class GitDiffRequired(Exception):
+    pass
 
-def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None):
+def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None,
+         losedatafn=None):
     '''yields diff of changes to files between two nodes, or node and
     working directory.
 
     if node1 is None, use first dirstate parent instead.
-    if node2 is None, compare node1 with working directory.'''
+    if node2 is None, compare node1 with working directory.
+
+    losedatafn(**kwarg) is a callable run when opts.upgrade=True and
+    every time some change cannot be represented with the current
+    patch format. Return False to upgrade to git patch format, True to
+    accept the loss or raise an exception to abort the diff. It is
+    called with the name of current file being diffed as 'fn'. If set
+    to None, patches will always be upgraded to git format when
+    necessary.
+    '''
 
     if opts is None:
         opts = mdiff.defaultopts
@@ -1288,24 +1296,50 @@
     modified, added, removed = changes[:3]
 
     if not modified and not added and not removed:
-        return
+        return []
+
+    revs = None
+    if not repo.ui.quiet:
+        hexfunc = repo.ui.debugflag and hex or short
+        revs = [hexfunc(node) for node in [node1, node2] if node]
+
+    copy = {}
+    if opts.git or opts.upgrade:
+        copy = copies.copies(repo, ctx1, ctx2, repo[nullid])[0]
+        copy = copy.copy()
+        for k, v in copy.items():
+            copy[v] = k
+
+    difffn = lambda opts, losedata: trydiff(repo, revs, ctx1, ctx2,
+                 modified, added, removed, copy, getfilectx, opts, losedata)
+    if opts.upgrade and not opts.git:
+        try:
+            def losedata(fn):
+                if not losedatafn or not losedatafn(fn=fn):
+                    raise GitDiffRequired()
+            # Buffer the whole output until we are sure it can be generated
+            return list(difffn(opts.copy(git=False), losedata))
+        except GitDiffRequired:
+            return difffn(opts.copy(git=True), None)
+    else:
+        return difffn(opts, None)
+
+def _addmodehdr(header, omode, nmode):
+    if omode != nmode:
+        header.append('old mode %s\n' % omode)
+        header.append('new mode %s\n' % nmode)
+
+def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
+            copy, getfilectx, opts, losedatafn):
 
     date1 = util.datestr(ctx1.date())
     man1 = ctx1.manifest()
 
-    revs = None
-    if not repo.ui.quiet and not opts.git:
-        hexfunc = repo.ui.debugflag and hex or short
-        revs = [hexfunc(node) for node in [node1, node2] if node]
+    gone = set()
+    gitmode = {'l': '120000', 'x': '100755', '': '100644'}
 
     if opts.git:
-        copy, diverge = copies.copies(repo, ctx1, ctx2, repo[nullid])
-        copy = copy.copy()
-        for k, v in copy.items():
-            copy[v] = k
-
-    gone = set()
-    gitmode = {'l': '120000', 'x': '100755', '': '100644'}
+        revs = None
 
     for f in sorted(modified + added + removed):
         to = None
@@ -1317,39 +1351,61 @@
         if f not in removed:
             tn = getfilectx(f, ctx2).data()
         a, b = f, f
-        if opts.git:
+        if opts.git or losedatafn:
             if f in added:
                 mode = gitmode[ctx2.flags(f)]
                 if f in copy:
-                    a = copy[f]
-                    omode = gitmode[man1.flags(a)]
-                    _addmodehdr(header, omode, mode)
-                    if a in removed and a not in gone:
-                        op = 'rename'
-                        gone.add(a)
+                    if opts.git:
+                        a = copy[f]
+                        omode = gitmode[man1.flags(a)]
+                        _addmodehdr(header, omode, mode)
+                        if a in removed and a not in gone:
+                            op = 'rename'
+                            gone.add(a)
+                        else:
+                            op = 'copy'
+                        header.append('%s from %s\n' % (op, a))
+                        header.append('%s to %s\n' % (op, f))
+                        to = getfilectx(a, ctx1).data()
                     else:
-                        op = 'copy'
-                    header.append('%s from %s\n' % (op, a))
-                    header.append('%s to %s\n' % (op, f))
-                    to = getfilectx(a, ctx1).data()
+                        losedatafn(f)
                 else:
-                    header.append('new file mode %s\n' % mode)
+                    if opts.git:
+                        header.append('new file mode %s\n' % mode)
+                    elif ctx2.flags(f):
+                        losedatafn(f)
                 if util.binary(tn):
-                    dodiff = 'binary'
+                    if opts.git:
+                        dodiff = 'binary'
+                    else:
+                        losedatafn(f)
+                if not opts.git and not tn:
+                    # regular diffs cannot represent new empty file
+                    losedatafn(f)
             elif f in removed:
-                # have we already reported a copy above?
-                if f in copy and copy[f] in added and copy[copy[f]] == f:
-                    dodiff = False
-                else:
-                    header.append('deleted file mode %s\n' %
-                                  gitmode[man1.flags(f)])
+                if opts.git:
+                    # have we already reported a copy above?
+                    if f in copy and copy[f] in added and copy[copy[f]] == f:
+                        dodiff = False
+                    else:
+                        header.append('deleted file mode %s\n' %
+                                      gitmode[man1.flags(f)])
+                elif not to:
+                    # regular diffs cannot represent empty file deletion
+                    losedatafn(f)
             else:
-                omode = gitmode[man1.flags(f)]
-                nmode = gitmode[ctx2.flags(f)]
-                _addmodehdr(header, omode, nmode)
-                if util.binary(to) or util.binary(tn):
-                    dodiff = 'binary'
-            header.insert(0, mdiff.diffline(revs, a, b, opts))
+                oflag = man1.flags(f)
+                nflag = ctx2.flags(f)
+                binary = util.binary(to) or util.binary(tn)
+                if opts.git:
+                    _addmodehdr(header, gitmode[oflag], gitmode[nflag])
+                    if binary:
+                        dodiff = 'binary'
+                elif binary or nflag != oflag:
+                    losedatafn(f)
+            if opts.git:
+                header.insert(0, mdiff.diffline(revs, a, b, opts))
+
         if dodiff:
             if dodiff == 'binary':
                 text = b85diff(to, tn)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/autodiff.py	Fri Jan 01 20:54:05 2010 +0100
@@ -0,0 +1,46 @@
+# Extension dedicated to test patch.diff() upgrade modes
+#
+#
+from mercurial import cmdutil, patch, util
+
+def autodiff(ui, repo, *pats, **opts):
+    diffopts = patch.diffopts(ui, opts)
+    git = opts.get('git', 'no')
+    brokenfiles = set()
+    losedatafn = None
+    if git in ('yes', 'no'):
+        diffopts.git = git == 'yes'
+        diffopts.upgrade = False
+    elif git == 'auto':
+        diffopts.git = False
+        diffopts.upgrade = True
+    elif git == 'warn':
+        diffopts.git = False
+        diffopts.upgrade = True
+        def losedatafn(fn=None, **kwargs):
+            brokenfiles.add(fn)
+            return True
+    elif git == 'abort':
+        diffopts.git = False
+        diffopts.upgrade = True
+        def losedatafn(fn=None, **kwargs):
+            raise util.Abort('losing data for %s' % fn)
+    else:
+        raise util.Abort('--git must be yes, no or auto')
+
+    node1, node2 = cmdutil.revpair(repo, [])
+    m = cmdutil.match(repo, pats, opts)
+    it = patch.diff(repo, node1, node2, match=m, opts=diffopts,
+                    losedatafn=losedatafn)
+    for chunk in it:
+        ui.write(chunk)
+    for fn in sorted(brokenfiles):
+        ui.write('data lost for: %s\n' % fn)
+
+cmdtable = {
+    "autodiff":
+        (autodiff,
+         [('', 'git', '', 'git upgrade mode (yes/no/auto/warn/abort)'),
+          ],
+         '[OPTION]... [FILE]...'),
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-diff-upgrade	Fri Jan 01 20:54:05 2010 +0100
@@ -0,0 +1,63 @@
+#!/bin/sh
+
+echo "[extensions]" >> $HGRCPATH
+echo "autodiff=$TESTDIR/autodiff.py" >> $HGRCPATH
+echo "[diff]" >> $HGRCPATH
+echo "nodates=1" >> $HGRCPATH
+
+hg init repo
+cd repo
+echo '% make a combination of new, changed and deleted file'
+echo regular > regular
+echo rmregular > rmregular
+touch rmempty
+echo exec > exec
+chmod +x exec
+echo rmexec > rmexec
+chmod +x rmexec
+echo setexec > setexec
+echo unsetexec > unsetexec
+chmod +x unsetexec
+echo binary > binary
+python -c "file('rmbinary', 'wb').write('\0')"
+hg ci -Am addfiles
+echo regular >> regular
+echo newregular >> newregular
+rm rmempty
+touch newempty
+rm rmregular
+echo exec >> exec
+echo newexec > newexec
+chmod +x newexec
+rm rmexec
+chmod +x setexec
+chmod -x unsetexec
+python -c "file('binary', 'wb').write('\0\0')"
+python -c "file('newbinary', 'wb').write('\0')"
+rm rmbinary
+hg addremove
+
+echo '% git=no: regular diff for all files'
+hg autodiff --git=no
+
+echo '% git=no: git diff for single regular file'
+hg autodiff --git=yes regular
+
+echo '% git=auto: regular diff for regular files and removals'
+hg autodiff --git=auto regular newregular rmregular rmbinary rmexec
+
+for f in exec newexec setexec unsetexec binary newbinary newempty rmempty; do
+    echo '% git=auto: git diff for' $f
+    hg autodiff --git=auto $f
+done
+
+echo '% git=warn: regular diff with data loss warnings'
+hg autodiff --git=warn
+
+echo '% git=abort: fail on execute bit change'
+hg autodiff --git=abort regular setexec
+
+echo '% git=abort: succeed on regular file'
+hg autodiff --git=abort regular
+
+cd ..
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-diff-upgrade.out	Fri Jan 01 20:54:05 2010 +0100
@@ -0,0 +1,186 @@
+% make a combination of new, changed and deleted file
+adding binary
+adding exec
+adding regular
+adding rmbinary
+adding rmempty
+adding rmexec
+adding rmregular
+adding setexec
+adding unsetexec
+adding newbinary
+adding newempty
+adding newexec
+adding newregular
+removing rmbinary
+removing rmempty
+removing rmexec
+removing rmregular
+% git=no: regular diff for all files
+diff -r b3f053cd7c7f binary
+Binary file binary has changed
+diff -r b3f053cd7c7f exec
+--- a/exec
++++ b/exec
+@@ -1,1 +1,2 @@
+ exec
++exec
+diff -r b3f053cd7c7f newbinary
+Binary file newbinary has changed
+diff -r b3f053cd7c7f newexec
+--- /dev/null
++++ b/newexec
+@@ -0,0 +1,1 @@
++newexec
+diff -r b3f053cd7c7f newregular
+--- /dev/null
++++ b/newregular
+@@ -0,0 +1,1 @@
++newregular
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+diff -r b3f053cd7c7f rmbinary
+Binary file rmbinary has changed
+diff -r b3f053cd7c7f rmexec
+--- a/rmexec
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmexec
+diff -r b3f053cd7c7f rmregular
+--- a/rmregular
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmregular
+% git=no: git diff for single regular file
+diff --git a/regular b/regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+% git=auto: regular diff for regular files and removals
+diff -r b3f053cd7c7f newregular
+--- /dev/null
++++ b/newregular
+@@ -0,0 +1,1 @@
++newregular
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+diff -r b3f053cd7c7f rmbinary
+Binary file rmbinary has changed
+diff -r b3f053cd7c7f rmexec
+--- a/rmexec
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmexec
+diff -r b3f053cd7c7f rmregular
+--- a/rmregular
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmregular
+% git=auto: git diff for exec
+diff -r b3f053cd7c7f exec
+--- a/exec
++++ b/exec
+@@ -1,1 +1,2 @@
+ exec
++exec
+% git=auto: git diff for newexec
+diff --git a/newexec b/newexec
+new file mode 100755
+--- /dev/null
++++ b/newexec
+@@ -0,0 +1,1 @@
++newexec
+% git=auto: git diff for setexec
+diff --git a/setexec b/setexec
+old mode 100644
+new mode 100755
+% git=auto: git diff for unsetexec
+diff --git a/unsetexec b/unsetexec
+old mode 100755
+new mode 100644
+% git=auto: git diff for binary
+diff --git a/binary b/binary
+index a9128c283485202893f5af379dd9beccb6e79486..09f370e38f498a462e1ca0faa724559b6630c04f
+GIT binary patch
+literal 2
+Jc${Nk0000200961
+
+% git=auto: git diff for newbinary
+diff --git a/newbinary b/newbinary
+new file mode 100644
+index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d
+GIT binary patch
+literal 1
+Ic${MZ000310RR91
+
+% git=auto: git diff for newempty
+diff --git a/newempty b/newempty
+new file mode 100644
+% git=auto: git diff for rmempty
+diff --git a/rmempty b/rmempty
+deleted file mode 100644
+% git=warn: regular diff with data loss warnings
+diff -r b3f053cd7c7f binary
+Binary file binary has changed
+diff -r b3f053cd7c7f exec
+--- a/exec
++++ b/exec
+@@ -1,1 +1,2 @@
+ exec
++exec
+diff -r b3f053cd7c7f newbinary
+Binary file newbinary has changed
+diff -r b3f053cd7c7f newexec
+--- /dev/null
++++ b/newexec
+@@ -0,0 +1,1 @@
++newexec
+diff -r b3f053cd7c7f newregular
+--- /dev/null
++++ b/newregular
+@@ -0,0 +1,1 @@
++newregular
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+diff -r b3f053cd7c7f rmbinary
+Binary file rmbinary has changed
+diff -r b3f053cd7c7f rmexec
+--- a/rmexec
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmexec
+diff -r b3f053cd7c7f rmregular
+--- a/rmregular
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmregular
+data lost for: binary
+data lost for: newbinary
+data lost for: newempty
+data lost for: newexec
+data lost for: rmempty
+data lost for: setexec
+data lost for: unsetexec
+% git=abort: fail on execute bit change
+abort: losing data for setexec
+% git=abort: succeed on regular file
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular