patch: support diff data loss detection and upgrade
In the worst case, generating a diff in upgrade mode can be twice as expensive
as generating it in git mode directly: we may have to regenerate the whole
diff whenever a git feature is detected. Also, the first diff attempt is
completely buffered instead of being streamed. That said, even without having
profiled it yet, I am convinced we can fast-path the upgrade mode if necessary,
should it be used in regular diff commands and not only in mq, where avoiding
data loss is worth the price.
--- a/mercurial/mdiff.py Fri Jan 01 20:52:05 2010 +0100
+++ b/mercurial/mdiff.py Fri Jan 01 20:54:05 2010 +0100
@@ -27,7 +27,9 @@
nodates removes dates from diff headers
ignorews ignores all whitespace changes in the diff
ignorewsamount ignores changes in the amount of whitespace
- ignoreblanklines ignores changes whose lines are all blank'''
+ ignoreblanklines ignores changes whose lines are all blank
+ upgrade generates git diffs to avoid data loss
+ '''
defaults = {
'context': 3,
@@ -38,6 +40,7 @@
'ignorews': False,
'ignorewsamount': False,
'ignoreblanklines': False,
+ 'upgrade': False,
}
__slots__ = defaults.keys()
--- a/mercurial/patch.py Fri Jan 01 20:52:05 2010 +0100
+++ b/mercurial/patch.py Fri Jan 01 20:54:05 2010 +0100
@@ -1246,17 +1246,25 @@
ret.append('\n')
return ''.join(ret)
-def _addmodehdr(header, omode, nmode):
- if omode != nmode:
- header.append('old mode %s\n' % omode)
- header.append('new mode %s\n' % nmode)
+class GitDiffRequired(Exception):
+ pass
-def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None):
+def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None,
+ losedatafn=None):
'''yields diff of changes to files between two nodes, or node and
working directory.
if node1 is None, use first dirstate parent instead.
- if node2 is None, compare node1 with working directory.'''
+ if node2 is None, compare node1 with working directory.
+
+ losedatafn(**kwarg) is a callable run when opts.upgrade=True and
+ every time some change cannot be represented with the current
+ patch format. Return False to upgrade to git patch format, True to
+ accept the loss or raise an exception to abort the diff. It is
+ called with the name of current file being diffed as 'fn'. If set
+ to None, patches will always be upgraded to git format when
+ necessary.
+ '''
if opts is None:
opts = mdiff.defaultopts
@@ -1288,24 +1296,50 @@
modified, added, removed = changes[:3]
if not modified and not added and not removed:
- return
+ return []
+
+ revs = None
+ if not repo.ui.quiet:
+ hexfunc = repo.ui.debugflag and hex or short
+ revs = [hexfunc(node) for node in [node1, node2] if node]
+
+ copy = {}
+ if opts.git or opts.upgrade:
+ copy = copies.copies(repo, ctx1, ctx2, repo[nullid])[0]
+ copy = copy.copy()
+ for k, v in copy.items():
+ copy[v] = k
+
+ difffn = lambda opts, losedata: trydiff(repo, revs, ctx1, ctx2,
+ modified, added, removed, copy, getfilectx, opts, losedata)
+ if opts.upgrade and not opts.git:
+ try:
+ def losedata(fn):
+ if not losedatafn or not losedatafn(fn=fn):
+ raise GitDiffRequired()
+ # Buffer the whole output until we are sure it can be generated
+ return list(difffn(opts.copy(git=False), losedata))
+ except GitDiffRequired:
+ return difffn(opts.copy(git=True), None)
+ else:
+ return difffn(opts, None)
+
+def _addmodehdr(header, omode, nmode):
+ if omode != nmode:
+ header.append('old mode %s\n' % omode)
+ header.append('new mode %s\n' % nmode)
+
+def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
+ copy, getfilectx, opts, losedatafn):
date1 = util.datestr(ctx1.date())
man1 = ctx1.manifest()
- revs = None
- if not repo.ui.quiet and not opts.git:
- hexfunc = repo.ui.debugflag and hex or short
- revs = [hexfunc(node) for node in [node1, node2] if node]
+ gone = set()
+ gitmode = {'l': '120000', 'x': '100755', '': '100644'}
if opts.git:
- copy, diverge = copies.copies(repo, ctx1, ctx2, repo[nullid])
- copy = copy.copy()
- for k, v in copy.items():
- copy[v] = k
-
- gone = set()
- gitmode = {'l': '120000', 'x': '100755', '': '100644'}
+ revs = None
for f in sorted(modified + added + removed):
to = None
@@ -1317,39 +1351,61 @@
if f not in removed:
tn = getfilectx(f, ctx2).data()
a, b = f, f
- if opts.git:
+ if opts.git or losedatafn:
if f in added:
mode = gitmode[ctx2.flags(f)]
if f in copy:
- a = copy[f]
- omode = gitmode[man1.flags(a)]
- _addmodehdr(header, omode, mode)
- if a in removed and a not in gone:
- op = 'rename'
- gone.add(a)
+ if opts.git:
+ a = copy[f]
+ omode = gitmode[man1.flags(a)]
+ _addmodehdr(header, omode, mode)
+ if a in removed and a not in gone:
+ op = 'rename'
+ gone.add(a)
+ else:
+ op = 'copy'
+ header.append('%s from %s\n' % (op, a))
+ header.append('%s to %s\n' % (op, f))
+ to = getfilectx(a, ctx1).data()
else:
- op = 'copy'
- header.append('%s from %s\n' % (op, a))
- header.append('%s to %s\n' % (op, f))
- to = getfilectx(a, ctx1).data()
+ losedatafn(f)
else:
- header.append('new file mode %s\n' % mode)
+ if opts.git:
+ header.append('new file mode %s\n' % mode)
+ elif ctx2.flags(f):
+ losedatafn(f)
if util.binary(tn):
- dodiff = 'binary'
+ if opts.git:
+ dodiff = 'binary'
+ else:
+ losedatafn(f)
+ if not opts.git and not tn:
+ # regular diffs cannot represent new empty file
+ losedatafn(f)
elif f in removed:
- # have we already reported a copy above?
- if f in copy and copy[f] in added and copy[copy[f]] == f:
- dodiff = False
- else:
- header.append('deleted file mode %s\n' %
- gitmode[man1.flags(f)])
+ if opts.git:
+ # have we already reported a copy above?
+ if f in copy and copy[f] in added and copy[copy[f]] == f:
+ dodiff = False
+ else:
+ header.append('deleted file mode %s\n' %
+ gitmode[man1.flags(f)])
+ elif not to:
+ # regular diffs cannot represent empty file deletion
+ losedatafn(f)
else:
- omode = gitmode[man1.flags(f)]
- nmode = gitmode[ctx2.flags(f)]
- _addmodehdr(header, omode, nmode)
- if util.binary(to) or util.binary(tn):
- dodiff = 'binary'
- header.insert(0, mdiff.diffline(revs, a, b, opts))
+ oflag = man1.flags(f)
+ nflag = ctx2.flags(f)
+ binary = util.binary(to) or util.binary(tn)
+ if opts.git:
+ _addmodehdr(header, gitmode[oflag], gitmode[nflag])
+ if binary:
+ dodiff = 'binary'
+ elif binary or nflag != oflag:
+ losedatafn(f)
+ if opts.git:
+ header.insert(0, mdiff.diffline(revs, a, b, opts))
+
if dodiff:
if dodiff == 'binary':
text = b85diff(to, tn)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/autodiff.py Fri Jan 01 20:54:05 2010 +0100
@@ -0,0 +1,46 @@
+# Extension dedicated to testing patch.diff() upgrade modes
+#
+#
+from mercurial import cmdutil, patch, util
+
+def autodiff(ui, repo, *pats, **opts):
+ diffopts = patch.diffopts(ui, opts)
+ git = opts.get('git', 'no')
+ brokenfiles = set()
+ losedatafn = None
+ if git in ('yes', 'no'):
+ diffopts.git = git == 'yes'
+ diffopts.upgrade = False
+ elif git == 'auto':
+ diffopts.git = False
+ diffopts.upgrade = True
+ elif git == 'warn':
+ diffopts.git = False
+ diffopts.upgrade = True
+ def losedatafn(fn=None, **kwargs):
+ brokenfiles.add(fn)
+ return True
+ elif git == 'abort':
+ diffopts.git = False
+ diffopts.upgrade = True
+ def losedatafn(fn=None, **kwargs):
+ raise util.Abort('losing data for %s' % fn)
+ else:
+ raise util.Abort('--git must be yes, no or auto')
+
+ node1, node2 = cmdutil.revpair(repo, [])
+ m = cmdutil.match(repo, pats, opts)
+ it = patch.diff(repo, node1, node2, match=m, opts=diffopts,
+ losedatafn=losedatafn)
+ for chunk in it:
+ ui.write(chunk)
+ for fn in sorted(brokenfiles):
+ ui.write('data lost for: %s\n' % fn)
+
+cmdtable = {
+ "autodiff":
+ (autodiff,
+ [('', 'git', '', 'git upgrade mode (yes/no/auto/warn/abort)'),
+ ],
+ '[OPTION]... [FILE]...'),
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-diff-upgrade Fri Jan 01 20:54:05 2010 +0100
@@ -0,0 +1,63 @@
+#!/bin/sh
+
+echo "[extensions]" >> $HGRCPATH
+echo "autodiff=$TESTDIR/autodiff.py" >> $HGRCPATH
+echo "[diff]" >> $HGRCPATH
+echo "nodates=1" >> $HGRCPATH
+
+hg init repo
+cd repo
+echo '% make a combination of new, changed and deleted file'
+echo regular > regular
+echo rmregular > rmregular
+touch rmempty
+echo exec > exec
+chmod +x exec
+echo rmexec > rmexec
+chmod +x rmexec
+echo setexec > setexec
+echo unsetexec > unsetexec
+chmod +x unsetexec
+echo binary > binary
+python -c "file('rmbinary', 'wb').write('\0')"
+hg ci -Am addfiles
+echo regular >> regular
+echo newregular >> newregular
+rm rmempty
+touch newempty
+rm rmregular
+echo exec >> exec
+echo newexec > newexec
+chmod +x newexec
+rm rmexec
+chmod +x setexec
+chmod -x unsetexec
+python -c "file('binary', 'wb').write('\0\0')"
+python -c "file('newbinary', 'wb').write('\0')"
+rm rmbinary
+hg addremove
+
+echo '% git=no: regular diff for all files'
+hg autodiff --git=no
+
+echo '% git=no: git diff for single regular file'
+hg autodiff --git=yes regular
+
+echo '% git=auto: regular diff for regular files and removals'
+hg autodiff --git=auto regular newregular rmregular rmbinary rmexec
+
+for f in exec newexec setexec unsetexec binary newbinary newempty rmempty; do
+ echo '% git=auto: git diff for' $f
+ hg autodiff --git=auto $f
+done
+
+echo '% git=warn: regular diff with data loss warnings'
+hg autodiff --git=warn
+
+echo '% git=abort: fail on execute bit change'
+hg autodiff --git=abort regular setexec
+
+echo '% git=abort: succeed on regular file'
+hg autodiff --git=abort regular
+
+cd ..
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-diff-upgrade.out Fri Jan 01 20:54:05 2010 +0100
@@ -0,0 +1,186 @@
+% make a combination of new, changed and deleted file
+adding binary
+adding exec
+adding regular
+adding rmbinary
+adding rmempty
+adding rmexec
+adding rmregular
+adding setexec
+adding unsetexec
+adding newbinary
+adding newempty
+adding newexec
+adding newregular
+removing rmbinary
+removing rmempty
+removing rmexec
+removing rmregular
+% git=no: regular diff for all files
+diff -r b3f053cd7c7f binary
+Binary file binary has changed
+diff -r b3f053cd7c7f exec
+--- a/exec
++++ b/exec
+@@ -1,1 +1,2 @@
+ exec
++exec
+diff -r b3f053cd7c7f newbinary
+Binary file newbinary has changed
+diff -r b3f053cd7c7f newexec
+--- /dev/null
++++ b/newexec
+@@ -0,0 +1,1 @@
++newexec
+diff -r b3f053cd7c7f newregular
+--- /dev/null
++++ b/newregular
+@@ -0,0 +1,1 @@
++newregular
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+diff -r b3f053cd7c7f rmbinary
+Binary file rmbinary has changed
+diff -r b3f053cd7c7f rmexec
+--- a/rmexec
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmexec
+diff -r b3f053cd7c7f rmregular
+--- a/rmregular
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmregular
+% git=no: git diff for single regular file
+diff --git a/regular b/regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+% git=auto: regular diff for regular files and removals
+diff -r b3f053cd7c7f newregular
+--- /dev/null
++++ b/newregular
+@@ -0,0 +1,1 @@
++newregular
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+diff -r b3f053cd7c7f rmbinary
+Binary file rmbinary has changed
+diff -r b3f053cd7c7f rmexec
+--- a/rmexec
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmexec
+diff -r b3f053cd7c7f rmregular
+--- a/rmregular
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmregular
+% git=auto: git diff for exec
+diff -r b3f053cd7c7f exec
+--- a/exec
++++ b/exec
+@@ -1,1 +1,2 @@
+ exec
++exec
+% git=auto: git diff for newexec
+diff --git a/newexec b/newexec
+new file mode 100755
+--- /dev/null
++++ b/newexec
+@@ -0,0 +1,1 @@
++newexec
+% git=auto: git diff for setexec
+diff --git a/setexec b/setexec
+old mode 100644
+new mode 100755
+% git=auto: git diff for unsetexec
+diff --git a/unsetexec b/unsetexec
+old mode 100755
+new mode 100644
+% git=auto: git diff for binary
+diff --git a/binary b/binary
+index a9128c283485202893f5af379dd9beccb6e79486..09f370e38f498a462e1ca0faa724559b6630c04f
+GIT binary patch
+literal 2
+Jc${Nk0000200961
+
+% git=auto: git diff for newbinary
+diff --git a/newbinary b/newbinary
+new file mode 100644
+index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d
+GIT binary patch
+literal 1
+Ic${MZ000310RR91
+
+% git=auto: git diff for newempty
+diff --git a/newempty b/newempty
+new file mode 100644
+% git=auto: git diff for rmempty
+diff --git a/rmempty b/rmempty
+deleted file mode 100644
+% git=warn: regular diff with data loss warnings
+diff -r b3f053cd7c7f binary
+Binary file binary has changed
+diff -r b3f053cd7c7f exec
+--- a/exec
++++ b/exec
+@@ -1,1 +1,2 @@
+ exec
++exec
+diff -r b3f053cd7c7f newbinary
+Binary file newbinary has changed
+diff -r b3f053cd7c7f newexec
+--- /dev/null
++++ b/newexec
+@@ -0,0 +1,1 @@
++newexec
+diff -r b3f053cd7c7f newregular
+--- /dev/null
++++ b/newregular
+@@ -0,0 +1,1 @@
++newregular
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular
+diff -r b3f053cd7c7f rmbinary
+Binary file rmbinary has changed
+diff -r b3f053cd7c7f rmexec
+--- a/rmexec
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmexec
+diff -r b3f053cd7c7f rmregular
+--- a/rmregular
++++ /dev/null
+@@ -1,1 +0,0 @@
+-rmregular
+data lost for: binary
+data lost for: newbinary
+data lost for: newempty
+data lost for: newexec
+data lost for: rmempty
+data lost for: setexec
+data lost for: unsetexec
+% git=abort: fail on execute bit change
+abort: losing data for setexec
+% git=abort: succeed on regular file
+diff -r b3f053cd7c7f regular
+--- a/regular
++++ b/regular
+@@ -1,1 +1,2 @@
+ regular
++regular