changeset 8810:ac92775b3b80

Add patch.eol to ignore EOLs when patching (issue1019) The intent is to fix many issues involving patching when win32text is enabled. With win32text, the working directory and repository file EOLs are not the same, which means that patches made on a non-win32text host do not apply cleanly because of EOL discrepancies. A theoretically correct approach would be to transform either the patched file or the patch content with the encoding/decoding filters used by win32text. This solution is tricky to implement and invasive; instead, we prefer to address the win32text case by offering a way to ignore input EOLs when patching and to rewrite them when saving the patched result.
author Patrick Mezard <pmezard@gmail.com>
date Mon, 15 Jun 2009 00:03:26 +0200
parents 6fce36336e42
children 8b35b08724eb
files doc/hgrc.5.txt hgext/keyword.py mercurial/commands.py mercurial/patch.py tests/test-import-eol tests/test-import-eol.out
diffstat 6 files changed, 165 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/doc/hgrc.5.txt	Sun Jun 14 13:32:19 2009 -0700
+++ b/doc/hgrc.5.txt	Mon Jun 15 00:03:26 2009 +0200
@@ -607,6 +607,17 @@
     Optional. It's the hostname that the sender can use to identify
     itself to the MTA.
 
+[[patch]]
+patch::
+  Settings used when applying patches, for instance through the 'import'
+  command or with the Mercurial Queues extension.
+  eol;;
+    When set to 'strict', the end of lines of the patch content and
+    of the patched files are preserved. When set to 'lf' or 'crlf',
+    the end of lines of both are ignored when patching and the
+    resulting line endings are normalized to LF (Unix) or CRLF (Windows).
+    Default: strict.
+
 [[paths]]
 paths::
   Assigns symbolic names to repositories. The left side is the
--- a/hgext/keyword.py	Sun Jun 14 13:32:19 2009 -0700
+++ b/hgext/keyword.py	Mon Jun 15 00:03:26 2009 +0200
@@ -485,10 +485,10 @@
                 release(lock, wlock)
 
     # monkeypatches
-    def kwpatchfile_init(orig, self, ui, fname, opener, missing=False):
+    def kwpatchfile_init(orig, self, ui, fname, opener, missing=False, eol=None):
         '''Monkeypatch/wrap patch.patchfile.__init__ to avoid
         rejects or conflicts due to expanded keywords in working dir.'''
-        orig(self, ui, fname, opener, missing)
+        orig(self, ui, fname, opener, missing, eol)
         # shrink keywords read from working dir
         self.lines = kwt.shrinklines(self.fname, self.lines)
 
--- a/mercurial/commands.py	Sun Jun 14 13:32:19 2009 -0700
+++ b/mercurial/commands.py	Mon Jun 15 00:03:26 2009 +0200
@@ -1764,7 +1764,7 @@
                 files = {}
                 try:
                     patch.patch(tmpname, ui, strip=strip, cwd=repo.root,
-                                files=files)
+                                files=files, eolmode=None)
                 finally:
                     files = patch.updatedir(ui, repo, files, similarity=sim/100.)
                 if not opts.get('no_commit'):
--- a/mercurial/patch.py	Sun Jun 14 13:32:19 2009 -0700
+++ b/mercurial/patch.py	Mon Jun 15 00:03:26 2009 +0200
@@ -228,13 +228,42 @@
 
     return (dopatch, gitpatches)
 
+class linereader(object):
+    # allows pushing lines back into the input; textmode maps CRLF to LF
+    def __init__(self, fp, textmode=False):
+        self.fp = fp
+        self.buf = []
+        self.textmode = textmode
+
+    def push(self, line):
+        if line is not None:
+            self.buf.append(line)
+
+    def readline(self):
+        if self.buf:
+            l = self.buf[0]
+            del self.buf[0]
+            return l
+        l = self.fp.readline()
+        if self.textmode and l.endswith('\r\n'):
+            l = l[:-2] + '\n'
+        return l
+
+    def __iter__(self):
+        while 1:
+            l = self.readline()
+            if not l:
+                break
+            yield l
+
 # @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
 unidesc = re.compile('@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@')
 contextdesc = re.compile('(---|\*\*\*) (\d+)(,(\d+))? (---|\*\*\*)')
 
 class patchfile(object):
-    def __init__(self, ui, fname, opener, missing=False):
+    def __init__(self, ui, fname, opener, missing=False, eol=None):
         self.fname = fname
+        self.eol = eol
         self.opener = opener
         self.ui = ui
         self.lines = []
@@ -260,14 +289,20 @@
     def readlines(self, fname):
         fp = self.opener(fname, 'r')
         try:
-            return fp.readlines()
+            return list(linereader(fp, self.eol is not None))
         finally:
             fp.close()
 
     def writelines(self, fname, lines):
         fp = self.opener(fname, 'w')
         try:
-            fp.writelines(lines)
+            if self.eol and self.eol != '\n':
+                for l in lines:
+                    if l and l[-1] == '\n':
+                        l = l[:-1] + self.eol  # swap trailing LF for eol
+                    fp.write(l)
+            else:
+                fp.writelines(lines)
         finally:
             fp.close()
 
@@ -782,28 +817,6 @@
 
     return fname, missing
 
-class linereader(object):
-    # simple class to allow pushing lines back into the input stream
-    def __init__(self, fp):
-        self.fp = fp
-        self.buf = []
-
-    def push(self, line):
-        if line is not None:
-            self.buf.append(line)
-
-    def readline(self):
-        if self.buf:
-            return self.buf.pop(0)
-        return self.fp.readline()
-
-    def __iter__(self):
-        while 1:
-            l = self.readline()
-            if not l:
-                break
-            yield l
-
 def scangitpatch(lr, firstline):
     """
     Git patches can emit:
@@ -824,19 +837,21 @@
         fp = lr.fp
     except IOError:
         fp = cStringIO.StringIO(lr.fp.read())
-    gitlr = linereader(fp)
+    gitlr = linereader(fp, lr.textmode)
     gitlr.push(firstline)
     (dopatch, gitpatches) = readgitpatch(gitlr)
     fp.seek(pos)
     return dopatch, gitpatches
 
-def iterhunks(ui, fp, sourcefile=None):
+def iterhunks(ui, fp, sourcefile=None, textmode=False):
     """Read a patch and yield the following events:
     - ("file", afile, bfile, firsthunk): select a new target file.
     - ("hunk", hunk): a new hunk is ready to be applied, follows a
     "file" event.
     - ("git", gitchanges): current diff is in git format, gitchanges
     maps filenames to gitpatch records. Unique event.
+
+    If textmode is True, input line-endings are normalized to LF.
     """
     changed = {}
     current_hunk = None
@@ -850,7 +865,7 @@
     # our states
     BFILE = 1
     context = None
-    lr = linereader(fp)
+    lr = linereader(fp, textmode)
     dopatch = True
     # gitworkdone is True if a git operation (copy, rename, ...) was
     # performed already for the current file. Useful when the file
@@ -954,17 +969,25 @@
     if hunknum == 0 and dopatch and not gitworkdone:
         raise NoHunks
 
-def applydiff(ui, fp, changed, strip=1, sourcefile=None, reverse=False):
-    """reads a patch from fp and tries to apply it.  The dict 'changed' is
-       filled in with all of the filenames changed by the patch.  Returns 0
-       for a clean patch, -1 if any rejects were found and 1 if there was
-       any fuzz."""
+def applydiff(ui, fp, changed, strip=1, sourcefile=None, reverse=False,
+              eol=None):
+    """
+    Reads a patch from fp and tries to apply it. 
 
+    The dict 'changed' is filled in with all of the filenames changed
+    by the patch. Returns 0 for a clean patch, -1 if any rejects were
+    found and 1 if there was any fuzz.
+
+    If 'eol' is None, the patch content and patched file are read in
+    binary mode. Otherwise, line endings are ignored when patching then
+    normalized to 'eol' (usually '\n' or '\r\n').
+    """
     rejects = 0
     err = 0
     current_file = None
     gitpatches = None
     opener = util.opener(os.getcwd())
+    textmode = eol is not None
 
     def closefile():
         if not current_file:
@@ -972,7 +995,7 @@
         current_file.close()
         return len(current_file.rej)
 
-    for state, values in iterhunks(ui, fp, sourcefile):
+    for state, values in iterhunks(ui, fp, sourcefile, textmode):
         if state == 'hunk':
             if not current_file:
                 continue
@@ -987,11 +1010,11 @@
             afile, bfile, first_hunk = values
             try:
                 if sourcefile:
-                    current_file = patchfile(ui, sourcefile, opener)
+                    current_file = patchfile(ui, sourcefile, opener, eol=eol)
                 else:
                     current_file, missing = selectfile(afile, bfile, first_hunk,
                                             strip, reverse)
-                    current_file = patchfile(ui, current_file, opener, missing)
+                    current_file = patchfile(ui, current_file, opener, missing, eol)
             except PatchError, err:
                 ui.warn(str(err) + '\n')
                 current_file, current_hunk = None, None
@@ -1104,9 +1127,17 @@
                          util.explain_exit(code)[0])
     return fuzz
 
-def internalpatch(patchobj, ui, strip, cwd, files={}):
+def internalpatch(patchobj, ui, strip, cwd, files={}, eolmode='strict'):
     """use builtin patch to apply <patchobj> to the working directory.
     returns whether patch was applied with fuzz factor."""
+
+    if eolmode is None:
+        eolmode = ui.config('patch', 'eol', 'strict')
+    try:
+        eol = {'strict': None, 'crlf': '\r\n', 'lf': '\n'}[eolmode.lower()]
+    except KeyError:
+        raise util.Abort(_('Unsupported line endings type: %s') % eolmode)
+            
     try:
         fp = file(patchobj, 'rb')
     except TypeError:
@@ -1115,7 +1146,7 @@
         curdir = os.getcwd()
         os.chdir(cwd)
     try:
-        ret = applydiff(ui, fp, files, strip=strip)
+        ret = applydiff(ui, fp, files, strip=strip, eol=eol)
     finally:
         if cwd:
             os.chdir(curdir)
@@ -1123,9 +1154,18 @@
         raise PatchError
     return ret > 0
 
-def patch(patchname, ui, strip=1, cwd=None, files={}):
-    """apply <patchname> to the working directory.
-    returns whether patch was applied with fuzz factor."""
+def patch(patchname, ui, strip=1, cwd=None, files={}, eolmode='strict'):
+    """Apply <patchname> to the working directory.
+
+    'eolmode' specifies how end of lines should be handled. It can be:
+    - 'strict': inputs are read in binary mode, EOLs are preserved
+    - 'crlf': EOLs are ignored when patching and reset to CRLF
+    - 'lf': EOLs are ignored when patching and reset to LF
+    - None: get it from user settings, default to 'strict'
+    'eolmode' is ignored when using an external patcher program.
+
+    Returns whether patch was applied with fuzz factor.
+    """
     patcher = ui.config('ui', 'patch')
     args = []
     try:
@@ -1134,7 +1174,7 @@
                                  files)
         else:
             try:
-                return internalpatch(patchname, ui, strip, cwd, files)
+                return internalpatch(patchname, ui, strip, cwd, files, eolmode)
             except NoHunks:
                 patcher = util.find_exe('gpatch') or util.find_exe('patch') or 'patch'
                 ui.debug(_('no valid hunks found; trying with %r instead\n') %
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-import-eol	Mon Jun 15 00:03:26 2009 +0200
@@ -0,0 +1,53 @@
+#!/bin/sh
+
+cat > makepatch.py <<EOF
+f = file('eol.diff', 'wb')
+w = f.write
+w('test message\n')
+w('diff --git a/a b/a\n')
+w('--- a/a\n')
+w('+++ b/a\n')
+w('@@ -1,5 +1,5 @@\n')
+w(' a\n')
+w('-b\r\n')
+w('+y\r\n')
+w(' c\r\n')
+w(' d\n')
+w('-e\n')
+w('\ No newline at end of file\n')
+w('+z\r\n')
+w('\ No newline at end of file\r\n')
+EOF
+
+hg init repo
+cd repo
+echo '\.diff' > .hgignore
+
+# Test different patch.eol values
+python -c 'file("a", "wb").write("a\nb\nc\nd\ne")'
+hg ci -Am adda
+python ../makepatch.py
+echo % invalid eol
+hg --config patch.eol='LFCR' import eol.diff
+hg revert -a
+echo % force LF
+hg --traceback --config patch.eol='LF' import eol.diff
+python -c 'print repr(file("a","rb").read())'
+hg st
+echo % force CRLF
+hg up -C 0
+hg --traceback --config patch.eol='CRLF' import eol.diff
+python -c 'print repr(file("a","rb").read())'
+hg st
+
+# Test patch.eol and binary patches
+python -c 'file("b", "wb").write("a\x00\nb")'
+hg ci -Am addb
+python -c 'file("b", "wb").write("a\x00\nc")'
+hg diff --git > bin.diff
+hg revert --no-backup b
+echo % binary patch with --eol
+hg import --config patch.eol='CRLF' -m changeb bin.diff
+python -c 'print repr(file("b","rb").read())'
+hg st
+cd ..
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-import-eol.out	Mon Jun 15 00:03:26 2009 +0200
@@ -0,0 +1,16 @@
+adding .hgignore
+adding a
+% invalid eol
+applying eol.diff
+abort: Unsupported line endings type: LFCR
+% force LF
+applying eol.diff
+'a\ny\nc\nd\nz'
+% force CRLF
+1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+applying eol.diff
+'a\r\ny\r\nc\r\nd\r\nz'
+adding b
+% binary patch with --eol
+applying bin.diff
+'a\x00\nc'