comparison hgext/eol.py @ 11249:0bb67503ad4b stable

eol: extension for managing file EOLs
author Martin Geisler <mg@lazybytes.net>
date Mon, 31 May 2010 21:37:01 +0200
parents
children c327bfa5e831 0852da25a31b
comparison
equal deleted inserted replaced
11246:8f5ad12db28e 11249:0bb67503ad4b
1 """automatically manage newlines in repository files
2
3 This extension allows you to manage the type of line endings (CRLF or
4 LF) that are used in the repository and in the local working
5 directory. That way you can get CRLF line endings on Windows and LF on
6 Unix/Mac, thereby letting everybody use their OS native line endings.
7
8 The extension reads its configuration from a versioned ``.hgeol``
9 configuration file every time you run an ``hg`` command. The
10 ``.hgeol`` file uses the same syntax as all other Mercurial
11 configuration files. It uses two sections, ``[patterns]`` and
12 ``[repository]``.
13
14 The ``[patterns]`` section specifies the line endings used in the
15 working directory. The format is specified by a file pattern. The
16 first match is used, so put more specific patterns first. The
17 available line endings are ``LF``, ``CRLF``, and ``BIN``.
18
19 Files with the declared format of ``CRLF`` or ``LF`` are always
20 checked out in that format and files declared to be binary (``BIN``)
21 are left unchanged. Additionally, ``native`` is an alias for the
22 platform's default line ending: ``LF`` on Unix (including Mac OS X)
23 and ``CRLF`` on Windows. Note that ``BIN`` (do nothing to line
24 endings) is Mercurial's default behaviour; it is only needed if you
25 need to override a later, more general pattern.
26
27 The optional ``[repository]`` section specifies the line endings to
28 use for files stored in the repository. It has a single setting,
29 ``native``, which determines the storage line endings for files
30 declared as ``native`` in the ``[patterns]`` section. It can be set to
31 ``LF`` or ``CRLF``. The default is ``LF``. For example, this means
32 that on Windows, files configured as ``native`` (``CRLF`` by default)
33 will be converted to ``LF`` when stored in the repository. Files
34 declared as ``LF``, ``CRLF``, or ``BIN`` in the ``[patterns]`` section
35 are always stored as-is in the repository.
36
37 Example versioned ``.hgeol`` file::
38
39 [patterns]
40 **.py = native
41 **.vcproj = CRLF
42 **.txt = native
43 Makefile = LF
44 **.jpg = BIN
45
46 [repository]
47 native = LF
48
49 The extension uses an optional ``[eol]`` section in your hgrc file
50 (not the ``.hgeol`` file) for settings that control the overall
51 behavior. There are two settings:
52
53 - ``eol.native`` (default ``os.linesep``) can be set to ``LF`` or
54 ``CRLF`` to override the default interpretation of ``native`` for
55 checkout. This can be used with :hg:`archive` on Unix, say, to
56 generate an archive where files have line endings for Windows.
57
58 - ``eol.only-consistent`` (default True) can be set to False to make
59 the extension convert files with inconsistent EOLs. Inconsistent
60 means that there is both ``CRLF`` and ``LF`` present in the file.
61 Such files are normally not touched under the assumption that they
62 have mixed EOLs on purpose.
63
64 See :hg:`help patterns` for more information about the glob patterns
65 used.
66 """
67
68 from mercurial.i18n import _
69 from mercurial import util, config, extensions, commands, match, cmdutil
70 import re, os
71
# Matches a lone LF, i.e., one that is not part of CRLF.
singlelf = re.compile('(^|[^\r])\n')
# Matches a single EOL, which is either a LF or a CRLF. Any run of
# repeated CR in front of the LF is part of the match, so it is removed
# when the match is substituted. We do not care about old Macintosh
# files, so a stray CR is an error.
eolre = re.compile('\r*\n')
78
79
def inconsistenteol(data):
    """Tell whether data mixes CRLF and lone LF line endings.

    Returns True only when data contains at least one CRLF and at least
    one LF that is not part of a CRLF; otherwise returns False.
    """
    return '\r\n' in data and singlelf.search(data) is not None
82
def tolf(s, params, ui, **kwargs):
    """Filter to convert to LF EOLs.

    The data is returned unchanged when util.binary() reports it as
    binary, or when it has inconsistent EOLs and ``eol.only-consistent``
    is enabled (the default). Otherwise every EOL matched by eolre is
    replaced with a LF.
    """
    if util.binary(s):
        return s
    if ui.configbool('eol', 'only-consistent', True) and inconsistenteol(s):
        return s
    return eolre.sub('\n', s)
90
def tocrlf(s, params, ui, **kwargs):
    """Filter to convert to CRLF EOLs.

    The data is returned unchanged when util.binary() reports it as
    binary, or when it has inconsistent EOLs and ``eol.only-consistent``
    is enabled (the default). Otherwise every EOL matched by eolre is
    replaced with a CRLF.
    """
    if util.binary(s):
        return s
    if ui.configbool('eol', 'only-consistent', True) and inconsistenteol(s):
        return s
    return eolre.sub('\r\n', s)
98
def isbinary(s, params):
    """Filter to do nothing with the file.

    Returns the data exactly as it was passed in; params is ignored.
    """
    return s
102
# Data filters by name. reposetup() registers each of them on local
# repositories, and readhgeol() writes these names into the ``decode``
# and ``encode`` configuration sections.
filters = {
    'to-lf': tolf,
    'to-crlf': tocrlf,
    'is-binary': isbinary,
}
108
109
def hook(ui, repo, node, hooktype, **kwargs):
    """verify that files have expected EOLs

    Collects the files touched by every revision from ``node`` up to the
    last revision in the repository and checks the tip version of each
    of them against the patterns in the ``encode`` configuration section.

    Raises util.Abort for the first file whose content contradicts the
    filter selected for it: CRLF in a ``to-lf`` file, or a lone LF in a
    ``to-crlf`` file.
    """
    files = set()
    for rev in xrange(repo[node].rev(), len(repo)):
        files.update(repo[rev].files())
    tip = repo['tip']
    # Build each matcher once instead of once per file and pattern.
    matchers = [(match.match(repo.root, '', [pattern]), target)
                for pattern, target in ui.configitems('encode')]
    for f in files:
        if f not in tip:
            continue
        for matches, target in matchers:
            if not matches(f):
                continue
            data = tip[f].data()
            if target == "to-lf" and "\r\n" in data:
                raise util.Abort(_("%s should not have CRLF line endings")
                                 % f)
            elif target == "to-crlf" and singlelf.search(data):
                raise util.Abort(_("%s should not have LF line endings")
                                 % f)
            # The first matching pattern decides the EOL style of a
            # file, so later, more general patterns must not be checked.
            break
128
129
def preupdate(ui, repo, hooktype, parent1, parent2):
    """Re-read the ``.hgeol`` file from revision parent1.

    Installed as the ``preupdate.eol`` hook by uisetup(). Always returns
    False (presumably so that the hook never blocks the update; verify
    against Mercurial's hook return-value convention).
    """
    repo.readhgeol(parent1)
    return False
134
def uisetup(ui):
    """Register preupdate() as the ``preupdate.eol`` hook."""
    ui.setconfig('hooks', 'preupdate.eol', preupdate)
137
def extsetup(ui):
    """Abort when the win32text extension can also be found.

    Raises util.Abort if extensions.find('win32text') succeeds; a
    KeyError from that lookup means win32text is absent and setup
    continues.
    """
    try:
        extensions.find('win32text')
    except KeyError:
        # win32text is not available, so there is no conflict.
        return
    raise util.Abort(_("the eol extension is incompatible with the "
                       "win32text extension"))
145
146
def reposetup(ui, repo):
    """Install the EOL filters and the eolrepo class on a local repo.

    Non-local repositories are left untouched. For local ones the data
    filters are registered, ``patch.eol`` is set to ``auto``, the class
    of ``repo`` is replaced by a subclass that knows about ``.hgeol``,
    and the ``.hgeol`` file is read once.
    """
    if not repo.local():
        return
    for name, fn in filters.iteritems():
        repo.adddatafilter(name, fn)

    ui.setconfig('patch', 'eol', 'auto')

    class eolrepo(repo.__class__):

        # Map the EOL styles usable in .hgeol to filter names. The
        # 'NATIVE' entry is filled in by readhgeol().
        _decode = {'LF': 'to-lf', 'CRLF': 'to-crlf', 'BIN': 'is-binary'}
        _encode = {'LF': 'to-lf', 'CRLF': 'to-crlf', 'BIN': 'is-binary'}

        def readhgeol(self, node=None, data=None):
            """Load ``.hgeol`` and configure the encode/decode filters.

            When data is None it is read from the working directory
            (node is None) or from the given revision. Returns None if
            the file cannot be read, otherwise a matcher for the files
            whose pattern is not declared ``BIN``.
            """
            if data is None:
                try:
                    if node is None:
                        data = self.wfile('.hgeol').read()
                    else:
                        data = self[node]['.hgeol'].data()
                except (IOError, LookupError):
                    return None

            if self.ui.config('eol', 'native', os.linesep) in ('LF', '\n'):
                self._decode['NATIVE'] = 'to-lf'
            else:
                self._decode['NATIVE'] = 'to-crlf'

            eol = config.config()
            eol.parse('.hgeol', data)

            if eol.get('repository', 'native') == 'CRLF':
                self._encode['NATIVE'] = 'to-crlf'
            else:
                self._encode['NATIVE'] = 'to-lf'

            for pattern, style in eol.items('patterns'):
                key = style.upper()
                try:
                    self.ui.setconfig('decode', pattern, self._decode[key])
                    self.ui.setconfig('encode', pattern, self._encode[key])
                except KeyError:
                    self.ui.warn(_("ignoring unknown EOL style '%s' from %s\n")
                                 % (style, eol.source('patterns', pattern)))

            include = []
            exclude = []
            for pattern, style in eol.items('patterns'):
                key = style.upper()
                if key == 'BIN':
                    exclude.append(pattern)
                else:
                    include.append(pattern)

            # This will match the files for which we need to care
            # about inconsistent newlines.
            return match.match(self.root, '', [], include, exclude)

        def _hgcleardirstate(self):
            """Read ``.hgeol`` and reset the dirstate if it has changed.

            The matcher from the working directory ``.hgeol`` (or, failing
            that, the one in tip) is stored in self._eolfile; util.never
            is stored when there is none. If ``.hgeol`` is newer than the
            ``eol.cache`` file, every dirstate entry is rewritten and
            ``eol.cache`` is touched.
            """
            self._eolfile = self.readhgeol() or self.readhgeol('tip')

            if not self._eolfile:
                self._eolfile = util.never
                return

            try:
                cachemtime = os.path.getmtime(self.join("eol.cache"))
            except OSError:
                cachemtime = 0

            try:
                eolmtime = os.path.getmtime(self.wjoin(".hgeol"))
            except OSError:
                eolmtime = 0

            if eolmtime > cachemtime:
                self.ui.debug("eol: detected change in .hgeol\n")
                # TODO: we could introduce a method for this in dirstate.
                wlock = None
                try:
                    wlock = self.wlock()
                    # Keep the first two fields of every entry and reset
                    # the last two (presumably size and mtime, so that
                    # the files get re-examined -- confirm against the
                    # dirstate format).
                    for f, e in self.dirstate._map.iteritems():
                        self.dirstate._map[f] = (e[0], e[1], -1, 0)
                    self.dirstate._dirty = True
                    # Touch the cache to update mtime. TODO: are we sure this
                    # always enough to update the mtime, or should we write a
                    # bit to the file?
                    self.opener("eol.cache", "w").close()
                finally:
                    if wlock is not None:
                        wlock.release()

        def commitctx(self, ctx, error=False):
            """Refuse to commit managed files with inconsistent EOLs.

            Raises util.Abort for the first added or modified file that
            is matched by self._eolfile, is not binary and mixes CRLF and
            LF; otherwise defers to the parent class.
            """
            for f in sorted(ctx.added() + ctx.modified()):
                if not self._eolfile(f):
                    continue
                data = ctx[f].data()
                if util.binary(data):
                    # We should not abort here, since the user should
                    # be able to say "** = native" to automatically
                    # have all non-binary files taken care of.
                    continue
                if inconsistenteol(data):
                    # Format after translating: the message catalog is
                    # keyed on the unformatted string.
                    raise util.Abort(_("inconsistent newline style "
                                       "in %s\n") % f)
            return super(eolrepo, self).commitctx(ctx, error)
    repo.__class__ = eolrepo
    repo._hgcleardirstate()