Mercurial > hg
comparison hgext/eol.py @ 11249:0bb67503ad4b stable
eol: extension for managing file EOLs
author | Martin Geisler <mg@lazybytes.net> |
---|---|
date | Mon, 31 May 2010 21:37:01 +0200 |
parents | |
children | c327bfa5e831 0852da25a31b |
comparison
equal
deleted
inserted
replaced
11246:8f5ad12db28e | 11249:0bb67503ad4b |
---|---|
1 """automatically manage newlines in repository files | |
2 | |
3 This extension allows you to manage the type of line endings (CRLF or | |
4 LF) that are used in the repository and in the local working | |
5 directory. That way you can get CRLF line endings on Windows and LF on | |
6 Unix/Mac, thereby letting everybody use their OS native line endings. | |
7 | |
8 The extension reads its configuration from a versioned ``.hgeol`` | |
9 configuration file every time you run an ``hg`` command. The | |
10 ``.hgeol`` file uses the same syntax as all other Mercurial | |
11 configuration files. It uses two sections, ``[patterns]`` and | |
12 ``[repository]``. | |
13 | |
14 The ``[patterns]`` section specifies the line endings used in the | |
15 working directory. The format is specified by a file pattern. The | |
16 first match is used, so put more specific patterns first. The | |
17 available line endings are ``LF``, ``CRLF``, and ``BIN``. | |
18 | |
19 Files with the declared format of ``CRLF`` or ``LF`` are always | |
20 checked out in that format and files declared to be binary (``BIN``) | |
21 are left unchanged. Additionally, ``native`` is an alias for the | |
22 platform's default line ending: ``LF`` on Unix (including Mac OS X) | |
23 and ``CRLF`` on Windows. Note that ``BIN`` (do nothing to line | |
24 endings) is Mercurial's default behaviour; it is only needed if you | |
25 need to override a later, more general pattern. | |
26 | |
27 The optional ``[repository]`` section specifies the line endings to | |
28 use for files stored in the repository. It has a single setting, | |
29 ``native``, which determines the storage line endings for files | |
30 declared as ``native`` in the ``[patterns]`` section. It can be set to | |
31 ``LF`` or ``CRLF``. The default is ``LF``. For example, this means | |
32 that on Windows, files configured as ``native`` (``CRLF`` by default) | |
33 will be converted to ``LF`` when stored in the repository. Files | |
34 declared as ``LF``, ``CRLF``, or ``BIN`` in the ``[patterns]`` section | |
35 are always stored as-is in the repository. | |
36 | |
37 Example versioned ``.hgeol`` file:: | |
38 | |
39 [patterns] | |
40 **.py = native | |
41 **.vcproj = CRLF | |
42 **.txt = native | |
43 Makefile = LF | |
44 **.jpg = BIN | |
45 | |
46 [repository] | |
47 native = LF | |
48 | |
49 The extension uses an optional ``[eol]`` section in your hgrc file | |
50 (not the ``.hgeol`` file) for settings that control the overall | |
51 behavior. There are two settings: | |
52 | |
53 - ``eol.native`` (default ``os.linesep``) can be set to ``LF`` or | |
54 ``CRLF`` to override the default interpretation of ``native`` for | |
55 checkout. This can be used with :hg:`archive` on Unix, say, to | |
56 generate an archive where files have line endings for Windows. | |
57 | |
58 - ``eol.only-consistent`` (default True) can be set to False to make | |
59 the extension convert files with inconsistent EOLs. Inconsistent | |
60 means that there is both ``CRLF`` and ``LF`` present in the file. | |
61 Such files are normally not touched under the assumption that they | |
62 have mixed EOLs on purpose. | |
63 | |
64 See :hg:`help patterns` for more information about the glob patterns | |
65 used. | |
66 """ | |
67 | |
68 from mercurial.i18n import _ | |
69 from mercurial import util, config, extensions, commands, match, cmdutil | |
70 import re, os | |
71 | |
# A "lone" LF: a newline at the very start of the data, or one whose
# preceding character is not CR -- i.e. an LF that is not part of a CRLF.
singlelf = re.compile('(^|[^\r])\n')
# One logical EOL: an LF preceded by any number of CRs, so repeated CRs
# are swallowed together with the line ending. Old Macintosh files are
# not supported: a stray CR with no LF after it is not matched here.
eolre = re.compile('\r*\n')


def inconsistenteol(data):
    """Tell whether data mixes CRLF and lone LF line endings.

    Only the truth value of the result is meaningful: it is False when
    data contains no CRLF at all, and otherwise whatever
    singlelf.search() returns (a match object for the first lone LF, or
    None when every LF is part of a CRLF).
    """
    if '\r\n' not in data:
        return False
    return singlelf.search(data)
82 | |
def tolf(s, params, ui, **kwargs):
    """Filter to convert to LF EOLs.

    The data is handed back unchanged when util.binary() reports it as
    binary, or when it has inconsistent EOLs and the
    ``eol.only-consistent`` setting (default True) is enabled.
    ``params`` and ``kwargs`` are accepted but not used.
    """
    keepasis = util.binary(s) or (
        ui.configbool('eol', 'only-consistent', True) and inconsistenteol(s))
    if keepasis:
        return s
    return eolre.sub('\n', s)
90 | |
def tocrlf(s, params, ui, **kwargs):
    """Filter to convert to CRLF EOLs.

    The data is handed back unchanged when util.binary() reports it as
    binary, or when it has inconsistent EOLs and the
    ``eol.only-consistent`` setting (default True) is enabled.
    ``params`` and ``kwargs`` are accepted but not used.
    """
    keepasis = util.binary(s) or (
        ui.configbool('eol', 'only-consistent', True) and inconsistenteol(s))
    if keepasis:
        return s
    return eolre.sub('\r\n', s)
98 | |
def isbinary(s, params):
    """Filter for files declared as binary: return the data untouched.

    ``params`` is accepted but not used.
    """
    return s
102 | |
# Data filters provided by this extension, keyed by filter name.
filters = dict([
    ('to-lf', tolf),
    ('to-crlf', tocrlf),
    ('is-binary', isbinary),
])
108 | |
109 | |
def hook(ui, repo, node, hooktype, **kwargs):
    """Verify that files have expected EOLs.

    Collects the files touched by every revision from ``node`` up to the
    end of the repository and checks the version of each one found in
    ``tip`` against the ``[encode]`` configuration. Files no longer
    present in ``tip`` are skipped.

    Only the first ``[encode]`` pattern matching a file is consulted, in
    line with the documented "first match is used" rule, so a specific
    pattern placed before a general one overrides it here as well.

    ``hooktype`` and ``kwargs`` are accepted but not used.

    Raises util.Abort if a file whose first matching pattern maps to
    ``to-lf`` contains CRLF, or if one mapped to ``to-crlf`` contains a
    lone LF.
    """
    files = set()
    for rev in xrange(repo[node].rev(), len(repo)):
        files.update(repo[rev].files())
    tip = repo['tip']
    for f in files:
        if f not in tip:
            continue
        for pattern, target in ui.configitems('encode'):
            if not match.match(repo.root, '', [pattern])(f):
                continue
            data = tip[f].data()
            if target == "to-lf" and "\r\n" in data:
                raise util.Abort(_("%s should not have CRLF line endings")
                                 % f)
            elif target == "to-crlf" and singlelf.search(data):
                raise util.Abort(_("%s should not have LF line endings")
                                 % f)
            # The first matching pattern decides; later, more general
            # patterns must not be applied to the same file.
            break
128 | |
129 | |
def preupdate(ui, repo, hooktype, parent1, parent2):
    """Hook: load the ``.hgeol`` settings from ``parent1``.

    ``ui``, ``hooktype`` and ``parent2`` are accepted but not used.
    Always returns False.
    NOTE(review): presumably a false result is what lets the update
    proceed -- confirm against Mercurial's hook return convention.
    """
    repo.readhgeol(parent1)
    return False
134 | |
def uisetup(ui):
    """Install preupdate() as the ``preupdate.eol`` entry in ``[hooks]``."""
    hookname = 'preupdate.eol'
    ui.setconfig('hooks', hookname, preupdate)
137 | |
def extsetup(ui):
    """Refuse to run alongside the win32text extension.

    Raises util.Abort when extensions.find('win32text') succeeds.
    ``ui`` is accepted but not used.
    """
    try:
        extensions.find('win32text')
    except KeyError:
        # The lookup failed: treated as "win32text is not enabled", so
        # there is nothing to conflict with.
        return
    raise util.Abort(_("the eol extension is incompatible with the "
                       "win32text extension"))
145 | |
146 | |
def reposetup(ui, repo):
    """Install the EOL filters and the eolrepo subclass on a local repo.

    Does nothing for non-local repositories. Otherwise it registers
    every entry of ``filters`` as a data filter, sets ``patch.eol`` to
    ``auto``, replaces ``repo.__class__`` with a subclass that reads
    ``.hgeol`` and checks EOL consistency at commit time, and finally
    calls ``repo._hgcleardirstate()`` once.
    """
    if not repo.local():
        return
    for name, fn in filters.iteritems():
        repo.adddatafilter(name, fn)

    ui.setconfig('patch', 'eol', 'auto')

    class eolrepo(repo.__class__):

        # Map an upper-cased ``[patterns]`` style to the filter name used
        # for the ``[decode]`` and ``[encode]`` sections respectively. The
        # 'NATIVE' entry is filled in by readhgeol().
        _decode = {'LF': 'to-lf', 'CRLF': 'to-crlf', 'BIN': 'is-binary'}
        _encode = {'LF': 'to-lf', 'CRLF': 'to-crlf', 'BIN': 'is-binary'}

        def readhgeol(self, node=None, data=None):
            """Load ``.hgeol`` and configure the encode/decode filters.

            When ``data`` is None the file content is read from the
            working directory (``node`` is None) or from the changeset
            ``node``.

            Returns None if ``.hgeol`` cannot be read (IOError or
            LookupError). Otherwise returns a matcher with every non-BIN
            pattern as an include and every BIN pattern as an exclude,
            i.e. the files whose newline consistency matters.
            """
            if data is None:
                try:
                    if node is None:
                        data = self.wfile('.hgeol').read()
                    else:
                        data = self[node]['.hgeol'].data()
                except (IOError, LookupError):
                    return None

            # Working directory side: what ``native`` means on checkout.
            if self.ui.config('eol', 'native', os.linesep) in ('LF', '\n'):
                self._decode['NATIVE'] = 'to-lf'
            else:
                self._decode['NATIVE'] = 'to-crlf'

            eol = config.config()
            eol.parse('.hgeol', data)

            # Repository side: how ``native`` files are stored.
            if eol.get('repository', 'native') == 'CRLF':
                self._encode['NATIVE'] = 'to-crlf'
            else:
                self._encode['NATIVE'] = 'to-lf'

            for pattern, style in eol.items('patterns'):
                key = style.upper()
                try:
                    self.ui.setconfig('decode', pattern, self._decode[key])
                    self.ui.setconfig('encode', pattern, self._encode[key])
                except KeyError:
                    self.ui.warn(_("ignoring unknown EOL style '%s' from %s\n")
                                 % (style, eol.source('patterns', pattern)))

            include = []
            exclude = []
            for pattern, style in eol.items('patterns'):
                key = style.upper()
                if key == 'BIN':
                    exclude.append(pattern)
                else:
                    include.append(pattern)

            # This will match the files for which we need to care
            # about inconsistent newlines.
            return match.match(self.root, '', [], include, exclude)

        def _hgcleardirstate(self):
            """Set ``self._eolfile`` and reset the dirstate if .hgeol changed.

            ``self._eolfile`` becomes the matcher from the working copy
            ``.hgeol``, else from the one in ``tip``, else ``util.never``.
            If the working copy ``.hgeol`` has a newer mtime than the
            ``eol.cache`` file, every dirstate entry is rewritten and the
            cache file is touched.
            """
            self._eolfile = self.readhgeol() or self.readhgeol('tip')

            if not self._eolfile:
                self._eolfile = util.never
                return

            # A missing file counts as mtime 0 on either side.
            try:
                cachemtime = os.path.getmtime(self.join("eol.cache"))
            except OSError:
                cachemtime = 0

            try:
                eolmtime = os.path.getmtime(self.wjoin(".hgeol"))
            except OSError:
                eolmtime = 0

            if eolmtime > cachemtime:
                self.ui.debug("eol: detected change in .hgeol\n")
                # TODO: we could introduce a method for this in dirstate.
                wlock = None
                try:
                    wlock = self.wlock()
                    # NOTE(review): the last two fields look like size and
                    # mtime; resetting them presumably makes Mercurial
                    # re-examine every file -- confirm against dirstate.
                    for f, e in self.dirstate._map.iteritems():
                        self.dirstate._map[f] = (e[0], e[1], -1, 0)
                    self.dirstate._dirty = True
                    # Touch the cache to update mtime. TODO: are we sure this
                    # is always enough to update the mtime, or should we
                    # write a bit to the file?
                    self.opener("eol.cache", "w").close()
                finally:
                    if wlock is not None:
                        wlock.release()

        def commitctx(self, ctx, error=False):
            """Commit ``ctx`` after checking EOL consistency.

            Every added or modified file matched by ``self._eolfile`` is
            inspected; binary files are skipped. Raises util.Abort if any
            remaining file mixes CRLF and lone LF line endings.
            """
            for f in sorted(ctx.added() + ctx.modified()):
                if not self._eolfile(f):
                    continue
                data = ctx[f].data()
                if util.binary(data):
                    # We should not abort here, since the user should
                    # be able to say "** = native" to automatically
                    # have all non-binary files taken care of.
                    continue
                if inconsistenteol(data):
                    # Interpolate after _() so the translation lookup sees
                    # the untranslated template, not the formatted text.
                    raise util.Abort(_("inconsistent newline style "
                                       "in %s\n") % f)
            return super(eolrepo, self).commitctx(ctx, error)
    repo.__class__ = eolrepo
    repo._hgcleardirstate()