hgext/eol.py
branch stable
changeset 11249 0bb67503ad4b
child 12062 c327bfa5e831
child 12307 0852da25a31b
equal deleted inserted replaced
11246:8f5ad12db28e 11249:0bb67503ad4b
       
     1 """automatically manage newlines in repository files
       
     2 
       
     3 This extension allows you to manage the type of line endings (CRLF or
       
     4 LF) that are used in the repository and in the local working
       
     5 directory. That way you can get CRLF line endings on Windows and LF on
       
     6 Unix/Mac, thereby letting everybody use their OS native line endings.
       
     7 
       
     8 The extension reads its configuration from a versioned ``.hgeol``
       
     9 configuration file every time you run an ``hg`` command. The
       
    10 ``.hgeol`` file uses the same syntax as all other Mercurial
       
    11 configuration files. It uses two sections, ``[patterns]`` and
       
    12 ``[repository]``.
       
    13 
       
    14 The ``[patterns]`` section specifies the line endings used in the
       
    15 working directory. The format is specified by a file pattern. The
       
    16 first match is used, so put more specific patterns first. The
       
    17 available line endings are ``LF``, ``CRLF``, and ``BIN``.
       
    18 
       
    19 Files with the declared format of ``CRLF`` or ``LF`` are always
       
    20 checked out in that format and files declared to be binary (``BIN``)
       
    21 are left unchanged. Additionally, ``native`` is an alias for the
       
    22 platform's default line ending: ``LF`` on Unix (including Mac OS X)
       
    23 and ``CRLF`` on Windows. Note that ``BIN`` (do nothing to line
       
    24 endings) is Mercurial's default behaviour; it is only needed if you
       
    25 need to override a later, more general pattern.
       
    26 
       
    27 The optional ``[repository]`` section specifies the line endings to
       
    28 use for files stored in the repository. It has a single setting,
       
    29 ``native``, which determines the storage line endings for files
       
    30 declared as ``native`` in the ``[patterns]`` section. It can be set to
       
    31 ``LF`` or ``CRLF``. The default is ``LF``. For example, this means
       
    32 that on Windows, files configured as ``native`` (``CRLF`` by default)
       
    33 will be converted to ``LF`` when stored in the repository. Files
       
    34 declared as ``LF``, ``CRLF``, or ``BIN`` in the ``[patterns]`` section
       
    35 are always stored as-is in the repository.
       
    36 
       
    37 Example versioned ``.hgeol`` file::
       
    38 
       
    39   [patterns]
       
    40   **.py = native
       
    41   **.vcproj = CRLF
       
    42   **.txt = native
       
    43   Makefile = LF
       
    44   **.jpg = BIN
       
    45 
       
    46   [repository]
       
    47   native = LF
       
    48 
       
    49 The extension uses an optional ``[eol]`` section in your hgrc file
       
    50 (not the ``.hgeol`` file) for settings that control the overall
       
    51 behavior. There are two settings:
       
    52 
       
    53 - ``eol.native`` (default ``os.linesep``) can be set to ``LF`` or
       
    54   ``CRLF`` to override the default interpretation of ``native`` for
       
    55   checkout. This can be used with :hg:`archive` on Unix, say, to
       
    56   generate an archive where files have line endings for Windows.
       
    57 
       
    58 - ``eol.only-consistent`` (default True) can be set to False to make
       
    59   the extension convert files with inconsistent EOLs. Inconsistent
       
    60   means that there is both ``CRLF`` and ``LF`` present in the file.
       
    61   Such files are normally not touched under the assumption that they
       
    62   have mixed EOLs on purpose.
       
    63 
       
    64 See :hg:`help patterns` for more information about the glob patterns
       
    65 used.
       
    66 """
       
    67 
       
    68 from mercurial.i18n import _
       
    69 from mercurial import util, config, extensions, commands, match, cmdutil
       
    70 import re, os
       
    71 
       
    72 # Matches a lone LF, i.e., one that is not part of CRLF.
       
    73 singlelf = re.compile('(^|[^\r])\n')
       
    74 # Matches a single EOL which can either be a CRLF where repeated CR
       
    75 # are removed or a LF. We do not care about old Machintosh files, so a
       
    76 # stray CR is an error.
       
    77 eolre = re.compile('\r*\n')
       
    78 
       
    79 
       
    80 def inconsistenteol(data):
       
    81     return '\r\n' in data and singlelf.search(data)
       
    82 
       
    83 def tolf(s, params, ui, **kwargs):
       
    84     """Filter to convert to LF EOLs."""
       
    85     if util.binary(s):
       
    86         return s
       
    87     if ui.configbool('eol', 'only-consistent', True) and inconsistenteol(s):
       
    88         return s
       
    89     return eolre.sub('\n', s)
       
    90 
       
    91 def tocrlf(s, params, ui, **kwargs):
       
    92     """Filter to convert to CRLF EOLs."""
       
    93     if util.binary(s):
       
    94         return s
       
    95     if ui.configbool('eol', 'only-consistent', True) and inconsistenteol(s):
       
    96         return s
       
    97     return eolre.sub('\r\n', s)
       
    98 
       
    99 def isbinary(s, params):
       
   100     """Filter to do nothing with the file."""
       
   101     return s
       
   102 
       
   103 filters = {
       
   104     'to-lf': tolf,
       
   105     'to-crlf': tocrlf,
       
   106     'is-binary': isbinary,
       
   107 }
       
   108 
       
   109 
       
   110 def hook(ui, repo, node, hooktype, **kwargs):
       
   111     """verify that files have expected EOLs"""
       
   112     files = set()
       
   113     for rev in xrange(repo[node].rev(), len(repo)):
       
   114         files.update(repo[rev].files())
       
   115     tip = repo['tip']
       
   116     for f in files:
       
   117         if f not in tip:
       
   118             continue
       
   119         for pattern, target in ui.configitems('encode'):
       
   120             if match.match(repo.root, '', [pattern])(f):
       
   121                 data = tip[f].data()
       
   122                 if target == "to-lf" and "\r\n" in data:
       
   123                     raise util.Abort(_("%s should not have CRLF line endings")
       
   124                                      % f)
       
   125                 elif target == "to-crlf" and singlelf.search(data):
       
   126                     raise util.Abort(_("%s should not have LF line endings")
       
   127                                      % f)
       
   128 
       
   129 
       
   130 def preupdate(ui, repo, hooktype, parent1, parent2):
       
   131     #print "preupdate for %s: %s -> %s" % (repo.root, parent1, parent2)
       
   132     repo.readhgeol(parent1)
       
   133     return False
       
   134 
       
   135 def uisetup(ui):
       
   136     ui.setconfig('hooks', 'preupdate.eol', preupdate)
       
   137 
       
   138 def extsetup(ui):
       
   139     try:
       
   140         extensions.find('win32text')
       
   141         raise util.Abort(_("the eol extension is incompatible with the "
       
   142                            "win32text extension"))
       
   143     except KeyError:
       
   144         pass
       
   145 
       
   146 
       
   147 def reposetup(ui, repo):
       
   148     #print "reposetup for", repo.root
       
   149 
       
   150     if not repo.local():
       
   151         return
       
   152     for name, fn in filters.iteritems():
       
   153         repo.adddatafilter(name, fn)
       
   154 
       
   155     ui.setconfig('patch', 'eol', 'auto')
       
   156 
       
   157     class eolrepo(repo.__class__):
       
   158 
       
   159         _decode = {'LF': 'to-lf', 'CRLF': 'to-crlf', 'BIN': 'is-binary'}
       
   160         _encode = {'LF': 'to-lf', 'CRLF': 'to-crlf', 'BIN': 'is-binary'}
       
   161 
       
   162         def readhgeol(self, node=None, data=None):
       
   163             if data is None:
       
   164                 try:
       
   165                     if node is None:
       
   166                         data = self.wfile('.hgeol').read()
       
   167                     else:
       
   168                         data = self[node]['.hgeol'].data()
       
   169                 except (IOError, LookupError):
       
   170                     return None
       
   171 
       
   172             if self.ui.config('eol', 'native', os.linesep) in ('LF', '\n'):
       
   173                 self._decode['NATIVE'] = 'to-lf'
       
   174             else:
       
   175                 self._decode['NATIVE'] = 'to-crlf'
       
   176 
       
   177             eol = config.config()
       
   178             eol.parse('.hgeol', data)
       
   179 
       
   180             if eol.get('repository', 'native') == 'CRLF':
       
   181                 self._encode['NATIVE'] = 'to-crlf'
       
   182             else:
       
   183                 self._encode['NATIVE'] = 'to-lf'
       
   184 
       
   185             for pattern, style in eol.items('patterns'):
       
   186                 key = style.upper()
       
   187                 try:
       
   188                     self.ui.setconfig('decode', pattern, self._decode[key])
       
   189                     self.ui.setconfig('encode', pattern, self._encode[key])
       
   190                 except KeyError:
       
   191                     self.ui.warn(_("ignoring unknown EOL style '%s' from %s\n")
       
   192                                  % (style, eol.source('patterns', pattern)))
       
   193 
       
   194             include = []
       
   195             exclude = []
       
   196             for pattern, style in eol.items('patterns'):
       
   197                 key = style.upper()
       
   198                 if key == 'BIN':
       
   199                     exclude.append(pattern)
       
   200                 else:
       
   201                     include.append(pattern)
       
   202 
       
   203             # This will match the files for which we need to care
       
   204             # about inconsistent newlines.
       
   205             return match.match(self.root, '', [], include, exclude)
       
   206 
       
   207         def _hgcleardirstate(self):
       
   208             self._eolfile = self.readhgeol() or self.readhgeol('tip')
       
   209 
       
   210             if not self._eolfile:
       
   211                 self._eolfile = util.never
       
   212                 return
       
   213 
       
   214             try:
       
   215                 cachemtime = os.path.getmtime(self.join("eol.cache"))
       
   216             except OSError:
       
   217                 cachemtime = 0
       
   218 
       
   219             try:
       
   220                 eolmtime = os.path.getmtime(self.wjoin(".hgeol"))
       
   221             except OSError:
       
   222                 eolmtime = 0
       
   223 
       
   224             if eolmtime > cachemtime:
       
   225                 ui.debug("eol: detected change in .hgeol\n")
       
   226                 # TODO: we could introduce a method for this in dirstate.
       
   227                 wlock = None
       
   228                 try:
       
   229                     wlock = self.wlock()
       
   230                     for f, e in self.dirstate._map.iteritems():
       
   231                         self.dirstate._map[f] = (e[0], e[1], -1, 0)
       
   232                     self.dirstate._dirty = True
       
   233                     # Touch the cache to update mtime. TODO: are we sure this
       
   234                     # always enought to update the mtime, or should we write a
       
   235                     # bit to the file?
       
   236                     self.opener("eol.cache", "w").close()
       
   237                 finally:
       
   238                     if wlock is not None:
       
   239                         wlock.release()
       
   240 
       
   241         def commitctx(self, ctx, error=False):
       
   242             for f in sorted(ctx.added() + ctx.modified()):
       
   243                 if not self._eolfile(f):
       
   244                     continue
       
   245                 data = ctx[f].data()
       
   246                 if util.binary(data):
       
   247                     # We should not abort here, since the user should
       
   248                     # be able to say "** = native" to automatically
       
   249                     # have all non-binary files taken care of.
       
   250                     continue
       
   251                 if inconsistenteol(data):
       
   252                     raise util.Abort(_("inconsistent newline style "
       
   253                                        "in %s\n" % f))
       
   254             return super(eolrepo, self).commitctx(ctx, error)
       
   255     repo.__class__ = eolrepo
       
   256     repo._hgcleardirstate()