hgext/fix.py
changeset 37183 ded5ea279a93
child 37207 d3f1d3e4e58d
equal deleted inserted replaced
37182:559069689121 37183:ded5ea279a93
       
     1 # fix - rewrite file content in changesets and working copy
       
     2 #
       
     3 # Copyright 2018 Google LLC.
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 """rewrite file content in changesets or working copy (EXPERIMENTAL)
       
     8 
       
     9 Provides a command that runs configured tools on the contents of modified files,
       
    10 writing back any fixes to the working copy or replacing changesets.
       
    11 
       
    12 Here is an example configuration that causes :hg:`fix` to apply automatic
       
    13 formatting fixes to modified lines in C++ code::
       
    14 
       
    15   [fix]
       
    16   clang-format:command=clang-format --assume-filename={rootpath}
       
    17   clang-format:linerange=--lines={first}:{last}
       
    18   clang-format:fileset=set:**.cpp or **.hpp
       
    19 
       
    20 The :command suboption forms the first part of the shell command that will be
       
    21 used to fix a file. The content of the file is passed on standard input, and the
       
    22 fixed file content is expected on standard output. If there is any output on
       
    23 standard error, the file will not be affected. Some values may be substituted
       
    24 into the command::
       
    25 
       
    26   {rootpath}  The path of the file being fixed, relative to the repo root
       
    27   {basename}  The name of the file being fixed, without the directory path
       
    28 
       
    29 If the :linerange suboption is set, the tool will only be run if there are
       
    30 changed lines in a file. The value of this suboption is appended to the shell
       
    31 command once for every range of changed lines in the file. Some values may be
       
    32 substituted into the command::
       
    33 
       
    34   {first}   The 1-based line number of the first line in the modified range
       
    35   {last}    The 1-based line number of the last line in the modified range
       
    36 
       
    37 The :fileset suboption determines which files will be passed through each
       
    38 configured tool. See :hg:`help fileset` for possible values. If there are file
       
    39 arguments to :hg:`fix`, the intersection of these filesets is used.
       
    40 
       
    41 There is also a configurable limit for the maximum size of file that will be
       
    42 processed by :hg:`fix`::
       
    43 
       
    44   [fix]
       
    45   maxfilesize=2MB
       
    46 
       
    47 """
       
    48 
       
    49 from __future__ import absolute_import
       
    50 
       
    51 import collections
       
    52 import itertools
       
    53 import os
       
    54 import re
       
    55 import subprocess
       
    56 import sys
       
    57 
       
    58 from mercurial.i18n import _
       
    59 from mercurial.node import nullrev
       
    60 from mercurial.node import wdirrev
       
    61 
       
    62 from mercurial import (
       
    63     cmdutil,
       
    64     context,
       
    65     copies,
       
    66     error,
       
    67     match,
       
    68     mdiff,
       
    69     merge,
       
    70     obsolete,
       
    71     posix,
       
    72     registrar,
       
    73     scmutil,
       
    74     util,
       
    75 )
       
    76 
       
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

# "command" is used below as the decorator on fix(); it is built from
# "cmdtable", so presumably decorated commands are recorded in that table.
cmdtable = {}
command = registrar.command(cmdtable)

# Same pattern for config items: "configitem" is built from "configtable" and
# is called below to declare the [fix] options this extension reads.
configtable = {}
configitem = registrar.configitem(configtable)

# Register the suboptions allowed for each configured fixer.
FIXER_ATTRS = ('command', 'linerange', 'fileset')

# One generic (pattern-based) item per suboption, since the fixer name that
# precedes ":<suboption>" is chosen by the user and not known in advance.
for key in FIXER_ATTRS:
    configitem('fix', '.*(:%s)?' % key, default=None, generic=True)

# A good default size allows most source code files to be fixed, but avoids
# letting fixer tools choke on huge inputs, which could be surprising to the
# user.
configitem('fix', 'maxfilesize', default='2MB')
       
    99 
       
   100 @command('fix',
       
   101     [('', 'base', [], _('revisions to diff against (overrides automatic '
       
   102                         'selection, and applies to every revision being '
       
   103                         'fixed)'), _('REV')),
       
   104      ('r', 'rev', [], _('revisions to fix'), _('REV')),
       
   105      ('w', 'working-dir', False, _('fix the working directory')),
       
   106      ('', 'whole', False, _('always fix every line of a file'))],
       
   107     _('[OPTION]... [FILE]...'))
       
   108 def fix(ui, repo, *pats, **opts):
       
   109     """rewrite file content in changesets or working directory
       
   110 
       
   111     Runs any configured tools to fix the content of files. Only affects files
       
   112     with changes, unless file arguments are provided. Only affects changed lines
       
   113     of files, unless the --whole flag is used. Some tools may always affect the
       
   114     whole file regardless of --whole.
       
   115 
       
   116     If revisions are specified with --rev, those revisions will be checked, and
       
   117     they may be replaced with new revisions that have fixed file content.  It is
       
   118     desirable to specify all descendants of each specified revision, so that the
       
   119     fixes propagate to the descendants. If all descendants are fixed at the same
       
   120     time, no merging, rebasing, or evolution will be required.
       
   121 
       
   122     If --working-dir is used, files with uncommitted changes in the working copy
       
   123     will be fixed. If the checked-out revision is also fixed, the working
       
   124     directory will update to the replacement revision.
       
   125 
       
   126     When determining what lines of each file to fix at each revision, the whole
       
   127     set of revisions being fixed is considered, so that fixes to earlier
       
   128     revisions are not forgotten in later ones. The --base flag can be used to
       
   129     override this default behavior, though it is not usually desirable to do so.
       
   130     """
       
   131     with repo.wlock(), repo.lock():
       
   132         revstofix = getrevstofix(ui, repo, opts)
       
   133         basectxs = getbasectxs(repo, opts, revstofix)
       
   134         workqueue, numitems = getworkqueue(ui, repo, pats, opts, revstofix,
       
   135                                            basectxs)
       
   136         filedata = collections.defaultdict(dict)
       
   137         replacements = {}
       
   138         fixers = getfixers(ui)
       
   139         # Some day this loop can become a worker pool, but for now it's easier
       
   140         # to fix everything serially in topological order.
       
   141         for rev, path in sorted(workqueue):
       
   142             ctx = repo[rev]
       
   143             olddata = ctx[path].data()
       
   144             newdata = fixfile(ui, opts, fixers, ctx, path, basectxs[rev])
       
   145             if newdata != olddata:
       
   146                 filedata[rev][path] = newdata
       
   147             numitems[rev] -= 1
       
   148             if not numitems[rev]:
       
   149                 if rev == wdirrev:
       
   150                     writeworkingdir(repo, ctx, filedata[rev], replacements)
       
   151                 else:
       
   152                     replacerev(ui, repo, ctx, filedata[rev], replacements)
       
   153                 del filedata[rev]
       
   154 
       
   155         replacements = {prec: [succ] for prec, succ in replacements.iteritems()}
       
   156         scmutil.cleanupnodes(repo, replacements, 'fix')
       
   157 
       
   158 def getworkqueue(ui, repo, pats, opts, revstofix, basectxs):
       
   159     """"Constructs the list of files to be fixed at specific revisions
       
   160 
       
   161     It is up to the caller how to consume the work items, and the only
       
   162     dependence between them is that replacement revisions must be committed in
       
   163     topological order. Each work item represents a file in the working copy or
       
   164     in some revision that should be fixed and written back to the working copy
       
   165     or into a replacement revision.
       
   166     """
       
   167     workqueue = []
       
   168     numitems = collections.defaultdict(int)
       
   169     maxfilesize = ui.configbytes('fix', 'maxfilesize')
       
   170     for rev in revstofix:
       
   171         fixctx = repo[rev]
       
   172         match = scmutil.match(fixctx, pats, opts)
       
   173         for path in pathstofix(ui, repo, pats, opts, match, basectxs[rev],
       
   174                                fixctx):
       
   175             if path not in fixctx:
       
   176                 continue
       
   177             fctx = fixctx[path]
       
   178             if fctx.islink():
       
   179                 continue
       
   180             if fctx.size() > maxfilesize:
       
   181                 ui.warn(_('ignoring file larger than %s: %s\n') %
       
   182                         (util.bytecount(maxfilesize), path))
       
   183                 continue
       
   184             workqueue.append((rev, path))
       
   185             numitems[rev] += 1
       
   186     return workqueue, numitems
       
   187 
       
   188 def getrevstofix(ui, repo, opts):
       
   189     """Returns the set of revision numbers that should be fixed"""
       
   190     revs = set(scmutil.revrange(repo, opts['rev']))
       
   191     for rev in revs:
       
   192         checkfixablectx(ui, repo, repo[rev])
       
   193     if revs:
       
   194         cmdutil.checkunfinished(repo)
       
   195         checknodescendants(repo, revs)
       
   196     if opts.get('working_dir'):
       
   197         revs.add(wdirrev)
       
   198         if list(merge.mergestate.read(repo).unresolved()):
       
   199             raise error.Abort('unresolved conflicts', hint="use 'hg resolve'")
       
   200     if not revs:
       
   201         raise error.Abort(
       
   202             'no changesets specified', hint='use --rev or --working-dir')
       
   203     return revs
       
   204 
       
   205 def checknodescendants(repo, revs):
       
   206     if (not obsolete.isenabled(repo, obsolete.allowunstableopt) and
       
   207         repo.revs('(%ld::) - (%ld)', revs, revs)):
       
   208         raise error.Abort(_('can only fix a changeset together '
       
   209                             'with all its descendants'))
       
   210 
       
   211 def checkfixablectx(ui, repo, ctx):
       
   212     """Aborts if the revision shouldn't be replaced with a fixed one."""
       
   213     if not ctx.mutable():
       
   214         raise error.Abort('can\'t fix immutable changeset %s' %
       
   215                           (scmutil.formatchangeid(ctx),))
       
   216     if ctx.obsolete():
       
   217         # It would be better to actually check if the revision has a successor.
       
   218         allowdivergence = ui.configbool('experimental',
       
   219                                         'evolution.allowdivergence')
       
   220         if not allowdivergence:
       
   221             raise error.Abort('fixing obsolete revision could cause divergence')
       
   222 
       
   223 def pathstofix(ui, repo, pats, opts, match, basectxs, fixctx):
       
   224     """Returns the set of files that should be fixed in a context
       
   225 
       
   226     The result depends on the base contexts; we include any file that has
       
   227     changed relative to any of the base contexts. Base contexts should be
       
   228     ancestors of the context being fixed.
       
   229     """
       
   230     files = set()
       
   231     for basectx in basectxs:
       
   232         stat = repo.status(
       
   233             basectx, fixctx, match=match, clean=bool(pats), unknown=bool(pats))
       
   234         files.update(
       
   235             set(itertools.chain(stat.added, stat.modified, stat.clean,
       
   236                                 stat.unknown)))
       
   237     return files
       
   238 
       
   239 def lineranges(opts, path, basectxs, fixctx, content2):
       
   240     """Returns the set of line ranges that should be fixed in a file
       
   241 
       
   242     Of the form [(10, 20), (30, 40)].
       
   243 
       
   244     This depends on the given base contexts; we must consider lines that have
       
   245     changed versus any of the base contexts, and whether the file has been
       
   246     renamed versus any of them.
       
   247 
       
   248     Another way to understand this is that we exclude line ranges that are
       
   249     common to the file in all base contexts.
       
   250     """
       
   251     if opts.get('whole'):
       
   252         # Return a range containing all lines. Rely on the diff implementation's
       
   253         # idea of how many lines are in the file, instead of reimplementing it.
       
   254         return difflineranges('', content2)
       
   255 
       
   256     rangeslist = []
       
   257     for basectx in basectxs:
       
   258         basepath = copies.pathcopies(basectx, fixctx).get(path, path)
       
   259         if basepath in basectx:
       
   260             content1 = basectx[basepath].data()
       
   261         else:
       
   262             content1 = ''
       
   263         rangeslist.extend(difflineranges(content1, content2))
       
   264     return unionranges(rangeslist)
       
   265 
       
   266 def unionranges(rangeslist):
       
   267     """Return the union of some closed intervals
       
   268 
       
   269     >>> unionranges([])
       
   270     []
       
   271     >>> unionranges([(1, 100)])
       
   272     [(1, 100)]
       
   273     >>> unionranges([(1, 100), (1, 100)])
       
   274     [(1, 100)]
       
   275     >>> unionranges([(1, 100), (2, 100)])
       
   276     [(1, 100)]
       
   277     >>> unionranges([(1, 99), (1, 100)])
       
   278     [(1, 100)]
       
   279     >>> unionranges([(1, 100), (40, 60)])
       
   280     [(1, 100)]
       
   281     >>> unionranges([(1, 49), (50, 100)])
       
   282     [(1, 100)]
       
   283     >>> unionranges([(1, 48), (50, 100)])
       
   284     [(1, 48), (50, 100)]
       
   285     >>> unionranges([(1, 2), (3, 4), (5, 6)])
       
   286     [(1, 6)]
       
   287     """
       
   288     rangeslist = sorted(set(rangeslist))
       
   289     unioned = []
       
   290     if rangeslist:
       
   291         unioned, rangeslist = [rangeslist[0]], rangeslist[1:]
       
   292     for a, b in rangeslist:
       
   293         c, d = unioned[-1]
       
   294         if a > d + 1:
       
   295             unioned.append((a, b))
       
   296         else:
       
   297             unioned[-1] = (c, max(b, d))
       
   298     return unioned
       
   299 
       
   300 def difflineranges(content1, content2):
       
   301     """Return list of line number ranges in content2 that differ from content1.
       
   302 
       
   303     Line numbers are 1-based. The numbers are the first and last line contained
       
   304     in the range. Single-line ranges have the same line number for the first and
       
   305     last line. Excludes any empty ranges that result from lines that are only
       
   306     present in content1. Relies on mdiff's idea of where the line endings are in
       
   307     the string.
       
   308 
       
   309     >>> lines = lambda s: '\\n'.join([c for c in s])
       
   310     >>> difflineranges2 = lambda a, b: difflineranges(lines(a), lines(b))
       
   311     >>> difflineranges2('', '')
       
   312     []
       
   313     >>> difflineranges2('a', '')
       
   314     []
       
   315     >>> difflineranges2('', 'A')
       
   316     [(1, 1)]
       
   317     >>> difflineranges2('a', 'a')
       
   318     []
       
   319     >>> difflineranges2('a', 'A')
       
   320     [(1, 1)]
       
   321     >>> difflineranges2('ab', '')
       
   322     []
       
   323     >>> difflineranges2('', 'AB')
       
   324     [(1, 2)]
       
   325     >>> difflineranges2('abc', 'ac')
       
   326     []
       
   327     >>> difflineranges2('ab', 'aCb')
       
   328     [(2, 2)]
       
   329     >>> difflineranges2('abc', 'aBc')
       
   330     [(2, 2)]
       
   331     >>> difflineranges2('ab', 'AB')
       
   332     [(1, 2)]
       
   333     >>> difflineranges2('abcde', 'aBcDe')
       
   334     [(2, 2), (4, 4)]
       
   335     >>> difflineranges2('abcde', 'aBCDe')
       
   336     [(2, 4)]
       
   337     """
       
   338     ranges = []
       
   339     for lines, kind in mdiff.allblocks(content1, content2):
       
   340         firstline, lastline = lines[2:4]
       
   341         if kind == '!' and firstline != lastline:
       
   342             ranges.append((firstline + 1, lastline))
       
   343     return ranges
       
   344 
       
   345 def getbasectxs(repo, opts, revstofix):
       
   346     """Returns a map of the base contexts for each revision
       
   347 
       
   348     The base contexts determine which lines are considered modified when we
       
   349     attempt to fix just the modified lines in a file.
       
   350     """
       
   351     # The --base flag overrides the usual logic, and we give every revision
       
   352     # exactly the set of baserevs that the user specified.
       
   353     if opts.get('base'):
       
   354         baserevs = set(scmutil.revrange(repo, opts.get('base')))
       
   355         if not baserevs:
       
   356             baserevs = {nullrev}
       
   357         basectxs = {repo[rev] for rev in baserevs}
       
   358         return {rev: basectxs for rev in revstofix}
       
   359 
       
   360     # Proceed in topological order so that we can easily determine each
       
   361     # revision's baserevs by looking at its parents and their baserevs.
       
   362     basectxs = collections.defaultdict(set)
       
   363     for rev in sorted(revstofix):
       
   364         ctx = repo[rev]
       
   365         for pctx in ctx.parents():
       
   366             if pctx.rev() in basectxs:
       
   367                 basectxs[rev].update(basectxs[pctx.rev()])
       
   368             else:
       
   369                 basectxs[rev].add(pctx)
       
   370     return basectxs
       
   371 
       
   372 def fixfile(ui, opts, fixers, fixctx, path, basectxs):
       
   373     """Run any configured fixers that should affect the file in this context
       
   374 
       
   375     Returns the file content that results from applying the fixers in some order
       
   376     starting with the file's content in the fixctx. Fixers that support line
       
   377     ranges will affect lines that have changed relative to any of the basectxs
       
   378     (i.e. they will only avoid lines that are common to all basectxs).
       
   379     """
       
   380     newdata = fixctx[path].data()
       
   381     for fixername, fixer in fixers.iteritems():
       
   382         if fixer.affects(opts, fixctx, path):
       
   383             ranges = lineranges(opts, path, basectxs, fixctx, newdata)
       
   384             command = fixer.command(path, ranges)
       
   385             if command is None:
       
   386                 continue
       
   387             ui.debug('subprocess: %s\n' % (command,))
       
   388             proc = subprocess.Popen(
       
   389                 command,
       
   390                 shell=True,
       
   391                 cwd='/',
       
   392                 stdin=subprocess.PIPE,
       
   393                 stdout=subprocess.PIPE,
       
   394                 stderr=subprocess.PIPE)
       
   395             newerdata, stderr = proc.communicate(newdata)
       
   396             if stderr:
       
   397                 showstderr(ui, fixctx.rev(), fixername, stderr)
       
   398             else:
       
   399                 newdata = newerdata
       
   400     return newdata
       
   401 
       
   402 def showstderr(ui, rev, fixername, stderr):
       
   403     """Writes the lines of the stderr string as warnings on the ui
       
   404 
       
   405     Uses the revision number and fixername to give more context to each line of
       
   406     the error message. Doesn't include file names, since those take up a lot of
       
   407     space and would tend to be included in the error message if they were
       
   408     relevant.
       
   409     """
       
   410     for line in re.split('[\r\n]+', stderr):
       
   411         if line:
       
   412             ui.warn(('['))
       
   413             if rev is None:
       
   414                 ui.warn(_('wdir'), label='evolve.rev')
       
   415             else:
       
   416                 ui.warn((str(rev)), label='evolve.rev')
       
   417             ui.warn(('] %s: %s\n') % (fixername, line))
       
   418 
       
   419 def writeworkingdir(repo, ctx, filedata, replacements):
       
   420     """Write new content to the working copy and check out the new p1 if any
       
   421 
       
   422     We check out a new revision if and only if we fixed something in both the
       
   423     working directory and its parent revision. This avoids the need for a full
       
   424     update/merge, and means that the working directory simply isn't affected
       
   425     unless the --working-dir flag is given.
       
   426 
       
   427     Directly updates the dirstate for the affected files.
       
   428     """
       
   429     for path, data in filedata.iteritems():
       
   430         fctx = ctx[path]
       
   431         fctx.write(data, fctx.flags())
       
   432         if repo.dirstate[path] == 'n':
       
   433             repo.dirstate.normallookup(path)
       
   434 
       
   435     oldparentnodes = repo.dirstate.parents()
       
   436     newparentnodes = [replacements.get(n, n) for n in oldparentnodes]
       
   437     if newparentnodes != oldparentnodes:
       
   438         repo.setparents(*newparentnodes)
       
   439 
       
   440 def replacerev(ui, repo, ctx, filedata, replacements):
       
   441     """Commit a new revision like the given one, but with file content changes
       
   442 
       
   443     "ctx" is the original revision to be replaced by a modified one.
       
   444 
       
   445     "filedata" is a dict that maps paths to their new file content. All other
       
   446     paths will be recreated from the original revision without changes.
       
   447     "filedata" may contain paths that didn't exist in the original revision;
       
   448     they will be added.
       
   449 
       
   450     "replacements" is a dict that maps a single node to a single node, and it is
       
   451     updated to indicate the original revision is replaced by the newly created
       
   452     one. No entry is added if the replacement's node already exists.
       
   453 
       
   454     The new revision has the same parents as the old one, unless those parents
       
   455     have already been replaced, in which case those replacements are the parents
       
   456     of this new revision. Thus, if revisions are replaced in topological order,
       
   457     there is no need to rebase them into the original topology later.
       
   458     """
       
   459 
       
   460     p1rev, p2rev = repo.changelog.parentrevs(ctx.rev())
       
   461     p1ctx, p2ctx = repo[p1rev], repo[p2rev]
       
   462     newp1node = replacements.get(p1ctx.node(), p1ctx.node())
       
   463     newp2node = replacements.get(p2ctx.node(), p2ctx.node())
       
   464 
       
   465     def filectxfn(repo, memctx, path):
       
   466         if path not in ctx:
       
   467             return None
       
   468         fctx = ctx[path]
       
   469         copied = fctx.renamed()
       
   470         if copied:
       
   471             copied = copied[0]
       
   472         return context.memfilectx(
       
   473             repo,
       
   474             memctx,
       
   475             path=fctx.path(),
       
   476             data=filedata.get(path, fctx.data()),
       
   477             islink=fctx.islink(),
       
   478             isexec=fctx.isexec(),
       
   479             copied=copied)
       
   480 
       
   481     overrides = {('phases', 'new-commit'): ctx.phase()}
       
   482     with ui.configoverride(overrides, source='fix'):
       
   483         memctx = context.memctx(
       
   484             repo,
       
   485             parents=(newp1node, newp2node),
       
   486             text=ctx.description(),
       
   487             files=set(ctx.files()) | set(filedata.keys()),
       
   488             filectxfn=filectxfn,
       
   489             user=ctx.user(),
       
   490             date=ctx.date(),
       
   491             extra=ctx.extra(),
       
   492             branch=ctx.branch(),
       
   493             editor=None)
       
   494         sucnode = memctx.commit()
       
   495         prenode = ctx.node()
       
   496         if prenode == sucnode:
       
   497             ui.debug('node %s already existed\n' % (ctx.hex()))
       
   498         else:
       
   499             replacements[ctx.node()] = sucnode
       
   500 
       
   501 def getfixers(ui):
       
   502     """Returns a map of configured fixer tools indexed by their names
       
   503 
       
   504     Each value is a Fixer object with methods that implement the behavior of the
       
   505     fixer's config suboptions. Does not validate the config values.
       
   506     """
       
   507     result = {}
       
   508     for name in fixernames(ui):
       
   509         result[name] = Fixer()
       
   510         attrs = ui.configsuboptions('fix', name)[1]
       
   511         for key in FIXER_ATTRS:
       
   512             setattr(result[name], '_' + key, attrs.get(key, ''))
       
   513     return result
       
   514 
       
   515 def fixernames(ui):
       
   516     """Returns the names of [fix] config options that have suboptions"""
       
   517     names = set()
       
   518     for k, v in ui.configitems('fix'):
       
   519         if ':' in k:
       
   520             names.add(k.split(':', 1)[0])
       
   521     return names
       
   522 
       
   523 class Fixer(object):
       
   524     """Wraps the raw config values for a fixer with methods"""
       
   525 
       
   526     def affects(self, opts, fixctx, path):
       
   527         """Should this fixer run on the file at the given path and context?"""
       
   528         return scmutil.match(fixctx, [self._fileset], opts)(path)
       
   529 
       
   530     def command(self, path, ranges):
       
   531         """A shell command to use to invoke this fixer on the given file/lines
       
   532 
       
   533         May return None if there is no appropriate command to run for the given
       
   534         parameters.
       
   535         """
       
   536         parts = [self._command.format(rootpath=path,
       
   537                                       basename=os.path.basename(path))]
       
   538         if self._linerange:
       
   539             if not ranges:
       
   540                 # No line ranges to fix, so don't run the fixer.
       
   541                 return None
       
   542             for first, last in ranges:
       
   543                 parts.append(self._linerange.format(first=first, last=last))
       
   544         return ' '.join(parts)