comparison hgext/fix.py @ 37183:ded5ea279a93

fix: new extension for automatically modifying file contents This change implements most of the corresponding proposal as discussed at the 4.4 and 4.6 sprints: https://www.mercurial-scm.org/wiki/AutomaticFormattingPlan This change notably does not include parallel execution of the formatter/fixer tools. It does allow for implementing that without affecting other areas of the code. I believe the test coverage to be good, but this is a hotbed of corner cases. Differential Revision: https://phab.mercurial-scm.org/D2897
author Danny Hooper <hooper@google.com>
date Sat, 03 Mar 2018 14:08:44 -0800
parents
children d3f1d3e4e58d
comparison
equal deleted inserted replaced
37182:559069689121 37183:ded5ea279a93
1 # fix - rewrite file content in changesets and working copy
2 #
3 # Copyright 2018 Google LLC.
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7 """rewrite file content in changesets or working copy (EXPERIMENTAL)
8
9 Provides a command that runs configured tools on the contents of modified files,
10 writing back any fixes to the working copy or replacing changesets.
11
12 Here is an example configuration that causes :hg:`fix` to apply automatic
13 formatting fixes to modified lines in C++ code::
14
15 [fix]
16 clang-format:command=clang-format --assume-filename={rootpath}
17 clang-format:linerange=--lines={first}:{last}
18 clang-format:fileset=set:**.cpp or **.hpp
19
20 The :command suboption forms the first part of the shell command that will be
21 used to fix a file. The content of the file is passed on standard input, and the
22 fixed file content is expected on standard output. If there is any output on
23 standard error, the file will not be affected. Some values may be substituted
24 into the command::
25
26 {rootpath} The path of the file being fixed, relative to the repo root
27 {basename} The name of the file being fixed, without the directory path
28
29 If the :linerange suboption is set, the tool will only be run if there are
30 changed lines in a file. The value of this suboption is appended to the shell
31 command once for every range of changed lines in the file. Some values may be
32 substituted into the command::
33
34 {first} The 1-based line number of the first line in the modified range
35 {last} The 1-based line number of the last line in the modified range
36
37 The :fileset suboption determines which files will be passed through each
38 configured tool. See :hg:`help fileset` for possible values. If there are file
39 arguments to :hg:`fix`, the intersection of these filesets is used.
40
41 There is also a configurable limit for the maximum size of file that will be
42 processed by :hg:`fix`::
43
44 [fix]
45 maxfilesize=2MB
46
47 """
48
49 from __future__ import absolute_import
50
51 import collections
52 import itertools
53 import os
54 import re
55 import subprocess
56 import sys
57
58 from mercurial.i18n import _
59 from mercurial.node import nullrev
60 from mercurial.node import wdirrev
61
62 from mercurial import (
63 cmdutil,
64 context,
65 copies,
66 error,
67 match,
68 mdiff,
69 merge,
70 obsolete,
71 posix,
72 registrar,
73 scmutil,
74 util,
75 )
76
# Note for extension authors: 'ships-with-hg-core' is reserved for extensions
# that SHIP WITH MERCURIAL. Extensions maintained outside the main tree should
# list the version(s) of Mercurial they are tested with instead, or leave the
# attribute unspecified.
testedwith = 'ships-with-hg-core'

# Tables that the registrar helpers below fill in as commands and config items
# are declared.
cmdtable = {}
configtable = {}
command = registrar.command(cmdtable)
configitem = registrar.configitem(configtable)

# The suboptions allowed for each configured fixer.
FIXER_ATTRS = ('command', 'linerange', 'fileset')

# Register each suboption as a generic (pattern-matched) [fix] config item.
for key in FIXER_ATTRS:
    configitem('fix', '.*(:%s)?' % key, default=None, generic=True)

# The default size limit is meant to let most source code files through while
# keeping fixer tools from choking on huge inputs, which could surprise the
# user.
configitem('fix', 'maxfilesize', default='2MB')
99
@command('fix',
    [('', 'base', [], _('revisions to diff against (overrides automatic '
                        'selection, and applies to every revision being '
                        'fixed)'), _('REV')),
     ('r', 'rev', [], _('revisions to fix'), _('REV')),
     ('w', 'working-dir', False, _('fix the working directory')),
     ('', 'whole', False, _('always fix every line of a file'))],
    _('[OPTION]... [FILE]...'))
def fix(ui, repo, *pats, **opts):
    """rewrite file content in changesets or working directory

    Runs any configured tools to fix the content of files. Only affects files
    with changes, unless file arguments are provided. Only affects changed lines
    of files, unless the --whole flag is used. Some tools may always affect the
    whole file regardless of --whole.

    If revisions are specified with --rev, those revisions will be checked, and
    they may be replaced with new revisions that have fixed file content.  It is
    desirable to specify all descendants of each specified revision, so that the
    fixes propagate to the descendants. If all descendants are fixed at the same
    time, no merging, rebasing, or evolution will be required.

    If --working-dir is used, files with uncommitted changes in the working copy
    will be fixed. If the checked-out revision is also fixed, the working
    directory will update to the replacement revision.

    When determining what lines of each file to fix at each revision, the whole
    set of revisions being fixed is considered, so that fixes to earlier
    revisions are not forgotten in later ones. The --base flag can be used to
    override this default behavior, though it is not usually desirable to do so.
    """
    with repo.wlock(), repo.lock():
        revstofix = getrevstofix(ui, repo, opts)
        basectxs = getbasectxs(repo, opts, revstofix)
        # The per-revision item counts are not needed here because each
        # revision is finished in a single pass of the loop below.
        workqueue, _numitems = getworkqueue(ui, repo, pats, opts, revstofix,
                                            basectxs)
        pathsbyrev = collections.defaultdict(list)
        for rev, path in workqueue:
            pathsbyrev[rev].append(path)
        replacements = {}
        fixers = getfixers(ui)
        # Some day this loop can become a worker pool, but for now it's easier
        # to fix everything serially in topological order.
        #
        # Every requested revision is visited, not only those that have work
        # items. A revision without any fixable file must still be recreated on
        # top of a replaced parent, and the working directory must still be
        # moved to a replaced parent; otherwise such revisions would be left
        # behind on the old, replaced history.
        for rev in sorted(revstofix):
            ctx = repo[rev]
            filedata = {}
            for path in sorted(pathsbyrev[rev]):
                olddata = ctx[path].data()
                newdata = fixfile(ui, opts, fixers, ctx, path, basectxs[rev])
                if newdata != olddata:
                    filedata[path] = newdata
            if rev == wdirrev:
                writeworkingdir(repo, ctx, filedata, replacements)
            elif pathsbyrev[rev] or any(pctx.node() in replacements
                                        for pctx in ctx.parents()):
                replacerev(ui, repo, ctx, filedata, replacements)

        replacements = {prec: [succ] for prec, succ in replacements.iteritems()}
        scmutil.cleanupnodes(repo, replacements, 'fix')
157
def getworkqueue(ui, repo, pats, opts, revstofix, basectxs):
    """Constructs the list of files to be fixed at specific revisions

    Returns a pair (workqueue, numitems). "workqueue" is a list of (rev, path)
    tuples, one per file that should be fixed and written back to the working
    copy or into a replacement revision. "numitems" maps each revision number
    to the number of work items queued for it.

    Paths missing from the revision, symlinks, and files larger than the
    fix.maxfilesize config value are left out; a warning is printed for each
    file skipped because of its size.

    It is up to the caller how to consume the work items, and the only
    dependence between them is that replacement revisions must be committed in
    topological order.
    """
    maxfilesize = ui.configbytes('fix', 'maxfilesize')
    numitems = collections.defaultdict(int)
    workqueue = []
    for rev in revstofix:
        fixctx = repo[rev]
        matcher = scmutil.match(fixctx, pats, opts)
        candidates = pathstofix(ui, repo, pats, opts, matcher, basectxs[rev],
                                fixctx)
        for path in candidates:
            if path not in fixctx:
                continue
            fctx = fixctx[path]
            if fctx.islink():
                continue
            if fctx.size() > maxfilesize:
                ui.warn(_('ignoring file larger than %s: %s\n') %
                        (util.bytecount(maxfilesize), path))
                continue
            numitems[rev] += 1
            workqueue.append((rev, path))
    return workqueue, numitems
187
def getrevstofix(ui, repo, opts):
    """Returns the set of revision numbers that should be fixed

    The set contains the revisions selected by --rev, plus wdirrev when
    --working-dir is given.

    Every revision selected by --rev is passed to checkfixablectx(). When --rev
    selects anything, cmdutil.checkunfinished() and checknodescendants() are
    applied as well. Aborts if --working-dir is given while the merge state
    still has unresolved files, or if nothing was selected at all.
    """
    revs = set(scmutil.revrange(repo, opts['rev']))
    for rev in revs:
        checkfixablectx(ui, repo, repo[rev])
    if revs:
        cmdutil.checkunfinished(repo)
        checknodescendants(repo, revs)
    if opts.get('working_dir'):
        revs.add(wdirrev)
        if list(merge.mergestate.read(repo).unresolved()):
            # Messages are marked for translation, like the other aborts in
            # this file.
            raise error.Abort(_('unresolved conflicts'),
                              hint=_("use 'hg resolve'"))
    if not revs:
        raise error.Abort(
            _('no changesets specified'),
            hint=_('use --rev or --working-dir'))
    return revs
204
def checknodescendants(repo, revs):
    """Aborts if a revision would be fixed without all of its descendants

    The check is skipped when the obsolete.allowunstableopt option is enabled
    for the repo. Otherwise, any descendant of "revs" that is not itself in
    "revs" causes an abort.
    """
    if obsolete.isenabled(repo, obsolete.allowunstableopt):
        return
    unselected = repo.revs('(%ld::) - (%ld)', revs, revs)
    if unselected:
        raise error.Abort(_('can only fix a changeset together '
                            'with all its descendants'))
210
def checkfixablectx(ui, repo, ctx):
    """Aborts if the revision shouldn't be replaced with a fixed one.

    A revision is rejected when it is not mutable, or when it is obsolete and
    the experimental.evolution.allowdivergence config option is not set.
    """
    if not ctx.mutable():
        # Messages are marked for translation, like the abort in
        # checknodescendants().
        raise error.Abort(_("can't fix immutable changeset %s") %
                          (scmutil.formatchangeid(ctx),))
    if ctx.obsolete():
        # It would be better to actually check if the revision has a successor.
        allowdivergence = ui.configbool('experimental',
                                        'evolution.allowdivergence')
        if not allowdivergence:
            raise error.Abort(
                _('fixing obsolete revision could cause divergence'))
222
def pathstofix(ui, repo, pats, opts, match, basectxs, fixctx):
    """Returns the set of files that should be fixed in a context

    A file is included if repo.status() reports it as added or modified in
    "fixctx" relative to any of the base contexts. Clean and unknown files are
    requested from status, and therefore included, only when file patterns
    were given. Base contexts should be ancestors of the context being fixed.
    """
    explicit = bool(pats)
    found = set()
    for basectx in basectxs:
        status = repo.status(basectx, fixctx, match=match, clean=explicit,
                             unknown=explicit)
        for group in (status.added, status.modified, status.clean,
                      status.unknown):
            found.update(group)
    return found
238
def lineranges(opts, path, basectxs, fixctx, content2):
    """Returns the set of line ranges that should be fixed in a file

    Of the form [(10, 20), (30, 40)].

    "content2" is the current content of the file. It is diffed against the
    file's content in each base context, following a copy/rename recorded
    between that base context and "fixctx". A file that is missing from a base
    context is treated as empty there. The result is the union of the changed
    ranges, i.e. only line ranges common to the file in all base contexts are
    excluded.

    With --whole, a range covering every line is returned instead.
    """
    if opts.get('whole'):
        # Diffing against empty content yields a range containing all lines,
        # using the diff implementation's own idea of how many lines there are.
        return difflineranges('', content2)

    changed = []
    for basectx in basectxs:
        renames = copies.pathcopies(basectx, fixctx)
        oldpath = renames.get(path, path)
        olddata = basectx[oldpath].data() if oldpath in basectx else ''
        changed.extend(difflineranges(olddata, content2))
    return unionranges(changed)
265
def unionranges(rangeslist):
    """Return the union of some closed intervals

    Intervals that overlap or are directly adjacent (such as (1, 49) and
    (50, 100)) are merged. The result is sorted.

    >>> unionranges([])
    []
    >>> unionranges([(1, 100)])
    [(1, 100)]
    >>> unionranges([(1, 100), (1, 100)])
    [(1, 100)]
    >>> unionranges([(1, 100), (2, 100)])
    [(1, 100)]
    >>> unionranges([(1, 99), (1, 100)])
    [(1, 100)]
    >>> unionranges([(1, 100), (40, 60)])
    [(1, 100)]
    >>> unionranges([(1, 49), (50, 100)])
    [(1, 100)]
    >>> unionranges([(1, 48), (50, 100)])
    [(1, 48), (50, 100)]
    >>> unionranges([(1, 2), (3, 4), (5, 6)])
    [(1, 6)]
    """
    ordered = sorted(set(rangeslist))
    if not ordered:
        return []
    merged = ordered[:1]
    for first, last in ordered[1:]:
        prevfirst, prevlast = merged[-1]
        if first <= prevlast + 1:
            # Overlapping or adjacent: grow the most recent interval.
            merged[-1] = (prevfirst, max(last, prevlast))
        else:
            merged.append((first, last))
    return merged
299
def difflineranges(content1, content2):
    """Return list of line number ranges in content2 that differ from content1.

    Line numbers are 1-based. Each range is a (first, last) pair naming the
    first and last line contained in it, so a single-line range repeats the
    same number. Blocks that are empty on the content2 side, i.e. lines only
    present in content1, are left out. Relies on mdiff's idea of where the line
    endings are in the string.

    >>> lines = lambda s: '\\n'.join([c for c in s])
    >>> difflineranges2 = lambda a, b: difflineranges(lines(a), lines(b))
    >>> difflineranges2('', '')
    []
    >>> difflineranges2('a', '')
    []
    >>> difflineranges2('', 'A')
    [(1, 1)]
    >>> difflineranges2('a', 'a')
    []
    >>> difflineranges2('a', 'A')
    [(1, 1)]
    >>> difflineranges2('ab', '')
    []
    >>> difflineranges2('', 'AB')
    [(1, 2)]
    >>> difflineranges2('abc', 'ac')
    []
    >>> difflineranges2('ab', 'aCb')
    [(2, 2)]
    >>> difflineranges2('abc', 'aBc')
    [(2, 2)]
    >>> difflineranges2('ab', 'AB')
    [(1, 2)]
    >>> difflineranges2('abcde', 'aBcDe')
    [(2, 2), (4, 4)]
    >>> difflineranges2('abcde', 'aBCDe')
    [(2, 4)]
    """
    changed = []
    for block, kind in mdiff.allblocks(content1, content2):
        start, end = block[2:4]
        if kind != '!' or start == end:
            continue
        # "start" is 0-based, so shift it to get the 1-based first line.
        changed.append((start + 1, end))
    return changed
344
def getbasectxs(repo, opts, revstofix):
    """Returns a map of the base contexts for each revision

    The base contexts determine which lines are considered modified when we
    attempt to fix just the modified lines in a file.

    The result maps each revision number in "revstofix" to a set of contexts.
    """
    # The --base flag overrides the usual logic: every revision gets exactly
    # the set of base revisions that the user specified, or the null revision
    # if the given revset turns out to be empty.
    userbases = opts.get('base')
    if userbases:
        baserevs = set(scmutil.revrange(repo, userbases)) or {nullrev}
        shared = {repo[rev] for rev in baserevs}
        return dict.fromkeys(revstofix, shared)

    # Walk the revisions in topological order. A parent that is itself being
    # fixed contributes its own bases; any other parent is a base directly.
    basectxs = collections.defaultdict(set)
    for rev in sorted(revstofix):
        for pctx in repo[rev].parents():
            prev = pctx.rev()
            if prev in basectxs:
                basectxs[rev].update(basectxs[prev])
            else:
                basectxs[rev].add(pctx)
    return basectxs
371
def fixfile(ui, opts, fixers, fixctx, path, basectxs):
    """Run any configured fixers that should affect the file in this context

    Returns the file content that results from applying the fixers, in order of
    their names, starting with the file's content in the fixctx. Fixers that
    support line ranges will affect lines that have changed relative to any of
    the basectxs (i.e. they will only avoid lines that are common to all
    basectxs).

    Each fixer runs as a shell command that receives the current content on
    standard input. If the command writes anything to standard error, that
    output is shown as warnings and the fixer's output is discarded; otherwise
    its standard output becomes the input of the next fixer.
    """
    newdata = fixctx[path].data()
    # Apply the fixers sorted by name. Iterating the dict directly would make
    # the result depend on an arbitrary order whenever more than one fixer
    # affects the same file.
    for fixername, fixer in sorted(fixers.items()):
        if not fixer.affects(opts, fixctx, path):
            continue
        # The ranges are recomputed against the latest content, since an
        # earlier fixer may have moved lines around.
        ranges = lineranges(opts, path, basectxs, fixctx, newdata)
        cmd = fixer.command(path, ranges)
        if cmd is None:
            continue
        ui.debug('subprocess: %s\n' % (cmd,))
        proc = subprocess.Popen(
            cmd,
            shell=True,
            cwd='/',
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        newerdata, stderr = proc.communicate(newdata)
        if stderr:
            showstderr(ui, fixctx.rev(), fixername, stderr)
        else:
            newdata = newerdata
    return newdata
401
def showstderr(ui, rev, fixername, stderr):
    """Writes the lines of the stderr string as warnings on the ui

    Every non-empty line is prefixed with the revision number (or "wdir" when
    "rev" is None) and the fixer name, to give more context to the error
    message. File names are not included, since they take up a lot of space and
    would tend to appear in the error message itself if they were relevant.
    """
    for line in re.split('[\r\n]+', stderr):
        if not line:
            continue
        ui.warn(('['))
        if rev is None:
            revname = _('wdir')
        else:
            revname = (str(rev))
        ui.warn(revname, label='evolve.rev')
        ui.warn(('] %s: %s\n') % (fixername, line))
418
def writeworkingdir(repo, ctx, filedata, replacements):
    """Write new content to the working copy and check out the new p1 if any

    "filedata" maps paths to their fixed content. Each file is written with its
    existing flags, and dirstate.normallookup() is called for it if the
    dirstate currently records it in state 'n'.

    The working directory parents are changed if and only if one of them has an
    entry in "replacements", i.e. we fixed something in both the working
    directory and its parent revision. This avoids the need for a full
    update/merge, and means that the working directory simply isn't affected
    unless the --working-dir flag is given.
    """
    dirstate = repo.dirstate
    for path, data in filedata.iteritems():
        wfctx = ctx[path]
        wfctx.write(data, wfctx.flags())
        if dirstate[path] == 'n':
            dirstate.normallookup(path)

    current = dirstate.parents()
    updated = [replacements.get(node, node) for node in current]
    if updated != current:
        repo.setparents(*updated)
439
def replacerev(ui, repo, ctx, filedata, replacements):
    """Commit a new revision like the given one, but with file content changes

    "ctx" is the original revision to be replaced by a modified one.

    "filedata" is a dict that maps paths to their new file content. All other
    paths will be recreated from the original revision without changes.
    "filedata" may contain paths that didn't exist in the original revision;
    they will be added.

    "replacements" is a dict that maps a single node to a single node, and it is
    updated to indicate the original revision is replaced by the newly created
    one. No entry is added if the replacement's node already exists.

    The new revision keeps the description, user, date, extra, branch and phase
    of the old one. It also has the same parents, unless those parents have
    already been replaced, in which case the replacements become its parents.
    Thus, if revisions are replaced in topological order, there is no need to
    rebase them into the original topology later.
    """
    newparents = []
    for parentrev in repo.changelog.parentrevs(ctx.rev()):
        parentnode = repo[parentrev].node()
        newparents.append(replacements.get(parentnode, parentnode))
    newp1node, newp2node = newparents

    def filectxfn(repo, memctx, path):
        # Returning None tells memctx that the file is absent in the revision.
        if path not in ctx:
            return None
        fctx = ctx[path]
        renamed = fctx.renamed()
        copysource = renamed[0] if renamed else renamed
        return context.memfilectx(
            repo,
            memctx,
            path=fctx.path(),
            data=filedata.get(path, fctx.data()),
            islink=fctx.islink(),
            isexec=fctx.isexec(),
            copied=copysource)

    touched = set(ctx.files()).union(filedata.keys())
    # Commit the replacement in the same phase as the original.
    overrides = {('phases', 'new-commit'): ctx.phase()}
    with ui.configoverride(overrides, source='fix'):
        memctx = context.memctx(
            repo,
            parents=(newp1node, newp2node),
            text=ctx.description(),
            files=touched,
            filectxfn=filectxfn,
            user=ctx.user(),
            date=ctx.date(),
            extra=ctx.extra(),
            branch=ctx.branch(),
            editor=None)
        sucnode = memctx.commit()
        prenode = ctx.node()
        if prenode != sucnode:
            replacements[prenode] = sucnode
        else:
            ui.debug('node %s already existed\n' % (ctx.hex()))
500
def getfixers(ui):
    """Returns a map of configured fixer tools indexed by their names

    Each value is a Fixer object whose "_command", "_linerange" and "_fileset"
    attributes hold the raw values of the matching config suboptions, or an
    empty string for a suboption that is not set. Does not validate the config
    values.
    """
    fixers = {}
    for name in fixernames(ui):
        fixer = Fixer()
        suboptions = ui.configsuboptions('fix', name)[1]
        for key in FIXER_ATTRS:
            setattr(fixer, '_' + key, suboptions.get(key, ''))
        fixers[name] = fixer
    return fixers
514
def fixernames(ui):
    """Returns the names of [fix] config options that have suboptions

    The name is the part of a config key before its first colon. Keys without
    a colon are ignored.
    """
    return {key.split(':', 1)[0]
            for key, value in ui.configitems('fix')
            if ':' in key}
522
class Fixer(object):
    """Wraps the raw config values for a fixer with methods

    Instances are expected to carry "_command", "_linerange" and "_fileset"
    attributes holding the raw suboption values.
    """

    def affects(self, opts, fixctx, path):
        """Should this fixer run on the file at the given path and context?"""
        matcher = scmutil.match(fixctx, [self._fileset], opts)
        return matcher(path)

    def command(self, path, ranges):
        """A shell command to use to invoke this fixer on the given file/lines

        The :command value is formatted with {rootpath} and {basename}. If a
        :linerange value is configured, it is formatted with {first} and {last}
        once per range and appended, separated by spaces.

        May return None if there is no appropriate command to run for the given
        parameters, which is the case when line ranges are supported but none
        were given.
        """
        # NOTE(review): the path is substituted into the shell command without
        # any quoting, so paths containing spaces or shell metacharacters are
        # not handled safely here.
        base = self._command.format(rootpath=path,
                                    basename=os.path.basename(path))
        if not self._linerange:
            return base
        if not ranges:
            # No line ranges to fix, so don't run the fixer.
            return None
        rangeargs = [self._linerange.format(first=first, last=last)
                     for first, last in ranges]
        return ' '.join([base] + rangeargs)