|
1 # fix - rewrite file content in changesets and working copy |
|
2 # |
|
3 # Copyright 2018 Google LLC. |
|
4 # |
|
5 # This software may be used and distributed according to the terms of the |
|
6 # GNU General Public License version 2 or any later version. |
|
7 """rewrite file content in changesets or working copy (EXPERIMENTAL) |
|
8 |
|
9 Provides a command that runs configured tools on the contents of modified files, |
|
10 writing back any fixes to the working copy or replacing changesets. |
|
11 |
|
12 Here is an example configuration that causes :hg:`fix` to apply automatic |
|
13 formatting fixes to modified lines in C++ code:: |
|
14 |
|
15 [fix] |
|
16 clang-format:command=clang-format --assume-filename={rootpath} |
|
17 clang-format:linerange=--lines={first}:{last} |
|
18 clang-format:fileset=set:**.cpp or **.hpp |
|
19 |
|
20 The :command suboption forms the first part of the shell command that will be |
|
21 used to fix a file. The content of the file is passed on standard input, and the |
|
22 fixed file content is expected on standard output. If there is any output on |
|
23 standard error, the file will not be affected. Some values may be substituted |
|
24 into the command:: |
|
25 |
|
26 {rootpath} The path of the file being fixed, relative to the repo root |
|
27 {basename} The name of the file being fixed, without the directory path |
|
28 |
|
29 If the :linerange suboption is set, the tool will only be run if there are |
|
30 changed lines in a file. The value of this suboption is appended to the shell |
|
31 command once for every range of changed lines in the file. Some values may be |
|
32 substituted into the command:: |
|
33 |
|
34 {first} The 1-based line number of the first line in the modified range |
|
35 {last} The 1-based line number of the last line in the modified range |
|
36 |
|
37 The :fileset suboption determines which files will be passed through each |
|
38 configured tool. See :hg:`help fileset` for possible values. If there are file |
|
39 arguments to :hg:`fix`, the intersection of these filesets is used. |
|
40 |
|
41 There is also a configurable limit for the maximum size of file that will be |
|
42 processed by :hg:`fix`:: |
|
43 |
|
44 [fix] |
|
45 maxfilesize=2MB |
|
46 |
|
47 """ |
|
48 |
|
49 from __future__ import absolute_import |
|
50 |
|
51 import collections |
|
52 import itertools |
|
53 import os |
|
54 import re |
|
55 import subprocess |
|
56 import sys |
|
57 |
|
58 from mercurial.i18n import _ |
|
59 from mercurial.node import nullrev |
|
60 from mercurial.node import wdirrev |
|
61 |
|
62 from mercurial import ( |
|
63 cmdutil, |
|
64 context, |
|
65 copies, |
|
66 error, |
|
67 match, |
|
68 mdiff, |
|
69 merge, |
|
70 obsolete, |
|
71 posix, |
|
72 registrar, |
|
73 scmutil, |
|
74 util, |
|
75 ) |
|
76 |
|
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

# Tables that the registrar helpers below fill in as commands and config
# items are declared in this module.
cmdtable = {}
configtable = {}
command = registrar.command(cmdtable)
configitem = registrar.configitem(configtable)

# The suboptions that may be set on each configured fixer, e.g.
# "clang-format:command". Each one is registered as a generic config item.
FIXER_ATTRS = ('command', 'linerange', 'fileset')

for key in FIXER_ATTRS:
    configitem('fix', '.*(:%s)?' % key, default=None, generic=True)

# A good default size allows most source code files to be fixed, but avoids
# letting fixer tools choke on huge inputs, which could be surprising to the
# user.
configitem('fix', 'maxfilesize', default='2MB')
|
99 |
|
@command('fix',
    [('', 'base', [], _('revisions to diff against (overrides automatic '
                        'selection, and applies to every revision being '
                        'fixed)'), _('REV')),
     ('r', 'rev', [], _('revisions to fix'), _('REV')),
     ('w', 'working-dir', False, _('fix the working directory')),
     ('', 'whole', False, _('always fix every line of a file'))],
    _('[OPTION]... [FILE]...'))
def fix(ui, repo, *pats, **opts):
    """rewrite file content in changesets or working directory

    Runs any configured tools to fix the content of files. Only affects files
    with changes, unless file arguments are provided. Only affects changed lines
    of files, unless the --whole flag is used. Some tools may always affect the
    whole file regardless of --whole.

    If revisions are specified with --rev, those revisions will be checked, and
    they may be replaced with new revisions that have fixed file content. It is
    desirable to specify all descendants of each specified revision, so that the
    fixes propagate to the descendants. If all descendants are fixed at the same
    time, no merging, rebasing, or evolution will be required.

    If --working-dir is used, files with uncommitted changes in the working copy
    will be fixed. If the checked-out revision is also fixed, the working
    directory will update to the replacement revision.

    When determining what lines of each file to fix at each revision, the whole
    set of revisions being fixed is considered, so that fixes to earlier
    revisions are not forgotten in later ones. The --base flag can be used to
    override this default behavior, though it is not usually desirable to do so.
    """
    with repo.wlock(), repo.lock():
        targetrevs = getrevstofix(ui, repo, opts)
        bases = getbasectxs(repo, opts, targetrevs)
        queue, remaining = getworkqueue(ui, repo, pats, opts, targetrevs,
                                        bases)
        # rev -> {path: fixed content}, only for files whose content changed.
        fixedbyrev = collections.defaultdict(dict)
        # old node -> new node, filled in as revisions are rewritten.
        nodemap = {}
        fixers = getfixers(ui)
        # Items are handled one at a time in sorted (rev, path) order, so a
        # revision is only committed after everything sorted before it.
        for rev, path in sorted(queue):
            ctx = repo[rev]
            before = ctx[path].data()
            after = fixfile(ui, opts, fixers, ctx, path, bases[rev])
            if after != before:
                fixedbyrev[rev][path] = after
            remaining[rev] -= 1
            if remaining[rev]:
                # More files of this revision are still waiting in the queue.
                continue
            # That was the last file of this revision: write the result out.
            if rev == wdirrev:
                writeworkingdir(repo, ctx, fixedbyrev[rev], nodemap)
            else:
                replacerev(ui, repo, ctx, fixedbyrev[rev], nodemap)
            del fixedbyrev[rev]

        # cleanupnodes is given a list of successors for each predecessor.
        successors = {old: [new] for old, new in nodemap.iteritems()}
        scmutil.cleanupnodes(repo, successors, 'fix')
|
157 |
|
def getworkqueue(ui, repo, pats, opts, revstofix, basectxs):
    """Build the list of (rev, path) work items to be fixed

    Returns a pair (workqueue, numitems): workqueue is a list of (rev, path)
    tuples and numitems maps each revision to how many of its files were
    queued.

    A path is skipped when it is absent from the revision being fixed, when it
    is a symlink, or when it is larger than the fix.maxfilesize config value
    (a warning is printed in that last case).
    """
    maxfilesize = ui.configbytes('fix', 'maxfilesize')
    workqueue = []
    numitems = collections.defaultdict(int)
    for rev in revstofix:
        fixctx = repo[rev]
        matcher = scmutil.match(fixctx, pats, opts)
        candidates = pathstofix(ui, repo, pats, opts, matcher, basectxs[rev],
                                fixctx)
        for path in candidates:
            fctx = fixctx[path] if path in fixctx else None
            if fctx is None or fctx.islink():
                continue
            if fctx.size() <= maxfilesize:
                workqueue.append((rev, path))
                numitems[rev] += 1
            else:
                ui.warn(_('ignoring file larger than %s: %s\n') %
                        (util.bytecount(maxfilesize), path))
    return workqueue, numitems
|
187 |
|
def getrevstofix(ui, repo, opts):
    """Returns the set of revision numbers that should be fixed

    The set is built from the --rev option, plus wdirrev when --working-dir
    is given.

    Every revision named by --rev is validated with checkfixablectx(). When
    any were given, the repo must also have no unfinished operation and must
    pass checknodescendants().

    Aborts if --working-dir is requested while the merge state still has
    unresolved files, or if no revision ends up selected at all.
    """
    revs = set(scmutil.revrange(repo, opts['rev']))
    for rev in revs:
        checkfixablectx(ui, repo, repo[rev])
    if revs:
        cmdutil.checkunfinished(repo)
        checknodescendants(repo, revs)
    if opts.get('working_dir'):
        revs.add(wdirrev)
        if list(merge.mergestate.read(repo).unresolved()):
            # User-facing messages go through _() so they can be translated,
            # matching checknodescendants().
            raise error.Abort(_('unresolved conflicts'),
                              hint=_("use 'hg resolve'"))
    if not revs:
        raise error.Abort(_('no changesets specified'),
                          hint=_('use --rev or --working-dir'))
    return revs
|
204 |
|
def checknodescendants(repo, revs):
    """Aborts if some descendant of the given revisions is not among them

    The check is skipped entirely when the repo has the
    obsolete.allowunstableopt option enabled.
    """
    if obsolete.isenabled(repo, obsolete.allowunstableopt):
        return
    # Descendants of revs that are not themselves in revs.
    leftbehind = repo.revs('(%ld::) - (%ld)', revs, revs)
    if leftbehind:
        raise error.Abort(_('can only fix a changeset together '
                            'with all its descendants'))
|
210 |
|
def checkfixablectx(ui, repo, ctx):
    """Aborts if the revision shouldn't be replaced with a fixed one.

    A revision is rejected when it is not mutable, or when it is obsolete and
    the experimental.evolution.allowdivergence config option is not set.
    """
    if not ctx.mutable():
        # User-facing messages go through _() so they can be translated,
        # matching checknodescendants().
        raise error.Abort(_("can't fix immutable changeset %s") %
                          (scmutil.formatchangeid(ctx),))
    if ctx.obsolete():
        # It would be better to actually check if the revision has a successor.
        allowdivergence = ui.configbool('experimental',
                                        'evolution.allowdivergence')
        if not allowdivergence:
            raise error.Abort(
                _('fixing obsolete revision could cause divergence'))
|
222 |
|
def pathstofix(ui, repo, pats, opts, match, basectxs, fixctx):
    """Collect the paths that should be fixed in fixctx

    A path is included when repo.status() reports it as added or modified in
    fixctx relative to at least one of the base contexts. When file patterns
    were given, clean and unknown files are requested from status and
    included as well.
    """
    # Clean and unknown files are only of interest if the user named files.
    haspats = bool(pats)
    found = set()
    for basectx in basectxs:
        st = repo.status(basectx, fixctx, match=match, clean=haspats,
                         unknown=haspats)
        for group in (st.added, st.modified, st.clean, st.unknown):
            found.update(group)
    return found
|
238 |
|
def lineranges(opts, path, basectxs, fixctx, content2):
    """Compute the line ranges of content2 that should be fixed

    The result has the form [(10, 20), (30, 40)].

    With --whole, a range covering every line is returned. Otherwise content2
    is diffed against the file's content in each base context, following the
    path through copies.pathcopies() in case the file was renamed, and the
    changed ranges from all of those diffs are unioned. A file that is missing
    from a base context is treated as empty there.

    Put differently, only ranges that are unchanged against every base context
    are left out.
    """
    if opts.get('whole'):
        # Diffing against empty content marks every line as changed, and lets
        # the diff implementation decide how many lines the file has.
        return difflineranges('', content2)

    changed = []
    for basectx in basectxs:
        renames = copies.pathcopies(basectx, fixctx)
        basepath = renames.get(path, path)
        content1 = basectx[basepath].data() if basepath in basectx else ''
        changed += difflineranges(content1, content2)
    return unionranges(changed)
|
265 |
|
def unionranges(rangeslist):
    """Return the union of some closed intervals

    Intervals that overlap or are directly adjacent (the next one starts at
    most one past the end of the previous one) are merged. The result is
    sorted.

    >>> unionranges([])
    []
    >>> unionranges([(1, 100)])
    [(1, 100)]
    >>> unionranges([(1, 100), (1, 100)])
    [(1, 100)]
    >>> unionranges([(1, 100), (2, 100)])
    [(1, 100)]
    >>> unionranges([(1, 99), (1, 100)])
    [(1, 100)]
    >>> unionranges([(1, 100), (40, 60)])
    [(1, 100)]
    >>> unionranges([(1, 49), (50, 100)])
    [(1, 100)]
    >>> unionranges([(1, 48), (50, 100)])
    [(1, 48), (50, 100)]
    >>> unionranges([(1, 2), (3, 4), (5, 6)])
    [(1, 6)]
    """
    ordered = sorted(set(rangeslist))
    # Seed the result with the smallest interval (if any), then fold the rest
    # into it from left to right.
    merged = ordered[:1]
    for first, last in ordered[1:]:
        start, end = merged[-1]
        if first <= end + 1:
            # Overlapping or touching: grow the interval at the tail.
            merged[-1] = (start, max(last, end))
        else:
            merged.append((first, last))
    return merged
|
299 |
|
def difflineranges(content1, content2):
    """Return list of line number ranges in content2 that differ from content1.

    Each range is a (first, last) pair of 1-based line numbers, both
    inclusive, so a one-line range repeats the same number. Blocks that are
    empty on the content2 side (lines only present in content1) are left out.
    Line boundaries are whatever mdiff considers them to be.

    >>> lines = lambda s: '\\n'.join([c for c in s])
    >>> difflineranges2 = lambda a, b: difflineranges(lines(a), lines(b))
    >>> difflineranges2('', '')
    []
    >>> difflineranges2('a', '')
    []
    >>> difflineranges2('', 'A')
    [(1, 1)]
    >>> difflineranges2('a', 'a')
    []
    >>> difflineranges2('a', 'A')
    [(1, 1)]
    >>> difflineranges2('ab', '')
    []
    >>> difflineranges2('', 'AB')
    [(1, 2)]
    >>> difflineranges2('abc', 'ac')
    []
    >>> difflineranges2('ab', 'aCb')
    [(2, 2)]
    >>> difflineranges2('abc', 'aBc')
    [(2, 2)]
    >>> difflineranges2('ab', 'AB')
    [(1, 2)]
    >>> difflineranges2('abcde', 'aBcDe')
    [(2, 2), (4, 4)]
    >>> difflineranges2('abcde', 'aBCDe')
    [(2, 4)]
    """
    changed = []
    for block, kind in mdiff.allblocks(content1, content2):
        # Elements 2 and 3 of a block delimit its span in content2.
        start, stop = block[2:4]
        if kind != '!' or start == stop:
            # Either an unchanged block or one with no lines in content2.
            continue
        # start is shifted by one to make it 1-based; stop is used as the
        # inclusive last line.
        changed.append((start + 1, stop))
    return changed
|
344 |
|
def getbasectxs(repo, opts, revstofix):
    """Map each revision to be fixed to its set of base contexts

    The base contexts are what a revision is diffed against when only the
    modified lines of a file are to be fixed.

    With --base, every revision gets the same set of contexts named by the
    user (or just the null revision if that revset is empty). Otherwise each
    revision inherits the bases of any parent that is also being fixed, and
    uses the parent itself as a base when it is not.
    """
    baseopt = opts.get('base')
    if baseopt:
        # The --base flag overrides the usual logic; one shared set is handed
        # to every revision.
        baserevs = set(scmutil.revrange(repo, baseopt)) or {nullrev}
        shared = {repo[rev] for rev in baserevs}
        return {rev: shared for rev in revstofix}

    # Visit revisions in ascending order so that a parent's bases are already
    # known by the time its children are processed.
    result = collections.defaultdict(set)
    for rev in sorted(revstofix):
        for pctx in repo[rev].parents():
            parentrev = pctx.rev()
            if parentrev in result:
                result[rev].update(result[parentrev])
            else:
                result[rev].add(pctx)
    return result
|
371 |
|
def fixfile(ui, opts, fixers, fixctx, path, basectxs):
    """Run every applicable fixer on one file and return the resulting content

    Starts from the file's content in fixctx and pipes it through each fixer
    that affects the path, in the iteration order of the fixers mapping; the
    output of one fixer is the input of the next. Fixers that take line ranges
    are given the lines that changed relative to any of the basectxs.

    A fixer that writes anything to standard error has its output discarded;
    the stderr text is reported through showstderr() instead.
    """
    content = fixctx[path].data()
    for fixername, fixer in fixers.iteritems():
        if not fixer.affects(opts, fixctx, path):
            continue
        # Ranges are recomputed against the current content, since an earlier
        # fixer may already have changed it.
        ranges = lineranges(opts, path, basectxs, fixctx, content)
        shellcmd = fixer.command(path, ranges)
        if shellcmd is None:
            continue
        ui.debug('subprocess: %s\n' % (shellcmd,))
        pipe = subprocess.PIPE
        proc = subprocess.Popen(shellcmd, shell=True, cwd='/', stdin=pipe,
                                stdout=pipe, stderr=pipe)
        fixed, errtext = proc.communicate(content)
        if not errtext:
            content = fixed
        else:
            showstderr(ui, fixctx.rev(), fixername, errtext)
    return content
|
401 |
|
def showstderr(ui, rev, fixername, stderr):
    """Emit each non-empty line of a fixer's stderr as a warning on the ui

    Every line is prefixed with "[<rev>] <fixername>: ", where <rev> is the
    revision number, or "wdir" when rev is None. File names are not part of
    the prefix; they are long, and the tool's own message usually mentions
    them when they matter.
    """
    for text in re.split('[\r\n]+', stderr):
        if not text:
            continue
        ui.warn('[')
        revtext = _('wdir') if rev is None else str(rev)
        ui.warn(revtext, label='evolve.rev')
        ui.warn('] %s: %s\n' % (fixername, text))
|
418 |
|
def writeworkingdir(repo, ctx, filedata, replacements):
    """Write fixed content into the working copy and move its parents if needed

    Each path in filedata is overwritten with its new content, keeping the
    file's existing flags. Files that the dirstate has in state 'n' are marked
    with normallookup().

    Afterwards, any dirstate parent that has an entry in replacements is
    swapped for its replacement. The parents are only set again when at least
    one of them actually changed, so no update or merge is involved.
    """
    for path, data in filedata.iteritems():
        fctx = ctx[path]
        flags = fctx.flags()
        fctx.write(data, flags)
        if repo.dirstate[path] == 'n':
            repo.dirstate.normallookup(path)

    current = repo.dirstate.parents()
    wanted = [replacements.get(node, node) for node in current]
    if wanted == current:
        return
    repo.setparents(*wanted)
|
439 |
|
def replacerev(ui, repo, ctx, filedata, replacements):
    """Commit a copy of ctx in which some files have new content

    "ctx" is the original revision that is being rewritten.

    "filedata" maps paths to their new content. Every other file is recreated
    unchanged from ctx. A path in filedata that does not exist in ctx is
    reported to the commit as absent.

    "replacements" maps one old node to one new node. An entry for ctx is
    added after the commit, unless the committed node turns out to be the same
    as the original.

    The parents of the new revision are the parents of ctx, each swapped for
    its entry in "replacements" when it has one. The description, user, date,
    extra, branch and phase are carried over from ctx.
    """

    def newnode(rev):
        # The node to use as a parent: the replacement if there is one.
        node = repo[rev].node()
        return replacements.get(node, node)

    p1rev, p2rev = repo.changelog.parentrevs(ctx.rev())
    newparents = (newnode(p1rev), newnode(p2rev))

    def filectxfn(repo, memctx, path):
        if path not in ctx:
            return None
        fctx = ctx[path]
        renamed = fctx.renamed()
        # Only the first element of the rename info is handed on as the copy
        # source; a falsy value is passed through as it is.
        copysource = renamed[0] if renamed else renamed
        return context.memfilectx(
            repo,
            memctx,
            path=fctx.path(),
            data=filedata.get(path, fctx.data()),
            islink=fctx.islink(),
            isexec=fctx.isexec(),
            copied=copysource)

    touched = set(ctx.files()).union(filedata.keys())
    # Commit the new revision in the same phase as the original.
    phaseoverride = {('phases', 'new-commit'): ctx.phase()}
    with ui.configoverride(phaseoverride, source='fix'):
        memctx = context.memctx(
            repo,
            parents=newparents,
            text=ctx.description(),
            files=touched,
            filectxfn=filectxfn,
            user=ctx.user(),
            date=ctx.date(),
            extra=ctx.extra(),
            branch=ctx.branch(),
            editor=None)
        sucnode = memctx.commit()
        prenode = ctx.node()
        if prenode != sucnode:
            replacements[prenode] = sucnode
        else:
            ui.debug('node %s already existed\n' % (ctx.hex()))
|
500 |
|
def getfixers(ui):
    """Build a dict of Fixer objects keyed by the configured fixer names

    For each name, every suboption listed in FIXER_ATTRS is copied from the
    [fix] config section onto the Fixer as an underscore-prefixed attribute
    (e.g. "_command"). Suboptions that are not set become ''. The values are
    not validated here.
    """
    fixers = {}
    for name in fixernames(ui):
        fixer = Fixer()
        # configsuboptions() returns a pair; the suboptions dict is second.
        suboptions = ui.configsuboptions('fix', name)[1]
        for attr in FIXER_ATTRS:
            setattr(fixer, '_' + attr, suboptions.get(attr, ''))
        fixers[name] = fixer
    return fixers
|
514 |
|
def fixernames(ui):
    """Return the set of [fix] config names that carry a suboption

    A name qualifies when some key in the section contains a colon; the name
    is the part of the key before the first colon.
    """
    return {key.partition(':')[0]
            for key, _value in ui.configitems('fix')
            if ':' in key}
|
522 |
|
class Fixer(object):
    """Gives behavior to the raw config values of one configured fixer

    The methods read the _command, _linerange and _fileset attributes, which
    are expected to have been set on the instance before use.
    """

    def affects(self, opts, fixctx, path):
        """Tell whether this fixer's fileset matches the path in fixctx"""
        matcher = scmutil.match(fixctx, [self._fileset], opts)
        return matcher(path)

    def command(self, path, ranges):
        """Build the shell command that runs this fixer on a file

        {rootpath} and {basename} in the command template are filled in from
        the path. If a line range template is configured, it is expanded once
        per (first, last) pair in ranges and appended, separated by spaces.

        Returns None when a line range template is configured but there are no
        ranges, meaning there is nothing for the fixer to do.

        NOTE(review): the path is substituted into the command as-is, without
        shell quoting - confirm that callers and configs account for this.
        """
        base = self._command.format(rootpath=path,
                                    basename=os.path.basename(path))
        if not self._linerange:
            # Tools without a line range template always get the whole file.
            return base
        if not ranges:
            return None
        rangeargs = [self._linerange.format(first=first, last=last)
                     for first, last in ranges]
        return ' '.join([base] + rangeargs)