changeset 42194:0da689a60163

fix: allow fixer tools to return metadata in addition to the file content With this change, fixer tools can be configured to output a JSON object that will be parsed and passed to hooks that can be used to print summaries of what code was formatted or perform other post-fixing work. The motivation for this change is to allow parallel executions of a "meta-formatter" tool to report back statistics, which are then aggregated and processed after all formatting has completed. Providing an extensible mechanism inside fix.py is far simpler, and more portable, than trying to make a tool like this communicate through some other channel. Differential Revision: https://phab.mercurial-scm.org/D6167
author Danny Hooper <hooper@google.com>
date Thu, 21 Mar 2019 18:32:45 -0700
parents 7c0ece3cd3ee
children 7edff8dd46a7
files hgext/fix.py tests/test-fix-metadata.t tests/test-fix.t
diffstat 3 files changed, 188 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/fix.py	Tue Apr 23 15:49:17 2019 -0400
+++ b/hgext/fix.py	Thu Mar 21 18:32:45 2019 -0700
@@ -72,12 +72,43 @@
 To account for changes made by each tool, the line numbers used for incremental
 formatting are recomputed before executing the next tool. So, each tool may see
 different values for the arguments added by the :linerange suboption.
+
+Each fixer tool is allowed to return some metadata in addition to the fixed file
+content. The metadata must be placed before the file content on stdout,
+separated from the file content by a zero byte. The metadata is parsed as a JSON
+value (so, it should be UTF-8 encoded and contain no zero bytes). A fixer tool
+is expected to produce this metadata encoding if and only if the :metadata
+suboption is true::
+
+  [fix]
+  tool:command = tool --prepend-json-metadata
+  tool:metadata = true
+
+The metadata values are passed to hooks, which can be used to print summaries or
+perform other post-fixing work. The supported hooks are::
+
+  "postfixfile"
+    Run once for each file in each revision where any fixer tools made changes
+    to the file content. Provides "$HG_REV" and "$HG_PATH" to identify the file,
+    and "$HG_METADATA" with a map of fixer names to metadata values from fixer
+    tools that affected the file. Fixer tools that didn't affect the file have a
+    value of None. Only fixer tools that executed are present in the metadata.
+
+  "postfix"
+    Run once after all files and revisions have been handled. Provides
+    "$HG_REPLACEMENTS" with information about what revisions were created and
+    made obsolete. Provides a boolean "$HG_WDIRWRITTEN" to indicate whether any
+    files in the working copy were updated. Provides a dict "$HG_METADATA"
+    mapping fixer tool names to lists of metadata values returned from
+    executions that modified a file. This aggregates the same metadata
+    previously passed to the "postfixfile" hook.
 """
 
 from __future__ import absolute_import
 
 import collections
 import itertools
+import json
 import os
 import re
 import subprocess
@@ -117,13 +148,14 @@
 configtable = {}
 configitem = registrar.configitem(configtable)
 
-# Register the suboptions allowed for each configured fixer.
+# Register the suboptions allowed for each configured fixer, and default values.
 FIXER_ATTRS = {
     'command': None,
     'linerange': None,
     'fileset': None,
     'pattern': None,
     'priority': 0,
+    'metadata': False,
 }
 
 for key, default in FIXER_ATTRS.items():
@@ -201,10 +233,12 @@
             for rev, path in items:
                 ctx = repo[rev]
                 olddata = ctx[path].data()
-                newdata = fixfile(ui, opts, fixers, ctx, path, basectxs[rev])
+                metadata, newdata = fixfile(ui, opts, fixers, ctx, path,
+                                            basectxs[rev])
                 # Don't waste memory/time passing unchanged content back, but
                 # produce one result per item either way.
-                yield (rev, path, newdata if newdata != olddata else None)
+                yield (rev, path, metadata,
+                       newdata if newdata != olddata else None)
         results = worker.worker(ui, 1.0, getfixes, tuple(), workqueue,
                                 threadsafe=False)
 
@@ -215,15 +249,25 @@
         # the tests deterministic. It might also be considered a feature since
         # it makes the results more easily reproducible.
         filedata = collections.defaultdict(dict)
+        aggregatemetadata = collections.defaultdict(list)
         replacements = {}
         wdirwritten = False
         commitorder = sorted(revstofix, reverse=True)
         with ui.makeprogress(topic=_('fixing'), unit=_('files'),
                              total=sum(numitems.values())) as progress:
-            for rev, path, newdata in results:
+            for rev, path, filerevmetadata, newdata in results:
                 progress.increment(item=path)
+                for fixername, fixermetadata in filerevmetadata.items():
+                    aggregatemetadata[fixername].append(fixermetadata)
                 if newdata is not None:
                     filedata[rev][path] = newdata
+                    hookargs = {
+                      'rev': rev,
+                      'path': path,
+                      'metadata': filerevmetadata,
+                    }
+                    repo.hook('postfixfile', throw=False,
+                              **pycompat.strkwargs(hookargs))
                 numitems[rev] -= 1
                 # Apply the fixes for this and any other revisions that are
                 # ready and sitting at the front of the queue. Using a loop here
@@ -240,6 +284,12 @@
                     del filedata[rev]
 
         cleanup(repo, replacements, wdirwritten)
+        hookargs = {
+            'replacements': replacements,
+            'wdirwritten': wdirwritten,
+            'metadata': aggregatemetadata,
+        }
+        repo.hook('postfix', throw=True, **pycompat.strkwargs(hookargs))
 
 def cleanup(repo, replacements, wdirwritten):
     """Calls scmutil.cleanupnodes() with the given replacements.
@@ -491,6 +541,7 @@
     A fixer tool's stdout will become the file's new content if and only if it
     exits with code zero.
     """
+    metadata = {}
     newdata = fixctx[path].data()
     for fixername, fixer in fixers.iteritems():
         if fixer.affects(opts, fixctx, path):
@@ -506,9 +557,20 @@
                 stdin=subprocess.PIPE,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE)
-            newerdata, stderr = proc.communicate(newdata)
+            stdout, stderr = proc.communicate(newdata)
             if stderr:
                 showstderr(ui, fixctx.rev(), fixername, stderr)
+            newerdata = stdout
+            if fixer.shouldoutputmetadata():
+                try:
+                    metadatajson, newerdata = stdout.split('\0', 1)
+                    metadata[fixername] = json.loads(metadatajson)
+                except ValueError:
+                    ui.warn(_('ignored invalid output from fixer tool: %s\n') %
+                            (fixername,))
+                    continue
+            else:
+                metadata[fixername] = None
             if proc.returncode == 0:
                 newdata = newerdata
             else:
@@ -519,7 +581,7 @@
                     ui, _('no fixes will be applied'),
                     hint=_('use --config fix.failure=continue to apply any '
                            'successful fixes anyway'))
-    return newdata
+    return metadata, newdata
 
 def showstderr(ui, rev, fixername, stderr):
     """Writes the lines of the stderr string as warnings on the ui
@@ -667,6 +729,10 @@
         """Should this fixer run on the file at the given path and context?"""
         return scmutil.match(fixctx, [self._pattern], opts)(path)
 
+    def shouldoutputmetadata(self):
+        """Should the stdout of this fixer start with JSON and a null byte?"""
+        return self._metadata
+
     def command(self, ui, path, rangesfn):
         """A shell command to use to invoke this fixer on the given file/lines
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-fix-metadata.t	Thu Mar 21 18:32:45 2019 -0700
@@ -0,0 +1,86 @@
+A python hook for "hg fix" that prints out the number of files and revisions
+that were affected, along with which fixer tools were applied. Also checks how
+many times it sees a specific key generated by one of the fixer tools defined
+below.
+
+  $ cat >> $TESTTMP/postfixhook.py <<EOF
+  > import collections
+  > def file(ui, repo, rev=None, path='', metadata=None, **kwargs):
+  >   ui.status('fixed %s in revision %d using %s\n' %
+  >             (path, rev, ', '.join(metadata.keys())))
+  > def summarize(ui, repo, replacements=None, wdirwritten=False,
+  >               metadata=None, **kwargs):
+  >     counts = collections.defaultdict(int)
+  >     keys = 0
+  >     for fixername, metadatalist in metadata.items():
+  >         for metadata in metadatalist:
+  >             if metadata is None:
+  >                 continue
+  >             counts[fixername] += 1
+  >             if 'key' in metadata:
+  >                 keys += 1
+  >     ui.status('saw "key" %d times\n' % (keys,))
+  >     for name, count in sorted(counts.items()):
+  >         ui.status('fixed %d files with %s\n' % (count, name))
+  >     if replacements:
+  >         ui.status('fixed %d revisions\n' % (len(replacements),))
+  >     if wdirwritten:
+  >         ui.status('fixed the working copy\n')
+  > EOF
+
+Some mock output for fixer tools that demonstrate what could go wrong with
+expecting the metadata output format.
+
+  $ printf 'new content\n' > $TESTTMP/missing
+  $ printf 'not valid json\0new content\n' > $TESTTMP/invalid
+  $ printf '{"key": "value"}\0new content\n' > $TESTTMP/valid
+
+Configure some fixer tools based on the output defined above, and enable the
+hooks defined above. Disable parallelism to make output of the parallel file
+processing phase stable.
+
+  $ cat >> $HGRCPATH <<EOF
+  > [extensions]
+  > fix =
+  > [fix]
+  > missing:command=cat $TESTTMP/missing
+  > missing:pattern=missing
+  > missing:metadata=true
+  > invalid:command=cat $TESTTMP/invalid
+  > invalid:pattern=invalid
+  > invalid:metadata=true
+  > valid:command=cat $TESTTMP/valid
+  > valid:pattern=valid
+  > valid:metadata=true
+  > [hooks]
+  > postfixfile = python:$TESTTMP/postfixhook.py:file
+  > postfix = python:$TESTTMP/postfixhook.py:summarize
+  > [worker]
+  > enabled=false
+  > EOF
+
+See what happens when we execute each of the fixer tools. Some print warnings,
+some write back to the file.
+
+  $ hg init repo
+  $ cd repo
+
+  $ printf "old content\n" > invalid
+  $ printf "old content\n" > missing
+  $ printf "old content\n" > valid
+  $ hg add -q
+
+  $ hg fix -w
+  ignored invalid output from fixer tool: invalid
+  ignored invalid output from fixer tool: missing
+  fixed valid in revision 2147483647 using valid
+  saw "key" 1 times
+  fixed 1 files with valid
+  fixed the working copy
+
+  $ cat missing invalid valid
+  old content
+  old content
+  new content
+
+  $ cd ..
--- a/tests/test-fix.t	Tue Apr 23 15:49:17 2019 -0400
+++ b/tests/test-fix.t	Thu Mar 21 18:32:45 2019 -0700
@@ -185,6 +185,36 @@
   tool may see different values for the arguments added by the :linerange
   suboption.
   
+  Each fixer tool is allowed to return some metadata in addition to the fixed
+  file content. The metadata must be placed before the file content on stdout,
+  separated from the file content by a zero byte. The metadata is parsed as a
+  JSON value (so, it should be UTF-8 encoded and contain no zero bytes). A fixer
+  tool is expected to produce this metadata encoding if and only if the
+  :metadata suboption is true:
+  
+    [fix]
+    tool:command = tool --prepend-json-metadata
+    tool:metadata = true
+  
+  The metadata values are passed to hooks, which can be used to print summaries
+  or perform other post-fixing work. The supported hooks are:
+  
+    "postfixfile"
+      Run once for each file in each revision where any fixer tools made changes
+      to the file content. Provides "$HG_REV" and "$HG_PATH" to identify the file,
+      and "$HG_METADATA" with a map of fixer names to metadata values from fixer
+      tools that affected the file. Fixer tools that didn't affect the file have a
+      value of None. Only fixer tools that executed are present in the metadata.
+  
+    "postfix"
+      Run once after all files and revisions have been handled. Provides
+      "$HG_REPLACEMENTS" with information about what revisions were created and
+      made obsolete. Provides a boolean "$HG_WDIRWRITTEN" to indicate whether any
+      files in the working copy were updated. Provides a dict "$HG_METADATA"
+      mapping fixer tool names to lists of metadata values returned from
+      executions that modified a file. This aggregates the same metadata
+      previously passed to the "postfixfile" hook.
+  
   list of commands:
   
    fix           rewrite file content in changesets or working directory