hgext/fix.py
changeset 38536 5ffe2041d427
parent 38423 32fba6fe893d
child 38537 a3be09e277e9
equal deleted inserted replaced
38535:8c38d2948217 38536:5ffe2041d427
    68     obsolete,
    68     obsolete,
    69     pycompat,
    69     pycompat,
    70     registrar,
    70     registrar,
    71     scmutil,
    71     scmutil,
    72     util,
    72     util,
       
    73     worker,
    73 )
    74 )
    74 
    75 
    75 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
    76 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
    76 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
    77 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
    77 # be specifying the version(s) of Mercurial they are tested with, or
    78 # be specifying the version(s) of Mercurial they are tested with, or
   136     with repo.wlock(), repo.lock(), repo.transaction('fix'):
   137     with repo.wlock(), repo.lock(), repo.transaction('fix'):
   137         revstofix = getrevstofix(ui, repo, opts)
   138         revstofix = getrevstofix(ui, repo, opts)
   138         basectxs = getbasectxs(repo, opts, revstofix)
   139         basectxs = getbasectxs(repo, opts, revstofix)
   139         workqueue, numitems = getworkqueue(ui, repo, pats, opts, revstofix,
   140         workqueue, numitems = getworkqueue(ui, repo, pats, opts, revstofix,
   140                                            basectxs)
   141                                            basectxs)
       
   142         fixers = getfixers(ui)
       
   143 
       
   144         # There are no data dependencies between the workers fixing each file
       
   145         # revision, so we can use all available parallelism.
       
   146         def getfixes(items):
       
   147             for rev, path in items:
       
   148                 ctx = repo[rev]
       
   149                 olddata = ctx[path].data()
       
   150                 newdata = fixfile(ui, opts, fixers, ctx, path, basectxs[rev])
       
   151                 # Don't waste memory/time passing unchanged content back, but
       
   152                 # produce one result per item either way.
       
   153                 yield (rev, path, newdata if newdata != olddata else None)
       
   154         results = worker.worker(ui, 1.0, getfixes, tuple(), workqueue)
       
   155 
       
   156         # We have to hold on to the data for each successor revision in memory
       
   157         # until all its parents are committed. We ensure this by committing and
       
   158         # freeing memory for the revisions in some topological order. This
       
   159         # leaves a little bit of memory efficiency on the table, but also makes
       
   160         # the tests deterministic. It might also be considered a feature since
       
   161         # it makes the results more easily reproducible.
   141         filedata = collections.defaultdict(dict)
   162         filedata = collections.defaultdict(dict)
   142         replacements = {}
   163         replacements = {}
   143         fixers = getfixers(ui)
   164         commitorder = sorted(revstofix, reverse=True)
   144         # Some day this loop can become a worker pool, but for now it's easier
   165         for rev, path, newdata in results:
   145         # to fix everything serially in topological order.
   166             if newdata is not None:
   146         for rev, path in sorted(workqueue):
       
   147             ctx = repo[rev]
       
   148             olddata = ctx[path].data()
       
   149             newdata = fixfile(ui, opts, fixers, ctx, path, basectxs[rev])
       
   150             if newdata != olddata:
       
   151                 filedata[rev][path] = newdata
   167                 filedata[rev][path] = newdata
   152             numitems[rev] -= 1
   168             numitems[rev] -= 1
   153             if not numitems[rev]:
   169             # Apply the fixes for this and any other revisions that are ready
       
   170             # and sitting at the front of the queue. Using a loop here prevents
       
   171             # the queue from being blocked by the first revision to be ready out
       
   172             # of order.
       
   173             while commitorder and not numitems[commitorder[-1]]:
       
   174                 rev = commitorder.pop()
       
   175                 ctx = repo[rev]
   154                 if rev == wdirrev:
   176                 if rev == wdirrev:
   155                     writeworkingdir(repo, ctx, filedata[rev], replacements)
   177                     writeworkingdir(repo, ctx, filedata[rev], replacements)
   156                 else:
   178                 else:
   157                     replacerev(ui, repo, ctx, filedata[rev], replacements)
   179                     replacerev(ui, repo, ctx, filedata[rev], replacements)
   158                 del filedata[rev]
   180                 del filedata[rev]
   166     It is up to the caller how to consume the work items, and the only
   188     It is up to the caller how to consume the work items, and the only
   167     dependence between them is that replacement revisions must be committed in
   189     dependence between them is that replacement revisions must be committed in
   168     topological order. Each work item represents a file in the working copy or
   190     topological order. Each work item represents a file in the working copy or
   169     in some revision that should be fixed and written back to the working copy
   191     in some revision that should be fixed and written back to the working copy
   170     or into a replacement revision.
   192     or into a replacement revision.
       
   193 
       
   194     Work items for the same revision are grouped together, so that a worker
       
   195     pool starting with the first N items in parallel is likely to finish the
       
   196     first revision's work before other revisions. This can allow us to write
       
   197     the result to disk and reduce memory footprint. At time of writing, the
       
   198     partition strategy in worker.py seems favorable to this. We also sort the
       
   199     items by ascending revision number to match the order in which we commit
       
   200     the fixes later.
   171     """
   201     """
   172     workqueue = []
   202     workqueue = []
   173     numitems = collections.defaultdict(int)
   203     numitems = collections.defaultdict(int)
   174     maxfilesize = ui.configbytes('fix', 'maxfilesize')
   204     maxfilesize = ui.configbytes('fix', 'maxfilesize')
   175     for rev in revstofix:
   205     for rev in sorted(revstofix):
   176         fixctx = repo[rev]
   206         fixctx = repo[rev]
   177         match = scmutil.match(fixctx, pats, opts)
   207         match = scmutil.match(fixctx, pats, opts)
   178         for path in pathstofix(ui, repo, pats, opts, match, basectxs[rev],
   208         for path in pathstofix(ui, repo, pats, opts, match, basectxs[rev],
   179                                fixctx):
   209                                fixctx):
   180             if path not in fixctx:
   210             if path not in fixctx: