Mercurial > hg
changeset 44941:edd08aa193fb
files: extract code for extra filtering of the `removed` entry into copies
We want to reduce the set of `removed` files to the set of files actually
removed. That `removed` set is used as part of the changeset centric algorithm;
having smaller sets means less processing and faster computation.
In this changeset we extract the code into a function of its own. We will make
use of it in the next changesets.
Differential Revision: https://phab.mercurial-scm.org/D8588
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Wed, 27 May 2020 12:45:39 +0200 |
parents | 4c1d39215034 |
children | 25512a65cefd |
files | mercurial/localrepo.py mercurial/metadata.py |
diffstat | 2 files changed, 59 insertions(+), 45 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/localrepo.py Wed May 27 12:26:08 2020 +0200 +++ b/mercurial/localrepo.py Wed May 27 12:45:39 2020 +0200 @@ -46,6 +46,7 @@ match as matchmod, mergestate as mergestatemod, mergeutil, + metadata, namespaces, narrowspec, obsolete, @@ -3145,51 +3146,8 @@ for f in drop: del m[f] if p2.rev() != nullrev: - - @util.cachefunc - def mas(): - p1n = p1.node() - p2n = p2.node() - cahs = self.changelog.commonancestorsheads(p1n, p2n) - if not cahs: - cahs = [nullrev] - return [self[r].manifest() for r in cahs] - - def deletionfromparent(f): - # When a file is removed relative to p1 in a merge, this - # function determines whether the absence is due to a - # deletion from a parent, or whether the merge commit - # itself deletes the file. We decide this by doing a - # simplified three way merge of the manifest entry for - # the file. There are two ways we decide the merge - # itself didn't delete a file: - # - neither parent (nor the merge) contain the file - # - exactly one parent contains the file, and that - # parent has the same filelog entry as the merge - # ancestor (or all of them if there two). In other - # words, that parent left the file unchanged while the - # other one deleted it. - # One way to think about this is that deleting a file is - # similar to emptying it, so the list of changed files - # should be similar either way. The computation - # described above is not done directly in _filecommit - # when creating the list of changed files, however - # it does something very similar by comparing filelog - # nodes. - if f in m1: - return f not in m2 and all( - f in ma and ma.find(f) == m1.find(f) - for ma in mas() - ) - elif f in m2: - return all( - f in ma and ma.find(f) == m2.find(f) - for ma in mas() - ) - else: - return True - - removed = [f for f in removed if not deletionfromparent(f)] + rf = metadata.get_removal_filter(ctx, (p1, p2, m1, m2)) + removed = [f for f in removed if not rf(f)] files = changed + removed md = None
--- a/mercurial/metadata.py Wed May 27 12:26:08 2020 +0200 +++ b/mercurial/metadata.py Wed May 27 12:45:39 2020 +0200 @@ -11,6 +11,7 @@ from . import ( error, + node, pycompat, util, ) @@ -31,6 +32,61 @@ return added +def get_removal_filter(ctx, x=None): + """return a function to detect files "wrongly" detected as `removed` + + When a file is removed relative to p1 in a merge, this + function determines whether the absence is due to a + deletion from a parent, or whether the merge commit + itself deletes the file. We decide this by doing a + simplified three way merge of the manifest entry for + the file. There are two ways we decide the merge + itself didn't delete a file: + - neither parent (nor the merge) contain the file + - exactly one parent contains the file, and that + parent has the same filelog entry as the merge + ancestor (or all of them if there two). In other + words, that parent left the file unchanged while the + other one deleted it. + One way to think about this is that deleting a file is + similar to emptying it, so the list of changed files + should be similar either way. The computation + described above is not done directly in _filecommit + when creating the list of changed files, however + it does something very similar by comparing filelog + nodes. 
+ """ + + if x is not None: + p1, p2, m1, m2 = x + else: + p1 = ctx.p1() + p2 = ctx.p2() + m1 = p1.manifest() + m2 = p2.manifest() + + @util.cachefunc + def mas(): + p1n = p1.node() + p2n = p2.node() + cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n) + if not cahs: + cahs = [node.nullrev] + return [ctx.repo()[r].manifest() for r in cahs] + + def deletionfromparent(f): + if f in m1: + return f not in m2 and all( + f in ma and ma.find(f) == m1.find(f) for ma in mas() + ) + elif f in m2: + return all(f in ma and ma.find(f) == m2.find(f) for ma in mas()) + else: + return True + + return deletionfromparent + + def computechangesetfilesremoved(ctx): """return the list of files removed in a changeset """