Mercurial > hg-stable
changeset 46110:2f357d053df2
copies: lazily calculate "addedfiles" for directory move detection
The information calculated here was only needed if (a) --debug was specified, or
(b) a directory move was plausibly detected. With tree manifests (especially in
my pathological repo and with our custom setup), pre-calculating the `u1` and
`u2` lists can be quite slow, and it's not even necessary in many cases. Let's delay
calculating them until we know it's actually necessary. This should have no
observable differences in output.
### Performance
I ran a rebase command in my pathological repo, rebasing two nodes across
several public-phase commits, in a case where no directory copies exist in any
of the paths I'm tracking.
#### Before
```
Time (mean ± σ): 3.711 s ± 0.061 s [User: 0.3 ms, System: 1.5 ms]
Range (min … max): 3.640 s … 3.827 s 10 runs
```
#### After
```
Time (mean ± σ): 868.3 ms ± 10.1 ms [User: 0.5 ms, System: 1.2 ms]
Range (min … max): 856.6 ms … 883.6 ms 10 runs
```
Differential Revision: https://phab.mercurial-scm.org/D9567
author | Kyle Lippincott <spectral@google.com> |
---|---|
date | Fri, 11 Dec 2020 13:39:56 -0800 |
parents | bdc2bf68f19e |
children | d90f439ff19f |
files | mercurial/copies.py |
diffstat | 1 files changed, 31 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/copies.py Tue Dec 08 16:45:13 2020 -0800 +++ b/mercurial/copies.py Fri Dec 11 13:39:56 2020 -0800 @@ -896,18 +896,33 @@ ) # find interesting file sets from manifests - addedinm1 = m1.filesnotin(mb, repo.narrowmatch()) - addedinm2 = m2.filesnotin(mb, repo.narrowmatch()) - u1 = sorted(addedinm1 - addedinm2) - u2 = sorted(addedinm2 - addedinm1) + cache = [] + + def _get_addedfiles(idx): + if not cache: + addedinm1 = m1.filesnotin(mb, repo.narrowmatch()) + addedinm2 = m2.filesnotin(mb, repo.narrowmatch()) + u1 = sorted(addedinm1 - addedinm2) + u2 = sorted(addedinm2 - addedinm1) + cache.extend((u1, u2)) + return cache[idx] - header = b" unmatched files in %s" - if u1: - repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))) - if u2: - repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))) + u1fn = lambda: _get_addedfiles(0) + u2fn = lambda: _get_addedfiles(1) + if repo.ui.debugflag: + u1 = u1fn() + u2 = u2fn() - if repo.ui.debugflag: + header = b" unmatched files in %s" + if u1: + repo.ui.debug( + b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)) + ) + if u2: + repo.ui.debug( + b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)) + ) + renamedeleteset = set() divergeset = set() for dsts in diverge.values(): @@ -941,8 +956,8 @@ repo.ui.debug(b" checking for directory renames\n") - dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2) - dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1) + dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn) + dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn) branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1) branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2) @@ -950,14 +965,15 @@ return branch_copies1, branch_copies2, diverge -def _dir_renames(repo, ctx, copy, fullcopy, addedfiles): +def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn): """Finds moved directories and 
files that should move with them. ctx: the context for one of the sides copy: files copied on the same side (as ctx) fullcopy: files copied on the same side (as ctx), including those that merge.manifestmerge() won't care about - addedfiles: added files on the other side (compared to ctx) + addedfilesfn: function returning added files on the other side (compared to + ctx) """ # generate a directory move map invalid = set() @@ -997,7 +1013,7 @@ movewithdir = {} # check unaccounted nonoverlapping files against directory moves - for f in addedfiles: + for f in addedfilesfn(): if f not in fullcopy: for d in dirmove: if f.startswith(d):