Mercurial > hg
comparison mercurial/copies.py @ 46109:2f357d053df2
copies: make calculating lazy for dir move detection's "addedfiles"
The information calculated here was only needed if (a) --debug was specified, or
(b) a directory move was plausibly detected. With tree manifests (especially in
my pathological repo and with our custom setup), pre-calculating `u1` and `u2`
can be quite slow, and it's not even necessary in many cases. Let's delay
calculating them until we know they're actually necessary. This should have no
observable differences in output.
### Performance
I ran a rebase command in my pathological repo, rebasing two nodes across
several public-phase commits, in a scenario where no directory copies exist in
any of the paths I'm tracking.
#### Before
```
Time (mean ± σ): 3.711 s ± 0.061 s [User: 0.3 ms, System: 1.5 ms]
Range (min … max): 3.640 s … 3.827 s 10 runs
```
#### After
```
Time (mean ± σ): 868.3 ms ± 10.1 ms [User: 0.5 ms, System: 1.2 ms]
Range (min … max): 856.6 ms … 883.6 ms 10 runs
```
Differential Revision: https://phab.mercurial-scm.org/D9567
author | Kyle Lippincott <spectral@google.com> |
---|---|
date | Fri, 11 Dec 2020 13:39:56 -0800 |
parents | e0313b0a6f7e |
children | 59fa3890d40a |
comparison
equal
deleted
inserted
replaced
46108:bdc2bf68f19e | 46109:2f357d053df2 |
---|---|
894 _checksinglesidecopies( | 894 _checksinglesidecopies( |
895 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2 | 895 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2 |
896 ) | 896 ) |
897 | 897 |
898 # find interesting file sets from manifests | 898 # find interesting file sets from manifests |
899 addedinm1 = m1.filesnotin(mb, repo.narrowmatch()) | 899 cache = [] |
900 addedinm2 = m2.filesnotin(mb, repo.narrowmatch()) | 900 |
901 u1 = sorted(addedinm1 - addedinm2) | 901 def _get_addedfiles(idx): |
902 u2 = sorted(addedinm2 - addedinm1) | 902 if not cache: |
903 | 903 addedinm1 = m1.filesnotin(mb, repo.narrowmatch()) |
904 header = b" unmatched files in %s" | 904 addedinm2 = m2.filesnotin(mb, repo.narrowmatch()) |
905 if u1: | 905 u1 = sorted(addedinm1 - addedinm2) |
906 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))) | 906 u2 = sorted(addedinm2 - addedinm1) |
907 if u2: | 907 cache.extend((u1, u2)) |
908 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))) | 908 return cache[idx] |
909 | 909 |
910 u1fn = lambda: _get_addedfiles(0) | |
911 u2fn = lambda: _get_addedfiles(1) | |
910 if repo.ui.debugflag: | 912 if repo.ui.debugflag: |
913 u1 = u1fn() | |
914 u2 = u2fn() | |
915 | |
916 header = b" unmatched files in %s" | |
917 if u1: | |
918 repo.ui.debug( | |
919 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)) | |
920 ) | |
921 if u2: | |
922 repo.ui.debug( | |
923 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)) | |
924 ) | |
925 | |
911 renamedeleteset = set() | 926 renamedeleteset = set() |
912 divergeset = set() | 927 divergeset = set() |
913 for dsts in diverge.values(): | 928 for dsts in diverge.values(): |
914 divergeset.update(dsts) | 929 divergeset.update(dsts) |
915 for dsts in renamedelete1.values(): | 930 for dsts in renamedelete1.values(): |
939 del renamedeleteset | 954 del renamedeleteset |
940 del divergeset | 955 del divergeset |
941 | 956 |
942 repo.ui.debug(b" checking for directory renames\n") | 957 repo.ui.debug(b" checking for directory renames\n") |
943 | 958 |
944 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2) | 959 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn) |
945 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1) | 960 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn) |
946 | 961 |
947 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1) | 962 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1) |
948 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2) | 963 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2) |
949 | 964 |
950 return branch_copies1, branch_copies2, diverge | 965 return branch_copies1, branch_copies2, diverge |
951 | 966 |
952 | 967 |
953 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles): | 968 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn): |
954 """Finds moved directories and files that should move with them. | 969 """Finds moved directories and files that should move with them. |
955 | 970 |
956 ctx: the context for one of the sides | 971 ctx: the context for one of the sides |
957 copy: files copied on the same side (as ctx) | 972 copy: files copied on the same side (as ctx) |
958 fullcopy: files copied on the same side (as ctx), including those that | 973 fullcopy: files copied on the same side (as ctx), including those that |
959 merge.manifestmerge() won't care about | 974 merge.manifestmerge() won't care about |
960 addedfiles: added files on the other side (compared to ctx) | 975 addedfilesfn: function returning added files on the other side (compared to |
976 ctx) | |
961 """ | 977 """ |
962 # generate a directory move map | 978 # generate a directory move map |
963 invalid = set() | 979 invalid = set() |
964 dirmove = {} | 980 dirmove = {} |
965 | 981 |
995 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d]) | 1011 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d]) |
996 ) | 1012 ) |
997 | 1013 |
998 movewithdir = {} | 1014 movewithdir = {} |
999 # check unaccounted nonoverlapping files against directory moves | 1015 # check unaccounted nonoverlapping files against directory moves |
1000 for f in addedfiles: | 1016 for f in addedfilesfn(): |
1001 if f not in fullcopy: | 1017 if f not in fullcopy: |
1002 for d in dirmove: | 1018 for d in dirmove: |
1003 if f.startswith(d): | 1019 if f.startswith(d): |
1004 # new file added in a directory that was moved, move it | 1020 # new file added in a directory that was moved, move it |
1005 df = dirmove[d] + f[len(d) :] | 1021 df = dirmove[d] + f[len(d) :] |