comparison mercurial/copies.py @ 46109:2f357d053df2

copies: make calculating lazy for dir move detection's "addedfiles"

The information calculated here was only needed if (a) --debug was specified, or (b) a directory move was plausibly detected. With tree manifests (especially in my pathological repo and with our custom setup), pre-calculating `u1` and `u2` can be quite slow, and it's not even necessary in many cases. Let's delay calculating them until we know it's actually necessary.

This should have no observable differences in output.

### Performance

I ran a rebase command in my pathological repo, rebasing two nodes across several public phase commits, but where no directory copies exist in any of the paths I'm tracking.

#### Before

```
Time (mean ± σ):     3.711 s ± 0.061 s    [User: 0.3 ms, System: 1.5 ms]
Range (min … max):   3.640 s … 3.827 s    10 runs
```

#### After

```
Time (mean ± σ):     868.3 ms ± 10.1 ms    [User: 0.5 ms, System: 1.2 ms]
Range (min … max):   856.6 ms … 883.6 ms    10 runs
```

Differential Revision: https://phab.mercurial-scm.org/D9567
author Kyle Lippincott <spectral@google.com>
date Fri, 11 Dec 2020 13:39:56 -0800
parents e0313b0a6f7e
children 59fa3890d40a
comparison
equal deleted inserted replaced
46108:bdc2bf68f19e 46109:2f357d053df2
894 _checksinglesidecopies( 894 _checksinglesidecopies(
895 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2 895 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
896 ) 896 )
897 897
898 # find interesting file sets from manifests 898 # find interesting file sets from manifests
899 addedinm1 = m1.filesnotin(mb, repo.narrowmatch()) 899 cache = []
900 addedinm2 = m2.filesnotin(mb, repo.narrowmatch()) 900
901 u1 = sorted(addedinm1 - addedinm2) 901 def _get_addedfiles(idx):
902 u2 = sorted(addedinm2 - addedinm1) 902 if not cache:
903 903 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
904 header = b" unmatched files in %s" 904 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
905 if u1: 905 u1 = sorted(addedinm1 - addedinm2)
906 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))) 906 u2 = sorted(addedinm2 - addedinm1)
907 if u2: 907 cache.extend((u1, u2))
908 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))) 908 return cache[idx]
909 909
910 u1fn = lambda: _get_addedfiles(0)
911 u2fn = lambda: _get_addedfiles(1)
910 if repo.ui.debugflag: 912 if repo.ui.debugflag:
913 u1 = u1fn()
914 u2 = u2fn()
915
916 header = b" unmatched files in %s"
917 if u1:
918 repo.ui.debug(
919 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
920 )
921 if u2:
922 repo.ui.debug(
923 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
924 )
925
911 renamedeleteset = set() 926 renamedeleteset = set()
912 divergeset = set() 927 divergeset = set()
913 for dsts in diverge.values(): 928 for dsts in diverge.values():
914 divergeset.update(dsts) 929 divergeset.update(dsts)
915 for dsts in renamedelete1.values(): 930 for dsts in renamedelete1.values():
939 del renamedeleteset 954 del renamedeleteset
940 del divergeset 955 del divergeset
941 956
942 repo.ui.debug(b" checking for directory renames\n") 957 repo.ui.debug(b" checking for directory renames\n")
943 958
944 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2) 959 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
945 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1) 960 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
946 961
947 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1) 962 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
948 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2) 963 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
949 964
950 return branch_copies1, branch_copies2, diverge 965 return branch_copies1, branch_copies2, diverge
951 966
952 967
953 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles): 968 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
954 """Finds moved directories and files that should move with them. 969 """Finds moved directories and files that should move with them.
955 970
956 ctx: the context for one of the sides 971 ctx: the context for one of the sides
957 copy: files copied on the same side (as ctx) 972 copy: files copied on the same side (as ctx)
958 fullcopy: files copied on the same side (as ctx), including those that 973 fullcopy: files copied on the same side (as ctx), including those that
959 merge.manifestmerge() won't care about 974 merge.manifestmerge() won't care about
960 addedfiles: added files on the other side (compared to ctx) 975 addedfilesfn: function returning added files on the other side (compared to
976 ctx)
961 """ 977 """
962 # generate a directory move map 978 # generate a directory move map
963 invalid = set() 979 invalid = set()
964 dirmove = {} 980 dirmove = {}
965 981
995 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d]) 1011 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
996 ) 1012 )
997 1013
998 movewithdir = {} 1014 movewithdir = {}
999 # check unaccounted nonoverlapping files against directory moves 1015 # check unaccounted nonoverlapping files against directory moves
1000 for f in addedfiles: 1016 for f in addedfilesfn():
1001 if f not in fullcopy: 1017 if f not in fullcopy:
1002 for d in dirmove: 1018 for d in dirmove:
1003 if f.startswith(d): 1019 if f.startswith(d):
1004 # new file added in a directory that was moved, move it 1020 # new file added in a directory that was moved, move it
1005 df = dirmove[d] + f[len(d) :] 1021 df = dirmove[d] + f[len(d) :]