comparison mercurial/revlogutils/deltas.py @ 49678:efbbc2f9121e

delta-find: use a smarter object for snapshot caching This open the way for a longer lived cache.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sun, 06 Nov 2022 16:56:23 -0500
parents 05db41701ece
children b670eb3dd6c9
comparison
equal deleted inserted replaced
49677:05db41701ece 49678:efbbc2f9121e
797 break 797 break
798 798
799 yield None 799 yield None
800 800
801 801
802 def _findsnapshots(revlog, cache, start_rev):
803 """find snapshot from start_rev to tip"""
804 if util.safehasattr(revlog.index, b'findsnapshots'):
805 revlog.index.findsnapshots(cache, start_rev)
806 else:
807 deltaparent = revlog.deltaparent
808 issnapshot = revlog.issnapshot
809 for rev in revlog.revs(start_rev):
810 if issnapshot(rev):
811 cache[deltaparent(rev)].append(rev)
812
813
814 def _refinedgroups(revlog, p1, p2, cachedelta): 802 def _refinedgroups(revlog, p1, p2, cachedelta):
815 good = None 803 good = None
816 # First we try to reuse a the delta contained in the bundle. 804 # First we try to reuse a the delta contained in the bundle.
817 # (or from the source revlog) 805 # (or from the source revlog)
818 # 806 #
830 if debug_info is not None: 818 if debug_info is not None:
831 debug_info['cached-delta.accepted'] += 1 819 debug_info['cached-delta.accepted'] += 1
832 yield None 820 yield None
833 return 821 return
834 # XXX cache me higher 822 # XXX cache me higher
835 snapshots = collections.defaultdict(list) 823 snapshot_cache = SnapshotCache()
836 groups = _rawgroups( 824 groups = _rawgroups(
837 revlog, 825 revlog,
838 p1, 826 p1,
839 p2, 827 p2,
840 cachedelta, 828 cachedelta,
841 snapshots, 829 snapshot_cache,
842 ) 830 )
843 for candidates in groups: 831 for candidates in groups:
844 good = yield candidates 832 good = yield candidates
845 if good is not None: 833 if good is not None:
846 break 834 break
859 base = revlog.deltaparent(good) 847 base = revlog.deltaparent(good)
860 if base == nullrev: 848 if base == nullrev:
861 break 849 break
862 good = yield (base,) 850 good = yield (base,)
863 # refine snapshot up 851 # refine snapshot up
864 if not snapshots: 852 if not snapshot_cache.snapshots:
865 _findsnapshots(revlog, snapshots, good + 1) 853 snapshot_cache.update(revlog, good + 1)
866 previous = None 854 previous = None
867 while good != previous: 855 while good != previous:
868 previous = good 856 previous = good
869 children = tuple(sorted(c for c in snapshots[good])) 857 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
870 good = yield children 858 good = yield children
871 859
872 if debug_info is not None: 860 if debug_info is not None:
873 if good is None: 861 if good is None:
874 debug_info['no-solution'] += 1 862 debug_info['no-solution'] += 1
875 863
876 yield None 864 yield None
877 865
878 866
879 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None): 867 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
880 """Provides group of revision to be tested as delta base 868 """Provides group of revision to be tested as delta base
881 869
882 This lower level function focus on emitting delta theorically interresting 870 This lower level function focus on emitting delta theorically interresting
883 without looking it any practical details. 871 without looking it any practical details.
884 872
905 elif len(parents) > 0: 893 elif len(parents) > 0:
906 # Test all parents (1 or 2), and keep the best candidate 894 # Test all parents (1 or 2), and keep the best candidate
907 yield parents 895 yield parents
908 896
909 if sparse and parents: 897 if sparse and parents:
910 if snapshots is None: 898 if snapshot_cache is None:
911 # map: base-rev: [snapshot-revs] 899 # map: base-rev: [snapshot-revs]
912 snapshots = collections.defaultdict(list) 900 snapshot_cache = SnapshotCache()
913 # See if we can use an existing snapshot in the parent chains to use as 901 # See if we can use an existing snapshot in the parent chains to use as
914 # a base for a new intermediate-snapshot 902 # a base for a new intermediate-snapshot
915 # 903 #
916 # search for snapshot in parents delta chain 904 # search for snapshot in parents delta chain
917 # map: snapshot-level: snapshot-rev 905 # map: snapshot-level: snapshot-rev
921 for idx, s in enumerate(chain): 909 for idx, s in enumerate(chain):
922 if not revlog.issnapshot(s): 910 if not revlog.issnapshot(s):
923 break 911 break
924 parents_snaps[idx].add(s) 912 parents_snaps[idx].add(s)
925 snapfloor = min(parents_snaps[0]) + 1 913 snapfloor = min(parents_snaps[0]) + 1
926 _findsnapshots(revlog, snapshots, snapfloor) 914 snapshot_cache.update(revlog, snapfloor)
927 # search for the highest "unrelated" revision 915 # search for the highest "unrelated" revision
928 # 916 #
929 # Adding snapshots used by "unrelated" revision increase the odd we 917 # Adding snapshots used by "unrelated" revision increase the odd we
930 # reuse an independant, yet better snapshot chain. 918 # reuse an independant, yet better snapshot chain.
931 # 919 #
959 # likely to result in small delta 947 # likely to result in small delta
960 floor = None 948 floor = None
961 for idx, snaps in sorted(parents_snaps.items(), reverse=True): 949 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
962 siblings = set() 950 siblings = set()
963 for s in snaps: 951 for s in snaps:
964 siblings.update(snapshots[s]) 952 siblings.update(snapshot_cache.snapshots[s])
965 # Before considering making a new intermediate snapshot, we check 953 # Before considering making a new intermediate snapshot, we check
966 # if an existing snapshot, children of base we consider, would be 954 # if an existing snapshot, children of base we consider, would be
967 # suitable. 955 # suitable.
968 # 956 #
969 # It give a change to reuse a delta chain "unrelated" to the 957 # It give a change to reuse a delta chain "unrelated" to the
987 # 975 #
988 # It give a chance to reuse a delta chain unrelated to the current 976 # It give a chance to reuse a delta chain unrelated to the current
989 # revisions instead of starting our own. Without such re-use, 977 # revisions instead of starting our own. Without such re-use,
990 # topological branches would keep reopening new full chains. Creating 978 # topological branches would keep reopening new full chains. Creating
991 # more and more snapshot as the repository grow. 979 # more and more snapshot as the repository grow.
992 yield tuple(snapshots[nullrev]) 980 yield tuple(snapshot_cache.snapshots[nullrev])
993 981
994 if not sparse: 982 if not sparse:
995 # other approach failed try against prev to hopefully save us a 983 # other approach failed try against prev to hopefully save us a
996 # fulltext. 984 # fulltext.
997 yield (prev,) 985 yield (prev,)
986
987
988 class SnapshotCache:
989 __slots__ = ('snapshots', '_start_rev', '_end_rev')
990
991 def __init__(self):
992 # XXX should probably be a set ?
993 self.snapshots = collections.defaultdict(list)
994 self._start_rev = None
995 self._end_rev = None
996
997 def update(self, revlog, start_rev=0):
998 """find snapshots from start_rev to tip"""
999 nb_revs = len(revlog)
1000 end_rev = nb_revs - 1
1001 if start_rev > end_rev:
1002 return # range is empty
1003
1004 if self._start_rev is None:
1005 assert self._end_rev is None
1006 self._update(revlog, start_rev, end_rev)
1007 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1008 if start_rev < self._start_rev:
1009 self._update(revlog, start_rev, self._start_rev - 1)
1010 if self._end_rev < end_rev:
1011 self._update(revlog, self._end_rev + 1, end_rev)
1012
1013 if self._start_rev is None:
1014 assert self._end_rev is None
1015 self._end_rev = end_rev
1016 self._start_rev = start_rev
1017 else:
1018 self._start_rev = min(self._start_rev, start_rev)
1019 self._end_rev = max(self._end_rev, end_rev)
1020 assert self._start_rev <= self._end_rev, (
1021 self._start_rev,
1022 self._end_rev,
1023 )
1024
1025 def _update(self, revlog, start_rev, end_rev):
1026 """internal method that actually do update content"""
1027 assert self._start_rev is None or (
1028 start_rev < self._start_rev or start_rev > self._end_rev
1029 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1030 assert self._start_rev is None or (
1031 end_rev < self._start_rev or end_rev > self._end_rev
1032 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1033 cache = self.snapshots
1034 if util.safehasattr(revlog.index, b'findsnapshots'):
1035 revlog.index.findsnapshots(cache, start_rev, end_rev)
1036 else:
1037 deltaparent = revlog.deltaparent
1038 issnapshot = revlog.issnapshot
1039 for rev in revlog.revs(start_rev, end_rev):
1040 if issnapshot(rev):
1041 cache[deltaparent(rev)].append(rev)
998 1042
999 1043
1000 class deltacomputer: 1044 class deltacomputer:
1001 def __init__( 1045 def __init__(
1002 self, 1046 self,