Mercurial > hg
comparison mercurial/revlogutils/deltas.py @ 49678:efbbc2f9121e
delta-find: use a smarter object for snapshot caching
This open the way for a longer lived cache.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Sun, 06 Nov 2022 16:56:23 -0500 |
parents | 05db41701ece |
children | b670eb3dd6c9 |
comparison
equal
deleted
inserted
replaced
49677:05db41701ece | 49678:efbbc2f9121e |
---|---|
797 break | 797 break |
798 | 798 |
799 yield None | 799 yield None |
800 | 800 |
801 | 801 |
802 def _findsnapshots(revlog, cache, start_rev): | |
803 """find snapshot from start_rev to tip""" | |
804 if util.safehasattr(revlog.index, b'findsnapshots'): | |
805 revlog.index.findsnapshots(cache, start_rev) | |
806 else: | |
807 deltaparent = revlog.deltaparent | |
808 issnapshot = revlog.issnapshot | |
809 for rev in revlog.revs(start_rev): | |
810 if issnapshot(rev): | |
811 cache[deltaparent(rev)].append(rev) | |
812 | |
813 | |
814 def _refinedgroups(revlog, p1, p2, cachedelta): | 802 def _refinedgroups(revlog, p1, p2, cachedelta): |
815 good = None | 803 good = None |
816 # First we try to reuse a the delta contained in the bundle. | 804 # First we try to reuse a the delta contained in the bundle. |
817 # (or from the source revlog) | 805 # (or from the source revlog) |
818 # | 806 # |
830 if debug_info is not None: | 818 if debug_info is not None: |
831 debug_info['cached-delta.accepted'] += 1 | 819 debug_info['cached-delta.accepted'] += 1 |
832 yield None | 820 yield None |
833 return | 821 return |
834 # XXX cache me higher | 822 # XXX cache me higher |
835 snapshots = collections.defaultdict(list) | 823 snapshot_cache = SnapshotCache() |
836 groups = _rawgroups( | 824 groups = _rawgroups( |
837 revlog, | 825 revlog, |
838 p1, | 826 p1, |
839 p2, | 827 p2, |
840 cachedelta, | 828 cachedelta, |
841 snapshots, | 829 snapshot_cache, |
842 ) | 830 ) |
843 for candidates in groups: | 831 for candidates in groups: |
844 good = yield candidates | 832 good = yield candidates |
845 if good is not None: | 833 if good is not None: |
846 break | 834 break |
859 base = revlog.deltaparent(good) | 847 base = revlog.deltaparent(good) |
860 if base == nullrev: | 848 if base == nullrev: |
861 break | 849 break |
862 good = yield (base,) | 850 good = yield (base,) |
863 # refine snapshot up | 851 # refine snapshot up |
864 if not snapshots: | 852 if not snapshot_cache.snapshots: |
865 _findsnapshots(revlog, snapshots, good + 1) | 853 snapshot_cache.update(revlog, good + 1) |
866 previous = None | 854 previous = None |
867 while good != previous: | 855 while good != previous: |
868 previous = good | 856 previous = good |
869 children = tuple(sorted(c for c in snapshots[good])) | 857 children = tuple(sorted(c for c in snapshot_cache.snapshots[good])) |
870 good = yield children | 858 good = yield children |
871 | 859 |
872 if debug_info is not None: | 860 if debug_info is not None: |
873 if good is None: | 861 if good is None: |
874 debug_info['no-solution'] += 1 | 862 debug_info['no-solution'] += 1 |
875 | 863 |
876 yield None | 864 yield None |
877 | 865 |
878 | 866 |
879 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None): | 867 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None): |
880 """Provides group of revision to be tested as delta base | 868 """Provides group of revision to be tested as delta base |
881 | 869 |
882 This lower level function focus on emitting delta theorically interresting | 870 This lower level function focus on emitting delta theorically interresting |
883 without looking it any practical details. | 871 without looking it any practical details. |
884 | 872 |
905 elif len(parents) > 0: | 893 elif len(parents) > 0: |
906 # Test all parents (1 or 2), and keep the best candidate | 894 # Test all parents (1 or 2), and keep the best candidate |
907 yield parents | 895 yield parents |
908 | 896 |
909 if sparse and parents: | 897 if sparse and parents: |
910 if snapshots is None: | 898 if snapshot_cache is None: |
911 # map: base-rev: [snapshot-revs] | 899 # map: base-rev: [snapshot-revs] |
912 snapshots = collections.defaultdict(list) | 900 snapshot_cache = SnapshotCache() |
913 # See if we can use an existing snapshot in the parent chains to use as | 901 # See if we can use an existing snapshot in the parent chains to use as |
914 # a base for a new intermediate-snapshot | 902 # a base for a new intermediate-snapshot |
915 # | 903 # |
916 # search for snapshot in parents delta chain | 904 # search for snapshot in parents delta chain |
917 # map: snapshot-level: snapshot-rev | 905 # map: snapshot-level: snapshot-rev |
921 for idx, s in enumerate(chain): | 909 for idx, s in enumerate(chain): |
922 if not revlog.issnapshot(s): | 910 if not revlog.issnapshot(s): |
923 break | 911 break |
924 parents_snaps[idx].add(s) | 912 parents_snaps[idx].add(s) |
925 snapfloor = min(parents_snaps[0]) + 1 | 913 snapfloor = min(parents_snaps[0]) + 1 |
926 _findsnapshots(revlog, snapshots, snapfloor) | 914 snapshot_cache.update(revlog, snapfloor) |
927 # search for the highest "unrelated" revision | 915 # search for the highest "unrelated" revision |
928 # | 916 # |
929 # Adding snapshots used by "unrelated" revision increase the odd we | 917 # Adding snapshots used by "unrelated" revision increase the odd we |
930 # reuse an independant, yet better snapshot chain. | 918 # reuse an independant, yet better snapshot chain. |
931 # | 919 # |
959 # likely to result in small delta | 947 # likely to result in small delta |
960 floor = None | 948 floor = None |
961 for idx, snaps in sorted(parents_snaps.items(), reverse=True): | 949 for idx, snaps in sorted(parents_snaps.items(), reverse=True): |
962 siblings = set() | 950 siblings = set() |
963 for s in snaps: | 951 for s in snaps: |
964 siblings.update(snapshots[s]) | 952 siblings.update(snapshot_cache.snapshots[s]) |
965 # Before considering making a new intermediate snapshot, we check | 953 # Before considering making a new intermediate snapshot, we check |
966 # if an existing snapshot, children of base we consider, would be | 954 # if an existing snapshot, children of base we consider, would be |
967 # suitable. | 955 # suitable. |
968 # | 956 # |
969 # It give a change to reuse a delta chain "unrelated" to the | 957 # It give a change to reuse a delta chain "unrelated" to the |
987 # | 975 # |
988 # It give a chance to reuse a delta chain unrelated to the current | 976 # It give a chance to reuse a delta chain unrelated to the current |
989 # revisions instead of starting our own. Without such re-use, | 977 # revisions instead of starting our own. Without such re-use, |
990 # topological branches would keep reopening new full chains. Creating | 978 # topological branches would keep reopening new full chains. Creating |
991 # more and more snapshot as the repository grow. | 979 # more and more snapshot as the repository grow. |
992 yield tuple(snapshots[nullrev]) | 980 yield tuple(snapshot_cache.snapshots[nullrev]) |
993 | 981 |
994 if not sparse: | 982 if not sparse: |
995 # other approach failed try against prev to hopefully save us a | 983 # other approach failed try against prev to hopefully save us a |
996 # fulltext. | 984 # fulltext. |
997 yield (prev,) | 985 yield (prev,) |
986 | |
987 | |
988 class SnapshotCache: | |
989 __slots__ = ('snapshots', '_start_rev', '_end_rev') | |
990 | |
991 def __init__(self): | |
992 # XXX should probably be a set ? | |
993 self.snapshots = collections.defaultdict(list) | |
994 self._start_rev = None | |
995 self._end_rev = None | |
996 | |
997 def update(self, revlog, start_rev=0): | |
998 """find snapshots from start_rev to tip""" | |
999 nb_revs = len(revlog) | |
1000 end_rev = nb_revs - 1 | |
1001 if start_rev > end_rev: | |
1002 return # range is empty | |
1003 | |
1004 if self._start_rev is None: | |
1005 assert self._end_rev is None | |
1006 self._update(revlog, start_rev, end_rev) | |
1007 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev): | |
1008 if start_rev < self._start_rev: | |
1009 self._update(revlog, start_rev, self._start_rev - 1) | |
1010 if self._end_rev < end_rev: | |
1011 self._update(revlog, self._end_rev + 1, end_rev) | |
1012 | |
1013 if self._start_rev is None: | |
1014 assert self._end_rev is None | |
1015 self._end_rev = end_rev | |
1016 self._start_rev = start_rev | |
1017 else: | |
1018 self._start_rev = min(self._start_rev, start_rev) | |
1019 self._end_rev = max(self._end_rev, end_rev) | |
1020 assert self._start_rev <= self._end_rev, ( | |
1021 self._start_rev, | |
1022 self._end_rev, | |
1023 ) | |
1024 | |
1025 def _update(self, revlog, start_rev, end_rev): | |
1026 """internal method that actually do update content""" | |
1027 assert self._start_rev is None or ( | |
1028 start_rev < self._start_rev or start_rev > self._end_rev | |
1029 ), (self._start_rev, self._end_rev, start_rev, end_rev) | |
1030 assert self._start_rev is None or ( | |
1031 end_rev < self._start_rev or end_rev > self._end_rev | |
1032 ), (self._start_rev, self._end_rev, start_rev, end_rev) | |
1033 cache = self.snapshots | |
1034 if util.safehasattr(revlog.index, b'findsnapshots'): | |
1035 revlog.index.findsnapshots(cache, start_rev, end_rev) | |
1036 else: | |
1037 deltaparent = revlog.deltaparent | |
1038 issnapshot = revlog.issnapshot | |
1039 for rev in revlog.revs(start_rev, end_rev): | |
1040 if issnapshot(rev): | |
1041 cache[deltaparent(rev)].append(rev) | |
998 | 1042 |
999 | 1043 |
1000 class deltacomputer: | 1044 class deltacomputer: |
1001 def __init__( | 1045 def __init__( |
1002 self, | 1046 self, |