comparison mercurial/branchmap.py @ 51531:f85f23f1479b

branchcache: skip entries that are topological heads in the on-disk file

In the majority of cases, topological heads are also branch heads. We have an efficient way to get the topological heads and an efficient way to retrieve their branch information, so there is little value in listing them explicitly in the branch cache file. On the contrary, writing them out explicitly tends to create very large cache files that are inefficient to read and update. The branch cache v3 format therefore no longer includes them.

This changeset focuses on the format aspect only and does not address performance; that will be covered later.
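To make the round trip concrete, here is a minimal, self-contained sketch of the idea: skip topological heads when writing the cache body, and dispatch them back onto their branches when loading it. The names (write_v3_body, load_v3_body, branchinfo) and the plain-dict data model are hypothetical stand-ins, not Mercurial's API; the real implementation, shown in the comparison below, also writes a header line with tip-node, tip-rev and cache-key hashes, which is omitted here.

    def write_v3_body(entries, closed_nodes, topo_heads, to_rev):
        """Serialize branch heads, skipping nodes that are topological heads.

        entries      -- dict mapping branch name -> list of head nodes
        closed_nodes -- set of nodes that close their branch
        topo_heads   -- set of topological head revisions
        to_rev       -- callable mapping a node to its revision number
        """
        lines = []
        for label, nodes in sorted(entries.items()):
            for node in nodes:
                if to_rev(node) in topo_heads:
                    continue  # cheap to recompute at read time, so not stored
                state = 'c' if node in closed_nodes else 'o'
                lines.append('%s %s %s' % (node, state, label))
        return lines

    def load_v3_body(lines, topo_heads, obsolete_revs, to_node, branchinfo):
        """Rebuild the full branch map, re-adding non-obsolete topological heads.

        branchinfo -- callable mapping a revision to (branch, closed); a
                      stand-in for the rev-branch cache lookup used by the
                      real code
        """
        entries, closed_nodes = {}, set()
        for line in lines:
            node, state, label = line.split(' ', 2)
            entries.setdefault(label, []).append(node)
            if state == 'c':
                closed_nodes.add(node)
        for rev in sorted(topo_heads):
            if rev in obsolete_revs:
                continue  # obsolete topological heads are ignored
            branch, closed = branchinfo(rev)
            node = to_node(rev)
            entries.setdefault(branch, []).append(node)
            if closed:
                closed_nodes.add(node)
        return entries, closed_nodes

Since in most repositories nearly every branch head is also a topological head, the body written by write_v3_body stays tiny, which is where the space and update-time savings of the v3 format come from.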
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Thu, 07 Mar 2024 10:55:22 +0100
parents 4141d12de073
children a0ef462cf1a4
comparison
51530:fc710c993ec9 (old)    51531:f85f23f1479b (new)
@@ -592,11 +592,11 @@
             raise error.ProgrammingError(msg)
         try:
             filename = self._filename(repo)
             with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                 self._write_header(f)
-                nodecount = self._write_heads(f)
+                nodecount = self._write_heads(repo, f)
                 repo.ui.log(
                     b'branchcache',
                     b'wrote %s with %d labels and %d nodes\n',
                     _branchcachedesc(repo),
                     len(self._entries),
@@ -611,11 +611,11 @@
             )
 
     def _write_header(self, fp) -> None:
         raise NotImplementedError
 
-    def _write_heads(self, fp) -> int:
+    def _write_heads(self, repo, fp) -> int:
         """write list of heads to a file
 
         Return the number of heads written."""
         nodecount = 0
         for label, nodes in sorted(self._entries.items()):
@@ -825,14 +825,30 @@
     cache can be used for this repository state at all.
 
     The open/closed state is represented by a single letter 'o' or 'c'.
     This field can be used to avoid changelog reads when determining if a
     branch head closes a branch or not.
+
+    Topological heads are not included in the listing and should be dispatched
+    on the right branch at read time. Obsolete topological heads should be
+    ignored.
     """
 
     _base_filename = b"branch3"
     _default_key_hashes = (None, None)
+
+    def _get_topo_heads(self, repo) -> List[int]:
+        """returns the topological head of a repoview content up to self.tiprev"""
+        cl = repo.changelog
+        if self.tiprev == nullrev:
+            return []
+        elif self.tiprev == cl.tiprev():
+            return cl.headrevs()
+        else:
+            # XXX passing tiprev as ceiling of cl.headrevs could be faster
+            heads = cl.headrevs(cl.revs(stop=self.tiprev))
+            return heads
 
     def _write_header(self, fp) -> None:
         cache_keys = {
             b"tip-node": hex(self.tipnode),
             b"tip-rev": b'%d' % self.tiprev,
@@ -842,10 +858,31 @@
                 cache_keys[b"filtered-hash"] = hex(self.key_hashes[0])
             if self.key_hashes[1] is not None:
                 cache_keys[b"obsolete-hash"] = hex(self.key_hashes[1])
         pieces = (b"%s=%s" % i for i in sorted(cache_keys.items()))
         fp.write(b" ".join(pieces) + b'\n')
+
+    def _write_heads(self, repo, fp) -> int:
+        """write list of heads to a file
+
+        Return the number of heads written."""
+        nodecount = 0
+        topo_heads = set(self._get_topo_heads(repo))
+        to_rev = repo.changelog.index.rev
+        for label, nodes in sorted(self._entries.items()):
+            label = encoding.fromlocal(label)
+            for node in nodes:
+                rev = to_rev(node)
+                if rev in topo_heads:
+                    continue
+                if node in self._closednodes:
+                    state = b'c'
+                else:
+                    state = b'o'
+                nodecount += 1
+                fp.write(b"%s %s %s\n" % (hex(node), state, label))
+        return nodecount
 
     @classmethod
     def _load_header(cls, repo, lineiter):
         header_line = next(lineiter)
         pieces = header_line.rstrip(b'\n').split(b" ")
@@ -866,10 +903,32 @@
             else:
                 msg = b"unknown cache key: %r" % k
                 raise ValueError(msg)
         args["key_hashes"] = (filtered_hash, obsolete_hash)
         return args
+
+    def _load_heads(self, repo, lineiter):
+        """fully loads the branchcache by reading from the file using the line
+        iterator passed"""
+        super()._load_heads(repo, lineiter)
+        cl = repo.changelog
+        getbranchinfo = repo.revbranchcache().branchinfo
+        obsrevs = obsolete.getrevs(repo, b'obsolete')
+        to_node = cl.node
+        touched_branch = set()
+        for head in self._get_topo_heads(repo):
+            if head in obsrevs:
+                continue
+            node = to_node(head)
+            branch, closed = getbranchinfo(head)
+            self._entries.setdefault(branch, []).append(node)
+            if closed:
+                self._closednodes.add(node)
+            touched_branch.add(branch)
+        to_rev = cl.index.rev
+        for branch in touched_branch:
+            self._entries[branch].sort(key=to_rev)
 
     def _compute_key_hashes(self, repo) -> Tuple[bytes]:
         """return the cache key hashes that match this repoview state"""
         return scmutil.filtered_and_obsolete_hash(
             repo,