Mercurial > hg
comparison mercurial/branchmap.py @ 51531:f85f23f1479b
branchcache: skip entries that are topological heads in the on disk file
In the majority of cases, topological heads are also branch heads. We have
an efficient way to get the topological heads and an efficient way to retrieve
their branch information. So there is little value in putting them in the branch
cache file explicitly. On the contrary, writing them explicitly tends to create
very large cache files that are inefficient to read and update.
So the branch cache v3 format no longer includes them. This changeset focuses
on the format aspect and does not address the performance aspect. We will cover
that later.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Thu, 07 Mar 2024 10:55:22 +0100 |
parents | 4141d12de073 |
children | a0ef462cf1a4 |
comparison
equal
deleted
inserted
replaced
51530:fc710c993ec9 | 51531:f85f23f1479b |
---|---|
592 raise error.ProgrammingError(msg) | 592 raise error.ProgrammingError(msg) |
593 try: | 593 try: |
594 filename = self._filename(repo) | 594 filename = self._filename(repo) |
595 with repo.cachevfs(filename, b"w", atomictemp=True) as f: | 595 with repo.cachevfs(filename, b"w", atomictemp=True) as f: |
596 self._write_header(f) | 596 self._write_header(f) |
597 nodecount = self._write_heads(f) | 597 nodecount = self._write_heads(repo, f) |
598 repo.ui.log( | 598 repo.ui.log( |
599 b'branchcache', | 599 b'branchcache', |
600 b'wrote %s with %d labels and %d nodes\n', | 600 b'wrote %s with %d labels and %d nodes\n', |
601 _branchcachedesc(repo), | 601 _branchcachedesc(repo), |
602 len(self._entries), | 602 len(self._entries), |
611 ) | 611 ) |
612 | 612 |
613 def _write_header(self, fp) -> None: | 613 def _write_header(self, fp) -> None: |
614 raise NotImplementedError | 614 raise NotImplementedError |
615 | 615 |
616 def _write_heads(self, fp) -> int: | 616 def _write_heads(self, repo, fp) -> int: |
617 """write list of heads to a file | 617 """write list of heads to a file |
618 | 618 |
619 Return the number of heads written.""" | 619 Return the number of heads written.""" |
620 nodecount = 0 | 620 nodecount = 0 |
621 for label, nodes in sorted(self._entries.items()): | 621 for label, nodes in sorted(self._entries.items()): |
825 cache can be used for this repository state at all. | 825 cache can be used for this repository state at all. |
826 | 826 |
827 The open/closed state is represented by a single letter 'o' or 'c'. | 827 The open/closed state is represented by a single letter 'o' or 'c'. |
828 This field can be used to avoid changelog reads when determining if a | 828 This field can be used to avoid changelog reads when determining if a |
829 branch head closes a branch or not. | 829 branch head closes a branch or not. |
830 | |
831 Topological heads are not included in the listing and should be dispatched | |
832 on the right branch at read time. Obsolete topological heads should be | |
833 ignored. | |
830 """ | 834 """ |
831 | 835 |
832 _base_filename = b"branch3" | 836 _base_filename = b"branch3" |
833 _default_key_hashes = (None, None) | 837 _default_key_hashes = (None, None) |
838 | |
839 def _get_topo_heads(self, repo) -> List[int]: | |
840 """returns the topological head of a repoview content up to self.tiprev""" | |
841 cl = repo.changelog | |
842 if self.tiprev == nullrev: | |
843 return [] | |
844 elif self.tiprev == cl.tiprev(): | |
845 return cl.headrevs() | |
846 else: | |
847 # XXX passing tiprev as ceiling of cl.headrevs could be faster | |
848 heads = cl.headrevs(cl.revs(stop=self.tiprev)) | |
849 return heads | |
834 | 850 |
835 def _write_header(self, fp) -> None: | 851 def _write_header(self, fp) -> None: |
836 cache_keys = { | 852 cache_keys = { |
837 b"tip-node": hex(self.tipnode), | 853 b"tip-node": hex(self.tipnode), |
838 b"tip-rev": b'%d' % self.tiprev, | 854 b"tip-rev": b'%d' % self.tiprev, |
842 cache_keys[b"filtered-hash"] = hex(self.key_hashes[0]) | 858 cache_keys[b"filtered-hash"] = hex(self.key_hashes[0]) |
843 if self.key_hashes[1] is not None: | 859 if self.key_hashes[1] is not None: |
844 cache_keys[b"obsolete-hash"] = hex(self.key_hashes[1]) | 860 cache_keys[b"obsolete-hash"] = hex(self.key_hashes[1]) |
845 pieces = (b"%s=%s" % i for i in sorted(cache_keys.items())) | 861 pieces = (b"%s=%s" % i for i in sorted(cache_keys.items())) |
846 fp.write(b" ".join(pieces) + b'\n') | 862 fp.write(b" ".join(pieces) + b'\n') |
863 | |
864 def _write_heads(self, repo, fp) -> int: | |
865 """write list of heads to a file | |
866 | |
867 Return the number of heads written.""" | |
868 nodecount = 0 | |
869 topo_heads = set(self._get_topo_heads(repo)) | |
870 to_rev = repo.changelog.index.rev | |
871 for label, nodes in sorted(self._entries.items()): | |
872 label = encoding.fromlocal(label) | |
873 for node in nodes: | |
874 rev = to_rev(node) | |
875 if rev in topo_heads: | |
876 continue | |
877 if node in self._closednodes: | |
878 state = b'c' | |
879 else: | |
880 state = b'o' | |
881 nodecount += 1 | |
882 fp.write(b"%s %s %s\n" % (hex(node), state, label)) | |
883 return nodecount | |
847 | 884 |
848 @classmethod | 885 @classmethod |
849 def _load_header(cls, repo, lineiter): | 886 def _load_header(cls, repo, lineiter): |
850 header_line = next(lineiter) | 887 header_line = next(lineiter) |
851 pieces = header_line.rstrip(b'\n').split(b" ") | 888 pieces = header_line.rstrip(b'\n').split(b" ") |
866 else: | 903 else: |
867 msg = b"unknown cache key: %r" % k | 904 msg = b"unknown cache key: %r" % k |
868 raise ValueError(msg) | 905 raise ValueError(msg) |
869 args["key_hashes"] = (filtered_hash, obsolete_hash) | 906 args["key_hashes"] = (filtered_hash, obsolete_hash) |
870 return args | 907 return args |
908 | |
909 def _load_heads(self, repo, lineiter): | |
910 """fully loads the branchcache by reading from the file using the line | |
911 iterator passed""" | |
912 super()._load_heads(repo, lineiter) | |
913 cl = repo.changelog | |
914 getbranchinfo = repo.revbranchcache().branchinfo | |
915 obsrevs = obsolete.getrevs(repo, b'obsolete') | |
916 to_node = cl.node | |
917 touched_branch = set() | |
918 for head in self._get_topo_heads(repo): | |
919 if head in obsrevs: | |
920 continue | |
921 node = to_node(head) | |
922 branch, closed = getbranchinfo(head) | |
923 self._entries.setdefault(branch, []).append(node) | |
924 if closed: | |
925 self._closednodes.add(node) | |
926 touched_branch.add(branch) | |
927 to_rev = cl.index.rev | |
928 for branch in touched_branch: | |
929 self._entries[branch].sort(key=to_rev) | |
871 | 930 |
872 def _compute_key_hashes(self, repo) -> Tuple[bytes]: | 931 def _compute_key_hashes(self, repo) -> Tuple[bytes]: |
873 """return the cache key hashes that match this repoview state""" | 932 """return the cache key hashes that match this repoview state""" |
874 return scmutil.filtered_and_obsolete_hash( | 933 return scmutil.filtered_and_obsolete_hash( |
875 repo, | 934 repo, |