comparison mercurial/revlog.py @ 47426:cac0e0621ceb

revlog: use file read caching for sidedata. The previous changeset extracted this caching logic from the revlog class into a new class. Adding a new instance of that class allows using the same logic for side data. Differential Revision: https://phab.mercurial-scm.org/D10879
author Simon Sapin <simon.sapin@octobus.net>
date Tue, 15 Jun 2021 12:34:14 +0200
parents e0a314bcbc9d
children 9cecc222c536
comparison
equal deleted inserted replaced
47425:e0a314bcbc9d 47426:cac0e0621ceb
627 self.opener, 627 self.opener,
628 (self._indexfile if self._inline else self._datafile), 628 (self._indexfile if self._inline else self._datafile),
629 self._chunkcachesize, 629 self._chunkcachesize,
630 chunkcache, 630 chunkcache,
631 ) 631 )
632 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
633 self.opener,
634 self._sidedatafile,
635 self._chunkcachesize,
636 )
632 # revnum -> (chain-length, sum-delta-length) 637 # revnum -> (chain-length, sum-delta-length)
633 self._chaininfocache = util.lrucachedict(500) 638 self._chaininfocache = util.lrucachedict(500)
634 # revlog header -> revlog compressor 639 # revlog header -> revlog compressor
635 self._decompressors = {} 640 self._decompressors = {}
636 641
780 785
781 def clearcaches(self): 786 def clearcaches(self):
782 self._revisioncache = None 787 self._revisioncache = None
783 self._chainbasecache.clear() 788 self._chainbasecache.clear()
784 self._segmentfile.clear_cache() 789 self._segmentfile.clear_cache()
790 self._segmentfile_sidedata.clear_cache()
785 self._pcache = {} 791 self._pcache = {}
786 self._nodemap_docket = None 792 self._nodemap_docket = None
787 self.index.clearcaches() 793 self.index.clearcaches()
788 # The python code is the one responsible for validating the docket, we 794 # The python code is the one responsible for validating the docket, we
789 # end up having to refresh it here. 795 # end up having to refresh it here.
1914 if self._inline: 1920 if self._inline:
1915 sidedata_offset += self.index.entry_size * (1 + rev) 1921 sidedata_offset += self.index.entry_size * (1 + rev)
1916 if sidedata_size == 0: 1922 if sidedata_size == 0:
1917 return {} 1923 return {}
1918 1924
1919 # XXX this need caching, as we do for data 1925 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1920 with self._sidedatareadfp() as sdf: 1926 filename = self._sidedatafile
1921 if self._docket.sidedata_end < sidedata_offset + sidedata_size: 1927 end = self._docket.sidedata_end
1922 filename = self._sidedatafile 1928 offset = sidedata_offset
1923 end = self._docket.sidedata_end 1929 length = sidedata_size
1924 offset = sidedata_offset 1930 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1925 length = sidedata_size 1931 raise error.RevlogError(m)
1926 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end) 1932
1927 raise error.RevlogError(m) 1933 comp_segment = self._segmentfile_sidedata.read_chunk(
1928 1934 sidedata_offset, sidedata_size
1929 sdf.seek(sidedata_offset, os.SEEK_SET) 1935 )
1930 comp_segment = sdf.read(sidedata_size)
1931
1932 if len(comp_segment) < sidedata_size:
1933 filename = self._sidedatafile
1934 length = sidedata_size
1935 offset = sidedata_offset
1936 got = len(comp_segment)
1937 m = randomaccessfile.PARTIAL_READ_MSG % (
1938 filename,
1939 length,
1940 offset,
1941 got,
1942 )
1943 raise error.RevlogError(m)
1944 1936
1945 comp = self.index[rev][11] 1937 comp = self.index[rev][11]
1946 if comp == COMP_MODE_PLAIN: 1938 if comp == COMP_MODE_PLAIN:
1947 segment = comp_segment 1939 segment = comp_segment
1948 elif comp == COMP_MODE_DEFAULT: 1940 elif comp == COMP_MODE_DEFAULT:
2031 fp.close() 2023 fp.close()
2032 # We can't use the cached file handle after close(). So prevent 2024 # We can't use the cached file handle after close(). So prevent
2033 # its usage. 2025 # its usage.
2034 self._writinghandles = None 2026 self._writinghandles = None
2035 self._segmentfile.writing_handle = None 2027 self._segmentfile.writing_handle = None
2028 # No need to deal with sidedata writing handle as it is only
2029 # relevant with revlog-v2 which is never inline, not reaching
2030 # this code
2036 2031
2037 new_dfh = self._datafp(b'w+') 2032 new_dfh = self._datafp(b'w+')
2038 new_dfh.truncate(0) # drop any potentially existing data 2033 new_dfh.truncate(0) # drop any potentially existing data
2039 try: 2034 try:
2040 with self._indexfp() as read_ifh: 2035 with self._indexfp() as read_ifh:
2078 # switched from inline to conventional reopen the index 2073 # switched from inline to conventional reopen the index
2079 ifh = self.__index_write_fp() 2074 ifh = self.__index_write_fp()
2080 self._writinghandles = (ifh, new_dfh, None) 2075 self._writinghandles = (ifh, new_dfh, None)
2081 self._segmentfile.writing_handle = new_dfh 2076 self._segmentfile.writing_handle = new_dfh
2082 new_dfh = None 2077 new_dfh = None
2078 # No need to deal with sidedata writing handle as it is only
2079 # relevant with revlog-v2 which is never inline, not reaching
2080 # this code
2083 finally: 2081 finally:
2084 if new_dfh is not None: 2082 if new_dfh is not None:
2085 new_dfh.close() 2083 new_dfh.close()
2086 2084
2087 def _nodeduplicatecallback(self, transaction, node): 2085 def _nodeduplicatecallback(self, transaction, node):
2136 else: 2134 else:
2137 transaction.add(self._indexfile, isize) 2135 transaction.add(self._indexfile, isize)
2138 # exposing all file handle for writing. 2136 # exposing all file handle for writing.
2139 self._writinghandles = (ifh, dfh, sdfh) 2137 self._writinghandles = (ifh, dfh, sdfh)
2140 self._segmentfile.writing_handle = ifh if self._inline else dfh 2138 self._segmentfile.writing_handle = ifh if self._inline else dfh
2139 self._segmentfile_sidedata.writing_handle = sdfh
2141 yield 2140 yield
2142 if self._docket is not None: 2141 if self._docket is not None:
2143 self._write_docket(transaction) 2142 self._write_docket(transaction)
2144 finally: 2143 finally:
2145 self._writinghandles = None 2144 self._writinghandles = None
2146 self._segmentfile.writing_handle = None 2145 self._segmentfile.writing_handle = None
2146 self._segmentfile_sidedata.writing_handle = None
2147 if dfh is not None: 2147 if dfh is not None:
2148 dfh.close() 2148 dfh.close()
2149 if sdfh is not None: 2149 if sdfh is not None:
2150 sdfh.close() 2150 sdfh.close()
2151 # closing the index file last to avoid exposing referent to 2151 # closing the index file last to avoid exposing referent to
2776 2776
2777 # then reset internal state in memory to forget those revisions 2777 # then reset internal state in memory to forget those revisions
2778 self._revisioncache = None 2778 self._revisioncache = None
2779 self._chaininfocache = util.lrucachedict(500) 2779 self._chaininfocache = util.lrucachedict(500)
2780 self._segmentfile.clear_cache() 2780 self._segmentfile.clear_cache()
2781 self._segmentfile_sidedata.clear_cache()
2781 2782
2782 del self.index[rev:-1] 2783 del self.index[rev:-1]
2783 2784
2784 def checksize(self): 2785 def checksize(self):
2785 """Check size of index and data files 2786 """Check size of index and data files