mercurial/revlog.py
changeset 47425 e0a314bcbc9d
parent 47414 5fbac82a8780
child 47426 cac0e0621ceb
equal deleted inserted replaced
47424:f77404040776 47425:e0a314bcbc9d
    84     censor,
    84     censor,
    85     deltas as deltautil,
    85     deltas as deltautil,
    86     docket as docketutil,
    86     docket as docketutil,
    87     flagutil,
    87     flagutil,
    88     nodemap as nodemaputil,
    88     nodemap as nodemaputil,
       
    89     randomaccessfile,
    89     revlogv0,
    90     revlogv0,
    90     sidedata as sidedatautil,
    91     sidedata as sidedatautil,
    91 )
    92 )
    92 from .utils import (
    93 from .utils import (
    93     storageutil,
    94     storageutil,
   123 # Aliased for performance.
   124 # Aliased for performance.
   124 _zlibdecompress = zlib.decompress
   125 _zlibdecompress = zlib.decompress
   125 
   126 
   126 # max size of revlog with inline data
   127 # max size of revlog with inline data
   127 _maxinline = 131072
   128 _maxinline = 131072
   128 _chunksize = 1048576
       
   129 
   129 
   130 # Flag processors for REVIDX_ELLIPSIS.
   130 # Flag processors for REVIDX_ELLIPSIS.
   131 def ellipsisreadprocessor(rl, text):
   131 def ellipsisreadprocessor(rl, text):
   132     return text, False
   132     return text, False
   133 
   133 
   229 
   229 
   230 
   230 
   231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
   231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
   232 # signed integer)
   232 # signed integer)
   233 _maxentrysize = 0x7FFFFFFF
   233 _maxentrysize = 0x7FFFFFFF
   234 
       
   235 PARTIAL_READ_MSG = _(
       
   236     b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
       
   237 )
       
   238 
   234 
   239 FILE_TOO_SHORT_MSG = _(
   235 FILE_TOO_SHORT_MSG = _(
   240     b'cannot read from revlog %s;'
   236     b'cannot read from revlog %s;'
   241     b'  expected %d bytes from offset %d, data size is %d'
   237     b'  expected %d bytes from offset %d, data size is %d'
   242 )
   238 )
   603             self._parse_index = parse_index_v1_nodemap
   599             self._parse_index = parse_index_v1_nodemap
   604         elif use_rust_index:
   600         elif use_rust_index:
   605             self._parse_index = parse_index_v1_mixed
   601             self._parse_index = parse_index_v1_mixed
   606         try:
   602         try:
   607             d = self._parse_index(index_data, self._inline)
   603             d = self._parse_index(index_data, self._inline)
   608             index, _chunkcache = d
   604             index, chunkcache = d
   609             use_nodemap = (
   605             use_nodemap = (
   610                 not self._inline
   606                 not self._inline
   611                 and self._nodemap_file is not None
   607                 and self._nodemap_file is not None
   612                 and util.safehasattr(index, 'update_nodemap_data')
   608                 and util.safehasattr(index, 'update_nodemap_data')
   613             )
   609             )
   624                         index.update_nodemap_data(*nodemap_data)
   620                         index.update_nodemap_data(*nodemap_data)
   625         except (ValueError, IndexError):
   621         except (ValueError, IndexError):
   626             raise error.RevlogError(
   622             raise error.RevlogError(
   627                 _(b"index %s is corrupted") % self.display_id
   623                 _(b"index %s is corrupted") % self.display_id
   628             )
   624             )
   629         self.index, self._chunkcache = d
   625         self.index = index
   630         if not self._chunkcache:
   626         self._segmentfile = randomaccessfile.randomaccessfile(
   631             self._chunkclear()
   627             self.opener,
       
   628             (self._indexfile if self._inline else self._datafile),
       
   629             self._chunkcachesize,
       
   630             chunkcache,
       
   631         )
   632         # revnum -> (chain-length, sum-delta-length)
   632         # revnum -> (chain-length, sum-delta-length)
   633         self._chaininfocache = util.lrucachedict(500)
   633         self._chaininfocache = util.lrucachedict(500)
   634         # revlog header -> revlog compressor
   634         # revlog header -> revlog compressor
   635         self._decompressors = {}
   635         self._decompressors = {}
   636 
   636 
   707     def _datafp(self, mode=b'r'):
   707     def _datafp(self, mode=b'r'):
   708         """file object for the revlog's data file"""
   708         """file object for the revlog's data file"""
   709         return self.opener(self._datafile, mode=mode)
   709         return self.opener(self._datafile, mode=mode)
   710 
   710 
   711     @contextlib.contextmanager
   711     @contextlib.contextmanager
   712     def _datareadfp(self, existingfp=None):
       
   713         """file object suitable to read data"""
       
   714         # Use explicit file handle, if given.
       
   715         if existingfp is not None:
       
   716             yield existingfp
       
   717 
       
   718         # Use a file handle being actively used for writes, if available.
       
   719         # There is some danger to doing this because reads will seek the
       
   720         # file. However, _writeentry() performs a SEEK_END before all writes,
       
   721         # so we should be safe.
       
   722         elif self._writinghandles:
       
   723             if self._inline:
       
   724                 yield self._writinghandles[0]
       
   725             else:
       
   726                 yield self._writinghandles[1]
       
   727 
       
   728         # Otherwise open a new file handle.
       
   729         else:
       
   730             if self._inline:
       
   731                 func = self._indexfp
       
   732             else:
       
   733                 func = self._datafp
       
   734             with func() as fp:
       
   735                 yield fp
       
   736 
       
   737     @contextlib.contextmanager
       
   738     def _sidedatareadfp(self):
   712     def _sidedatareadfp(self):
   739         """file object suitable to read sidedata"""
   713         """file object suitable to read sidedata"""
   740         if self._writinghandles:
   714         if self._writinghandles:
   741             yield self._writinghandles[2]
   715             yield self._writinghandles[2]
   742         else:
   716         else:
   805                 nodemaputil.setup_persistent_nodemap(transaction, self)
   779                 nodemaputil.setup_persistent_nodemap(transaction, self)
   806 
   780 
   807     def clearcaches(self):
   781     def clearcaches(self):
   808         self._revisioncache = None
   782         self._revisioncache = None
   809         self._chainbasecache.clear()
   783         self._chainbasecache.clear()
   810         self._chunkcache = (0, b'')
   784         self._segmentfile.clear_cache()
   811         self._pcache = {}
   785         self._pcache = {}
   812         self._nodemap_docket = None
   786         self._nodemap_docket = None
   813         self.index.clearcaches()
   787         self.index.clearcaches()
   814         # The python code is the one responsible for validating the docket, we
   788         # The python code is the one responsible for validating the docket, we
   815         # end up having to refresh it here.
   789         # end up having to refresh it here.
  1627         returns True if text is different than what is stored.
  1601         returns True if text is different than what is stored.
  1628         """
  1602         """
  1629         p1, p2 = self.parents(node)
  1603         p1, p2 = self.parents(node)
  1630         return storageutil.hashrevisionsha1(text, p1, p2) != node
  1604         return storageutil.hashrevisionsha1(text, p1, p2) != node
  1631 
  1605 
  1632     def _cachesegment(self, offset, data):
       
  1633         """Add a segment to the revlog cache.
       
  1634 
       
  1635         Accepts an absolute offset and the data that is at that location.
       
  1636         """
       
  1637         o, d = self._chunkcache
       
  1638         # try to add to existing cache
       
  1639         if o + len(d) == offset and len(d) + len(data) < _chunksize:
       
  1640             self._chunkcache = o, d + data
       
  1641         else:
       
  1642             self._chunkcache = offset, data
       
  1643 
       
  1644     def _readsegment(self, offset, length, df=None):
       
  1645         """Load a segment of raw data from the revlog.
       
  1646 
       
  1647         Accepts an absolute offset, length to read, and an optional existing
       
  1648         file handle to read from.
       
  1649 
       
  1650         If an existing file handle is passed, it will be seeked and the
       
  1651         original seek position will NOT be restored.
       
  1652 
       
  1653         Returns a str or buffer of raw byte data.
       
  1654 
       
  1655         Raises if the requested number of bytes could not be read.
       
  1656         """
       
  1657         # Cache data both forward and backward around the requested
       
  1658         # data, in a fixed size window. This helps speed up operations
       
  1659         # involving reading the revlog backwards.
       
  1660         cachesize = self._chunkcachesize
       
  1661         realoffset = offset & ~(cachesize - 1)
       
  1662         reallength = (
       
  1663             (offset + length + cachesize) & ~(cachesize - 1)
       
  1664         ) - realoffset
       
  1665         with self._datareadfp(df) as df:
       
  1666             df.seek(realoffset)
       
  1667             d = df.read(reallength)
       
  1668 
       
  1669         self._cachesegment(realoffset, d)
       
  1670         if offset != realoffset or reallength != length:
       
  1671             startoffset = offset - realoffset
       
  1672             if len(d) - startoffset < length:
       
  1673                 filename = self._indexfile if self._inline else self._datafile
       
  1674                 got = len(d) - startoffset
       
  1675                 m = PARTIAL_READ_MSG % (filename, length, offset, got)
       
  1676                 raise error.RevlogError(m)
       
  1677             return util.buffer(d, startoffset, length)
       
  1678 
       
  1679         if len(d) < length:
       
  1680             filename = self._indexfile if self._inline else self._datafile
       
  1681             got = len(d) - startoffset
       
  1682             m = PARTIAL_READ_MSG % (filename, length, offset, got)
       
  1683             raise error.RevlogError(m)
       
  1684 
       
  1685         return d
       
  1686 
       
  1687     def _getsegment(self, offset, length, df=None):
       
  1688         """Obtain a segment of raw data from the revlog.
       
  1689 
       
  1690         Accepts an absolute offset, length of bytes to obtain, and an
       
  1691         optional file handle to the already-opened revlog. If the file
       
  1692         handle is used, it's original seek position will not be preserved.
       
  1693 
       
  1694         Requests for data may be returned from a cache.
       
  1695 
       
  1696         Returns a str or a buffer instance of raw byte data.
       
  1697         """
       
  1698         o, d = self._chunkcache
       
  1699         l = len(d)
       
  1700 
       
  1701         # is it in the cache?
       
  1702         cachestart = offset - o
       
  1703         cacheend = cachestart + length
       
  1704         if cachestart >= 0 and cacheend <= l:
       
  1705             if cachestart == 0 and cacheend == l:
       
  1706                 return d  # avoid a copy
       
  1707             return util.buffer(d, cachestart, cacheend - cachestart)
       
  1708 
       
  1709         return self._readsegment(offset, length, df=df)
       
  1710 
       
  1711     def _getsegmentforrevs(self, startrev, endrev, df=None):
  1606     def _getsegmentforrevs(self, startrev, endrev, df=None):
  1712         """Obtain a segment of raw data corresponding to a range of revisions.
  1607         """Obtain a segment of raw data corresponding to a range of revisions.
  1713 
  1608 
  1714         Accepts the start and end revisions and an optional already-open
  1609         Accepts the start and end revisions and an optional already-open
  1715         file handle to be used for reading. If the file handle is read, its
  1610         file handle to be used for reading. If the file handle is read, its
  1738         if self._inline:
  1633         if self._inline:
  1739             start += (startrev + 1) * self.index.entry_size
  1634             start += (startrev + 1) * self.index.entry_size
  1740             end += (endrev + 1) * self.index.entry_size
  1635             end += (endrev + 1) * self.index.entry_size
  1741         length = end - start
  1636         length = end - start
  1742 
  1637 
  1743         return start, self._getsegment(start, length, df=df)
  1638         return start, self._segmentfile.read_chunk(start, length, df)
  1744 
  1639 
  1745     def _chunk(self, rev, df=None):
  1640     def _chunk(self, rev, df=None):
  1746         """Obtain a single decompressed chunk for a revision.
  1641         """Obtain a single decompressed chunk for a revision.
  1747 
  1642 
  1748         Accepts an integer revision and an optional already-open file handle
  1643         Accepts an integer revision and an optional already-open file handle
  1829                     msg = b'unknown compression mode %d'
  1724                     msg = b'unknown compression mode %d'
  1830                     msg %= comp_mode
  1725                     msg %= comp_mode
  1831                     raise error.RevlogError(msg)
  1726                     raise error.RevlogError(msg)
  1832 
  1727 
  1833         return l
  1728         return l
  1834 
       
  1835     def _chunkclear(self):
       
  1836         """Clear the raw chunk cache."""
       
  1837         self._chunkcache = (0, b'')
       
  1838 
  1729 
  1839     def deltaparent(self, rev):
  1730     def deltaparent(self, rev):
  1840         """return deltaparent of the given revision"""
  1731         """return deltaparent of the given revision"""
  1841         base = self.index[rev][3]
  1732         base = self.index[rev][3]
  1842         if base == rev:
  1733         if base == rev:
  2041             if len(comp_segment) < sidedata_size:
  1932             if len(comp_segment) < sidedata_size:
  2042                 filename = self._sidedatafile
  1933                 filename = self._sidedatafile
  2043                 length = sidedata_size
  1934                 length = sidedata_size
  2044                 offset = sidedata_offset
  1935                 offset = sidedata_offset
  2045                 got = len(comp_segment)
  1936                 got = len(comp_segment)
  2046                 m = PARTIAL_READ_MSG % (filename, length, offset, got)
  1937                 m = randomaccessfile.PARTIAL_READ_MSG % (
       
  1938                     filename,
       
  1939                     length,
       
  1940                     offset,
       
  1941                     got,
       
  1942                 )
  2047                 raise error.RevlogError(m)
  1943                 raise error.RevlogError(m)
  2048 
  1944 
  2049         comp = self.index[rev][11]
  1945         comp = self.index[rev][11]
  2050         if comp == COMP_MODE_PLAIN:
  1946         if comp == COMP_MODE_PLAIN:
  2051             segment = comp_segment
  1947             segment = comp_segment
  2134             fp.flush()
  2030             fp.flush()
  2135             fp.close()
  2031             fp.close()
  2136             # We can't use the cached file handle after close(). So prevent
  2032             # We can't use the cached file handle after close(). So prevent
  2137             # its usage.
  2033             # its usage.
  2138             self._writinghandles = None
  2034             self._writinghandles = None
       
  2035             self._segmentfile.writing_handle = None
  2139 
  2036 
  2140         new_dfh = self._datafp(b'w+')
  2037         new_dfh = self._datafp(b'w+')
  2141         new_dfh.truncate(0)  # drop any potentially existing data
  2038         new_dfh.truncate(0)  # drop any potentially existing data
  2142         try:
  2039         try:
  2143             with self._indexfp() as read_ifh:
  2040             with self._indexfp() as read_ifh:
  2169                 # the temp file replace the real index when we exit the context
  2066                 # the temp file replace the real index when we exit the context
  2170                 # manager
  2067                 # manager
  2171 
  2068 
  2172             tr.replace(self._indexfile, trindex * self.index.entry_size)
  2069             tr.replace(self._indexfile, trindex * self.index.entry_size)
  2173             nodemaputil.setup_persistent_nodemap(tr, self)
  2070             nodemaputil.setup_persistent_nodemap(tr, self)
  2174             self._chunkclear()
  2071             self._segmentfile = randomaccessfile.randomaccessfile(
       
  2072                 self.opener,
       
  2073                 self._datafile,
       
  2074                 self._chunkcachesize,
       
  2075             )
  2175 
  2076 
  2176             if existing_handles:
  2077             if existing_handles:
  2177                 # switched from inline to conventional reopen the index
  2078                 # switched from inline to conventional reopen the index
  2178                 ifh = self.__index_write_fp()
  2079                 ifh = self.__index_write_fp()
  2179                 self._writinghandles = (ifh, new_dfh, None)
  2080                 self._writinghandles = (ifh, new_dfh, None)
       
  2081                 self._segmentfile.writing_handle = new_dfh
  2180                 new_dfh = None
  2082                 new_dfh = None
  2181         finally:
  2083         finally:
  2182             if new_dfh is not None:
  2084             if new_dfh is not None:
  2183                 new_dfh.close()
  2085                 new_dfh.close()
  2184 
  2086 
  2233                     transaction.add(self._indexfile, dsize + isize)
  2135                     transaction.add(self._indexfile, dsize + isize)
  2234                 else:
  2136                 else:
  2235                     transaction.add(self._indexfile, isize)
  2137                     transaction.add(self._indexfile, isize)
  2236                 # exposing all file handle for writing.
  2138                 # exposing all file handle for writing.
  2237                 self._writinghandles = (ifh, dfh, sdfh)
  2139                 self._writinghandles = (ifh, dfh, sdfh)
       
  2140                 self._segmentfile.writing_handle = ifh if self._inline else dfh
  2238                 yield
  2141                 yield
  2239                 if self._docket is not None:
  2142                 if self._docket is not None:
  2240                     self._write_docket(transaction)
  2143                     self._write_docket(transaction)
  2241             finally:
  2144             finally:
  2242                 self._writinghandles = None
  2145                 self._writinghandles = None
       
  2146                 self._segmentfile.writing_handle = None
  2243                 if dfh is not None:
  2147                 if dfh is not None:
  2244                     dfh.close()
  2148                     dfh.close()
  2245                 if sdfh is not None:
  2149                 if sdfh is not None:
  2246                     sdfh.close()
  2150                     sdfh.close()
  2247                 # closing the index file last to avoid exposing referent to
  2151                 # closing the index file last to avoid exposing referent to
  2871             self._docket.write(transaction, stripping=True)
  2775             self._docket.write(transaction, stripping=True)
  2872 
  2776 
  2873         # then reset internal state in memory to forget those revisions
  2777         # then reset internal state in memory to forget those revisions
  2874         self._revisioncache = None
  2778         self._revisioncache = None
  2875         self._chaininfocache = util.lrucachedict(500)
  2779         self._chaininfocache = util.lrucachedict(500)
  2876         self._chunkclear()
  2780         self._segmentfile.clear_cache()
  2877 
  2781 
  2878         del self.index[rev:-1]
  2782         del self.index[rev:-1]
  2879 
  2783 
  2880     def checksize(self):
  2784     def checksize(self):
  2881         """Check size of index and data files
  2785         """Check size of index and data files