mercurial/revlog.py
changeset 46722 3d740058b467
parent 46717 913485776542
child 46723 4cd214c9948d
equal deleted inserted replaced
46721:358737abeeef 46722:3d740058b467
   118 _maxinline = 131072
   118 _maxinline = 131072
   119 _chunksize = 1048576
   119 _chunksize = 1048576
   120 
   120 
   121 # Flag processors for REVIDX_ELLIPSIS.
   121 # Flag processors for REVIDX_ELLIPSIS.
   122 def ellipsisreadprocessor(rl, text):
   122 def ellipsisreadprocessor(rl, text):
   123     return text, False, {}
   123     return text, False
   124 
   124 
   125 
   125 
   126 def ellipsiswriteprocessor(rl, text, sidedata):
   126 def ellipsiswriteprocessor(rl, text):
   127     return text, False
   127     return text, False
   128 
   128 
   129 
   129 
   130 def ellipsisrawprocessor(rl, text):
   130 def ellipsisrawprocessor(rl, text):
   131     return False
   131     return False
   552         if b'maxdeltachainspan' in opts:
   552         if b'maxdeltachainspan' in opts:
   553             self._maxdeltachainspan = opts[b'maxdeltachainspan']
   553             self._maxdeltachainspan = opts[b'maxdeltachainspan']
   554         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
   554         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
   555             mmapindexthreshold = opts[b'mmapindexthreshold']
   555             mmapindexthreshold = opts[b'mmapindexthreshold']
   556         self.hassidedata = bool(opts.get(b'side-data', False))
   556         self.hassidedata = bool(opts.get(b'side-data', False))
   557         if self.hassidedata:
       
   558             self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
       
   559         self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
   557         self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
   560         withsparseread = bool(opts.get(b'with-sparse-read', False))
   558         withsparseread = bool(opts.get(b'with-sparse-read', False))
   561         # sparse-revlog forces sparse-read
   559         # sparse-revlog forces sparse-read
   562         self._withsparseread = self._sparserevlog or withsparseread
   560         self._withsparseread = self._sparserevlog or withsparseread
   563         if b'sparse-read-density-threshold' in opts:
   561         if b'sparse-read-density-threshold' in opts:
   854         return self.index[rev][0] & 0xFFFF
   852         return self.index[rev][0] & 0xFFFF
   855 
   853 
   856     def length(self, rev):
   854     def length(self, rev):
   857         return self.index[rev][1]
   855         return self.index[rev][1]
   858 
   856 
       
   857     def sidedata_length(self, rev):
       
   858         if self.version & 0xFFFF != REVLOGV2:
       
   859             return 0
       
   860         return self.index[rev][9]
       
   861 
   859     def rawsize(self, rev):
   862     def rawsize(self, rev):
   860         """return the length of the uncompressed text for a given revision"""
   863         """return the length of the uncompressed text for a given revision"""
   861         l = self.index[rev][2]
   864         l = self.index[rev][2]
   862         if l >= 0:
   865         if l >= 0:
   863             return l
   866             return l
   915             raise
   918             raise
   916 
   919 
   917     # Derived from index values.
   920     # Derived from index values.
   918 
   921 
   919     def end(self, rev):
   922     def end(self, rev):
   920         return self.start(rev) + self.length(rev)
   923         return self.start(rev) + self.length(rev) + self.sidedata_length(rev)
   921 
   924 
   922     def parents(self, node):
   925     def parents(self, node):
   923         i = self.index
   926         i = self.index
   924         d = i[self.rev(node)]
   927         d = i[self.rev(node)]
   925         return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline
   928         return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline
  1851         if raw:
  1854         if raw:
  1852             return text, flagutil.processflagsraw(self, text, flags)
  1855             return text, flagutil.processflagsraw(self, text, flags)
  1853         elif operation == b'read':
  1856         elif operation == b'read':
  1854             return flagutil.processflagsread(self, text, flags)
  1857             return flagutil.processflagsread(self, text, flags)
  1855         else:  # write operation
  1858         else:  # write operation
  1856             return flagutil.processflagswrite(self, text, flags, None)
  1859             return flagutil.processflagswrite(self, text, flags)
  1857 
  1860 
  1858     def revision(self, nodeorrev, _df=None, raw=False):
  1861     def revision(self, nodeorrev, _df=None, raw=False):
  1859         """return an uncompressed revision of a given node or revision
  1862         """return an uncompressed revision of a given node or revision
  1860         number.
  1863         number.
  1861 
  1864 
  1896 
  1899 
  1897         # ``rawtext`` is the text as stored inside the revlog. Might be the
  1900         # ``rawtext`` is the text as stored inside the revlog. Might be the
  1898         # revision or might need to be processed to retrieve the revision.
  1901         # revision or might need to be processed to retrieve the revision.
  1899         rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
  1902         rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
  1900 
  1903 
       
  1904         if self.version & 0xFFFF == REVLOGV2:
       
  1905             if rev is None:
       
  1906                 rev = self.rev(node)
       
  1907             sidedata = self._sidedata(rev)
       
  1908         else:
       
  1909             sidedata = {}
       
  1910 
  1901         if raw and validated:
  1911         if raw and validated:
  1902             # if we don't want to process the raw text and that raw
  1912             # if we don't want to process the raw text and that raw
  1903             # text is cached, we can exit early.
  1913             # text is cached, we can exit early.
  1904             return rawtext, {}
  1914             return rawtext, sidedata
  1905         if rev is None:
  1915         if rev is None:
  1906             rev = self.rev(node)
  1916             rev = self.rev(node)
  1907         # the revlog's flag for this revision
  1917         # the revlog's flag for this revision
  1908         # (usually alter its state or content)
  1918         # (usually alter its state or content)
  1909         flags = self.flags(rev)
  1919         flags = self.flags(rev)
  1910 
  1920 
  1911         if validated and flags == REVIDX_DEFAULT_FLAGS:
  1921         if validated and flags == REVIDX_DEFAULT_FLAGS:
  1912             # no extra flags set, no flag processor runs, text = rawtext
  1922             # no extra flags set, no flag processor runs, text = rawtext
  1913             return rawtext, {}
  1923             return rawtext, sidedata
  1914 
  1924 
  1915         sidedata = {}
       
  1916         if raw:
  1925         if raw:
  1917             validatehash = flagutil.processflagsraw(self, rawtext, flags)
  1926             validatehash = flagutil.processflagsraw(self, rawtext, flags)
  1918             text = rawtext
  1927             text = rawtext
  1919         else:
  1928         else:
  1920             try:
  1929             r = flagutil.processflagsread(self, rawtext, flags)
  1921                 r = flagutil.processflagsread(self, rawtext, flags)
  1930             text, validatehash = r
  1922             except error.SidedataHashError as exc:
       
  1923                 msg = _(b"integrity check failed on %s:%s sidedata key %d")
       
  1924                 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
       
  1925                 raise error.RevlogError(msg)
       
  1926             text, validatehash, sidedata = r
       
  1927         if validatehash:
  1931         if validatehash:
  1928             self.checkhash(text, node, rev=rev)
  1932             self.checkhash(text, node, rev=rev)
  1929         if not validated:
  1933         if not validated:
  1930             self._revisioncache = (node, rev, rawtext)
  1934             self._revisioncache = (node, rev, rawtext)
  1931 
  1935 
  1971             bins = bins[1:]
  1975             bins = bins[1:]
  1972 
  1976 
  1973         rawtext = mdiff.patches(basetext, bins)
  1977         rawtext = mdiff.patches(basetext, bins)
  1974         del basetext  # let us have a chance to free memory early
  1978         del basetext  # let us have a chance to free memory early
  1975         return (rev, rawtext, False)
  1979         return (rev, rawtext, False)
       
  1980 
       
  1981     def _sidedata(self, rev):
       
  1982         """Return the sidedata for a given revision number."""
       
  1983         index_entry = self.index[rev]
       
  1984         sidedata_offset = index_entry[8]
       
  1985         sidedata_size = index_entry[9]
       
  1986 
       
  1987         if self._inline:
       
  1988             sidedata_offset += self._io.size * (1 + rev)
       
  1989         if sidedata_size == 0:
       
  1990             return {}
       
  1991 
       
  1992         segment = self._getsegment(sidedata_offset, sidedata_size)
       
  1993         sidedata = sidedatautil.deserialize_sidedata(segment)
       
  1994         return sidedata
  1976 
  1995 
  1977     def rawdata(self, nodeorrev, _df=None):
  1996     def rawdata(self, nodeorrev, _df=None):
  1978         """return an uncompressed raw data of a given node or revision number.
  1997         """return an uncompressed raw data of a given node or revision number.
  1979 
  1998 
  1980         _df - an existing file handle to read from. (internal-only)
  1999         _df - an existing file handle to read from. (internal-only)
  2105                 _(b"attempted to add linkrev -1 to %s") % self.indexfile
  2124                 _(b"attempted to add linkrev -1 to %s") % self.indexfile
  2106             )
  2125             )
  2107 
  2126 
  2108         if sidedata is None:
  2127         if sidedata is None:
  2109             sidedata = {}
  2128             sidedata = {}
  2110             flags = flags & ~REVIDX_SIDEDATA
       
  2111         elif not self.hassidedata:
  2129         elif not self.hassidedata:
  2112             raise error.ProgrammingError(
  2130             raise error.ProgrammingError(
  2113                 _(b"trying to add sidedata to a revlog who don't support them")
  2131                 _(b"trying to add sidedata to a revlog who don't support them")
  2114             )
  2132             )
  2115         else:
       
  2116             flags |= REVIDX_SIDEDATA
       
  2117 
  2133 
  2118         if flags:
  2134         if flags:
  2119             node = node or self.hash(text, p1, p2)
  2135             node = node or self.hash(text, p1, p2)
  2120 
  2136 
  2121         rawtext, validatehash = flagutil.processflagswrite(
  2137         rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
  2122             self, text, flags, sidedata=sidedata
       
  2123         )
       
  2124 
  2138 
  2125         # If the flag processor modifies the revision data, ignore any provided
  2139         # If the flag processor modifies the revision data, ignore any provided
  2126         # cachedelta.
  2140         # cachedelta.
  2127         if rawtext != text:
  2141         if rawtext != text:
  2128             cachedelta = None
  2142             cachedelta = None
  2151             p2,
  2165             p2,
  2152             node,
  2166             node,
  2153             flags,
  2167             flags,
  2154             cachedelta=cachedelta,
  2168             cachedelta=cachedelta,
  2155             deltacomputer=deltacomputer,
  2169             deltacomputer=deltacomputer,
       
  2170             sidedata=sidedata,
  2156         )
  2171         )
  2157 
  2172 
  2158     def addrawrevision(
  2173     def addrawrevision(
  2159         self,
  2174         self,
  2160         rawtext,
  2175         rawtext,
  2164         p2,
  2179         p2,
  2165         node,
  2180         node,
  2166         flags,
  2181         flags,
  2167         cachedelta=None,
  2182         cachedelta=None,
  2168         deltacomputer=None,
  2183         deltacomputer=None,
       
  2184         sidedata=None,
  2169     ):
  2185     ):
  2170         """add a raw revision with known flags, node and parents
  2186         """add a raw revision with known flags, node and parents
  2171         useful when reusing a revision not stored in this revlog (ex: received
  2187         useful when reusing a revision not stored in this revlog (ex: received
  2172         over wire, or read from an external bundle).
  2188         over wire, or read from an external bundle).
  2173         """
  2189         """
  2186                 flags,
  2202                 flags,
  2187                 cachedelta,
  2203                 cachedelta,
  2188                 ifh,
  2204                 ifh,
  2189                 dfh,
  2205                 dfh,
  2190                 deltacomputer=deltacomputer,
  2206                 deltacomputer=deltacomputer,
       
  2207                 sidedata=sidedata,
  2191             )
  2208             )
  2192         finally:
  2209         finally:
  2193             if dfh:
  2210             if dfh:
  2194                 dfh.close()
  2211                 dfh.close()
  2195             ifh.close()
  2212             ifh.close()
  2279         cachedelta,
  2296         cachedelta,
  2280         ifh,
  2297         ifh,
  2281         dfh,
  2298         dfh,
  2282         alwayscache=False,
  2299         alwayscache=False,
  2283         deltacomputer=None,
  2300         deltacomputer=None,
       
  2301         sidedata=None,
  2284     ):
  2302     ):
  2285         """internal function to add revisions to the log
  2303         """internal function to add revisions to the log
  2286 
  2304 
  2287         see addrevision for argument descriptions.
  2305         see addrevision for argument descriptions.
  2288 
  2306 
  2348 
  2366 
  2349         revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
  2367         revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
  2350 
  2368 
  2351         deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
  2369         deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
  2352 
  2370 
       
  2371         if sidedata:
       
  2372             serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
       
  2373             sidedata_offset = offset + deltainfo.deltalen
       
  2374         else:
       
  2375             serialized_sidedata = b""
       
  2376             # Don't store the offset if the sidedata is empty, that way
       
  2377             # we can easily detect empty sidedata and they will be no different
       
  2378             # than ones we manually add.
       
  2379             sidedata_offset = 0
       
  2380 
  2353         e = (
  2381         e = (
  2354             offset_type(offset, flags),
  2382             offset_type(offset, flags),
  2355             deltainfo.deltalen,
  2383             deltainfo.deltalen,
  2356             textlen,
  2384             textlen,
  2357             deltainfo.base,
  2385             deltainfo.base,
  2358             link,
  2386             link,
  2359             p1r,
  2387             p1r,
  2360             p2r,
  2388             p2r,
  2361             node,
  2389             node,
  2362             0,
  2390             sidedata_offset,
  2363             0,
  2391             len(serialized_sidedata),
  2364         )
  2392         )
  2365 
  2393 
  2366         if self.version & 0xFFFF != REVLOGV2:
  2394         if self.version & 0xFFFF != REVLOGV2:
  2367             e = e[:8]
  2395             e = e[:8]
  2368 
  2396 
  2369         self.index.append(e)
  2397         self.index.append(e)
  2370 
       
  2371         entry = self._io.packentry(e, self.node, self.version, curr)
  2398         entry = self._io.packentry(e, self.node, self.version, curr)
  2372         self._writeentry(
  2399         self._writeentry(
  2373             transaction, ifh, dfh, entry, deltainfo.data, link, offset
  2400             transaction,
       
  2401             ifh,
       
  2402             dfh,
       
  2403             entry,
       
  2404             deltainfo.data,
       
  2405             link,
       
  2406             offset,
       
  2407             serialized_sidedata,
  2374         )
  2408         )
  2375 
  2409 
  2376         rawtext = btext[0]
  2410         rawtext = btext[0]
  2377 
  2411 
  2378         if alwayscache and rawtext is None:
  2412         if alwayscache and rawtext is None:
  2381         if type(rawtext) == bytes:  # only accept immutable objects
  2415         if type(rawtext) == bytes:  # only accept immutable objects
  2382             self._revisioncache = (node, curr, rawtext)
  2416             self._revisioncache = (node, curr, rawtext)
  2383         self._chainbasecache[curr] = deltainfo.chainbase
  2417         self._chainbasecache[curr] = deltainfo.chainbase
  2384         return curr
  2418         return curr
  2385 
  2419 
  2386     def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
  2420     def _writeentry(
       
  2421         self, transaction, ifh, dfh, entry, data, link, offset, sidedata
       
  2422     ):
  2387         # Files opened in a+ mode have inconsistent behavior on various
  2423         # Files opened in a+ mode have inconsistent behavior on various
  2388         # platforms. Windows requires that a file positioning call be made
  2424         # platforms. Windows requires that a file positioning call be made
  2389         # when the file handle transitions between reads and writes. See
  2425         # when the file handle transitions between reads and writes. See
  2390         # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
  2426         # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
  2391         # platforms, Python or the platform itself can be buggy. Some versions
  2427         # platforms, Python or the platform itself can be buggy. Some versions
  2405             transaction.add(self.datafile, offset)
  2441             transaction.add(self.datafile, offset)
  2406             transaction.add(self.indexfile, curr * len(entry))
  2442             transaction.add(self.indexfile, curr * len(entry))
  2407             if data[0]:
  2443             if data[0]:
  2408                 dfh.write(data[0])
  2444                 dfh.write(data[0])
  2409             dfh.write(data[1])
  2445             dfh.write(data[1])
       
  2446             if sidedata:
       
  2447                 dfh.write(sidedata)
  2410             ifh.write(entry)
  2448             ifh.write(entry)
  2411         else:
  2449         else:
  2412             offset += curr * self._io.size
  2450             offset += curr * self._io.size
  2413             transaction.add(self.indexfile, offset)
  2451             transaction.add(self.indexfile, offset)
  2414             ifh.write(entry)
  2452             ifh.write(entry)
  2415             ifh.write(data[0])
  2453             ifh.write(data[0])
  2416             ifh.write(data[1])
  2454             ifh.write(data[1])
       
  2455             if sidedata:
       
  2456                 ifh.write(sidedata)
  2417             self._enforceinlinesize(transaction, ifh)
  2457             self._enforceinlinesize(transaction, ifh)
  2418         nodemaputil.setup_persistent_nodemap(transaction, self)
  2458         nodemaputil.setup_persistent_nodemap(transaction, self)
  2419 
  2459 
  2420     def addgroup(
  2460     def addgroup(
  2421         self,
  2461         self,