Mercurial > hg
changeset 47242:4abd474a10af
revlogv2: also keep track of the size of the "data" file
This is useful to make sure we always start writing at the right location,
without effort.
Differential Revision: https://phab.mercurial-scm.org/D10632
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 03 May 2021 12:35:35 +0200 |
parents | 2219853a1503 |
children | 3b04cf976c67 |
files | mercurial/configitems.py mercurial/revlog.py mercurial/revlogutils/docket.py |
diffstat | 3 files changed, 58 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/configitems.py Mon May 03 12:35:25 2021 +0200 +++ b/mercurial/configitems.py Mon May 03 12:35:35 2021 +0200 @@ -1156,7 +1156,6 @@ # - for stripping operation # - for rollback operation # * proper streaming (race free) of the docket file -# * store the data size in the docket to simplify sidedata rewrite. # * track garbage data to evemtually allow rewriting -existing- sidedata. # * Exchange-wise, we will also need to do something more efficient than # keeping references to the affected revlogs, especially memory-wise when
--- a/mercurial/revlog.py Mon May 03 12:35:25 2021 +0200 +++ b/mercurial/revlog.py Mon May 03 12:35:35 2021 +0200 @@ -2088,7 +2088,10 @@ if not self._inline: try: dfh = self._datafp(b"r+") - dfh.seek(0, os.SEEK_END) + if self._docket is None: + dfh.seek(0, os.SEEK_END) + else: + dfh.seek(self._docket.data_end, os.SEEK_SET) except IOError as inst: if inst.errno != errno.ENOENT: raise @@ -2455,16 +2458,10 @@ to `n - 1`'s sidedata being written after `n`'s data. TODO cache this in a docket file before getting out of experimental.""" - if self._format_version != REVLOGV2: + if self._docket is None: return self.end(prev) - - offset = 0 - for rev, entry in enumerate(self.index): - sidedata_end = entry[8] + entry[9] - # Sidedata for a previous rev has potentially been written after - # this rev's end, so take the max. - offset = max(self.end(rev), offset, sidedata_end) - return offset + else: + return self._docket.data_end def _writeentry(self, transaction, entry, data, link, offset, sidedata): # Files opened in a+ mode have inconsistent behavior on various @@ -2488,7 +2485,10 @@ else: ifh.seek(self._docket.index_end, os.SEEK_SET) if dfh: - dfh.seek(0, os.SEEK_END) + if self._docket is None: + dfh.seek(0, os.SEEK_END) + else: + dfh.seek(self._docket.data_end, os.SEEK_SET) curr = len(self) - 1 if not self._inline: @@ -2511,6 +2511,7 @@ self._enforceinlinesize(transaction) if self._docket is not None: self._docket.index_end = self._writinghandles[0].tell() + self._docket.data_end = self._writinghandles[1].tell() nodemaputil.setup_persistent_nodemap(transaction, self) @@ -2673,18 +2674,19 @@ return # first truncate the files on disk - end = self.start(rev) + data_end = self.start(rev) if not self._inline: - transaction.add(self._datafile, end) + transaction.add(self._datafile, data_end) end = rev * self.index.entry_size else: - end += rev * self.index.entry_size + end = data_end + (rev * self.index.entry_size) transaction.add(self._indexfile, end) if self._docket is not 
None: # XXX we could, leverage the docket while stripping. However it is # not powerfull enough at the time of this comment self._docket.index_end = end + self._docket.data_end = data_end self._docket.write(transaction, stripping=True) # then reset internal state in memory to forget those revisions @@ -3210,7 +3212,11 @@ # append the new sidedata with self._writing(transaction): ifh, dfh = self._writinghandles - dfh.seek(0, os.SEEK_END) + if self._docket is not None: + dfh.seek(self._docket.data_end, os.SEEK_SET) + else: + dfh.seek(0, os.SEEK_END) + current_offset = dfh.tell() for rev in range(startrev, endrev + 1): entry = self.index[rev] @@ -3242,6 +3248,8 @@ dfh.write(serialized_sidedata) new_entries.append(entry) current_offset += len(serialized_sidedata) + if self._docket is not None: + self._docket.data_end = dfh.tell() # rewrite the new index entries ifh.seek(startrev * self.index.entry_size)
--- a/mercurial/revlogutils/docket.py Mon May 03 12:35:25 2021 +0200 +++ b/mercurial/revlogutils/docket.py Mon May 03 12:35:35 2021 +0200 @@ -32,9 +32,11 @@ # * 4 bytes: revlog version # | This is mandatory as docket must be compatible with the previous # | revlog index header. -# * 8 bytes: size of index data -# * 8 bytes: pending size of index data -S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LL') +# * 8 bytes: size of index-data +# * 8 bytes: pending size of index-data +# * 8 bytes: size of data +# * 8 bytes: pending size of data +S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLL') class RevlogDocket(object): @@ -47,6 +49,8 @@ version_header=None, index_end=0, pending_index_end=0, + data_end=0, + pending_data_end=0, ): self._version_header = version_header self._read_only = bool(use_pending) @@ -54,14 +58,19 @@ self._radix = revlog.radix self._path = revlog._docket_file self._opener = revlog.opener - # this assert should be True as long as we have a single index filename + # thes asserts should be True as long as we have a single index filename assert index_end <= pending_index_end + assert data_end <= pending_data_end self._initial_index_end = index_end self._pending_index_end = pending_index_end + self._initial_data_end = data_end + self._pending_data_end = pending_data_end if use_pending: self._index_end = self._pending_index_end + self._data_end = self._pending_data_end else: self._index_end = self._initial_index_end + self._data_end = self._initial_data_end def index_filepath(self): """file path to the current index file associated to this docket""" @@ -78,6 +87,16 @@ self._index_end = new_size self._dirty = True + @property + def data_end(self): + return self._data_end + + @data_end.setter + def data_end(self, new_size): + if new_size != self._data_end: + self._data_end = new_size + self._dirty = True + def write(self, transaction, pending=False, stripping=False): """write the modification of disk if any @@ -102,15 +121,19 @@ def 
_serialize(self, pending=False): if pending: official_index_end = self._initial_index_end + official_data_end = self._initial_data_end else: official_index_end = self._index_end + official_data_end = self._data_end # this assert should be True as long as we have a single index filename - assert official_index_end <= self._index_end + assert official_data_end <= self._data_end data = ( self._version_header, official_index_end, self._index_end, + official_data_end, + self._data_end, ) return S_HEADER.pack(*data) @@ -127,12 +150,18 @@ def parse_docket(revlog, data, use_pending=False): """given some docket data return a docket object for the given revlog""" header = S_HEADER.unpack(data[: S_HEADER.size]) - version_header, index_size, pending_index_size = header + version_header = header[0] + index_size = header[1] + pending_index_size = header[2] + data_size = header[3] + pending_data_size = header[4] docket = RevlogDocket( revlog, use_pending=use_pending, version_header=version_header, index_end=index_size, pending_index_end=pending_index_size, + data_end=data_size, + pending_data_end=pending_data_size, ) return docket