revlogv2: also keep track of the size of the "data" file
This is useful to make sure we always start writing at the right location,
without effort.
Differential Revision: https://phab.mercurial-scm.org/D10632
--- a/mercurial/configitems.py Mon May 03 12:35:25 2021 +0200
+++ b/mercurial/configitems.py Mon May 03 12:35:35 2021 +0200
@@ -1156,7 +1156,6 @@
# - for stripping operation
# - for rollback operation
# * proper streaming (race free) of the docket file
-# * store the data size in the docket to simplify sidedata rewrite.
# * track garbage data to evemtually allow rewriting -existing- sidedata.
# * Exchange-wise, we will also need to do something more efficient than
# keeping references to the affected revlogs, especially memory-wise when
--- a/mercurial/revlog.py Mon May 03 12:35:25 2021 +0200
+++ b/mercurial/revlog.py Mon May 03 12:35:35 2021 +0200
@@ -2088,7 +2088,10 @@
if not self._inline:
try:
dfh = self._datafp(b"r+")
- dfh.seek(0, os.SEEK_END)
+ if self._docket is None:
+ dfh.seek(0, os.SEEK_END)
+ else:
+ dfh.seek(self._docket.data_end, os.SEEK_SET)
except IOError as inst:
if inst.errno != errno.ENOENT:
raise
@@ -2455,16 +2458,10 @@
to `n - 1`'s sidedata being written after `n`'s data.
TODO cache this in a docket file before getting out of experimental."""
- if self._format_version != REVLOGV2:
+ if self._docket is None:
return self.end(prev)
-
- offset = 0
- for rev, entry in enumerate(self.index):
- sidedata_end = entry[8] + entry[9]
- # Sidedata for a previous rev has potentially been written after
- # this rev's end, so take the max.
- offset = max(self.end(rev), offset, sidedata_end)
- return offset
+ else:
+ return self._docket.data_end
def _writeentry(self, transaction, entry, data, link, offset, sidedata):
# Files opened in a+ mode have inconsistent behavior on various
@@ -2488,7 +2485,10 @@
else:
ifh.seek(self._docket.index_end, os.SEEK_SET)
if dfh:
- dfh.seek(0, os.SEEK_END)
+ if self._docket is None:
+ dfh.seek(0, os.SEEK_END)
+ else:
+ dfh.seek(self._docket.data_end, os.SEEK_SET)
curr = len(self) - 1
if not self._inline:
@@ -2511,6 +2511,7 @@
self._enforceinlinesize(transaction)
if self._docket is not None:
self._docket.index_end = self._writinghandles[0].tell()
+ self._docket.data_end = self._writinghandles[1].tell()
nodemaputil.setup_persistent_nodemap(transaction, self)
@@ -2673,18 +2674,19 @@
return
# first truncate the files on disk
- end = self.start(rev)
+ data_end = self.start(rev)
if not self._inline:
- transaction.add(self._datafile, end)
+ transaction.add(self._datafile, data_end)
end = rev * self.index.entry_size
else:
- end += rev * self.index.entry_size
+ end = data_end + (rev * self.index.entry_size)
transaction.add(self._indexfile, end)
if self._docket is not None:
# XXX we could, leverage the docket while stripping. However it is
# not powerfull enough at the time of this comment
self._docket.index_end = end
+ self._docket.data_end = data_end
self._docket.write(transaction, stripping=True)
# then reset internal state in memory to forget those revisions
@@ -3210,7 +3212,11 @@
# append the new sidedata
with self._writing(transaction):
ifh, dfh = self._writinghandles
- dfh.seek(0, os.SEEK_END)
+ if self._docket is not None:
+ dfh.seek(self._docket.data_end, os.SEEK_SET)
+ else:
+ dfh.seek(0, os.SEEK_END)
+
current_offset = dfh.tell()
for rev in range(startrev, endrev + 1):
entry = self.index[rev]
@@ -3242,6 +3248,8 @@
dfh.write(serialized_sidedata)
new_entries.append(entry)
current_offset += len(serialized_sidedata)
+ if self._docket is not None:
+ self._docket.data_end = dfh.tell()
# rewrite the new index entries
ifh.seek(startrev * self.index.entry_size)
--- a/mercurial/revlogutils/docket.py Mon May 03 12:35:25 2021 +0200
+++ b/mercurial/revlogutils/docket.py Mon May 03 12:35:35 2021 +0200
@@ -32,9 +32,11 @@
# * 4 bytes: revlog version
# | This is mandatory as docket must be compatible with the previous
# | revlog index header.
-# * 8 bytes: size of index data
-# * 8 bytes: pending size of index data
-S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LL')
+# * 8 bytes: size of index-data
+# * 8 bytes: pending size of index-data
+# * 8 bytes: size of data
+# * 8 bytes: pending size of data
+S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLL')
class RevlogDocket(object):
@@ -47,6 +49,8 @@
version_header=None,
index_end=0,
pending_index_end=0,
+ data_end=0,
+ pending_data_end=0,
):
self._version_header = version_header
self._read_only = bool(use_pending)
@@ -54,14 +58,19 @@
self._radix = revlog.radix
self._path = revlog._docket_file
self._opener = revlog.opener
- # this assert should be True as long as we have a single index filename
+ # these asserts should be True as long as we have a single index filename
assert index_end <= pending_index_end
+ assert data_end <= pending_data_end
self._initial_index_end = index_end
self._pending_index_end = pending_index_end
+ self._initial_data_end = data_end
+ self._pending_data_end = pending_data_end
if use_pending:
self._index_end = self._pending_index_end
+ self._data_end = self._pending_data_end
else:
self._index_end = self._initial_index_end
+ self._data_end = self._initial_data_end
def index_filepath(self):
"""file path to the current index file associated to this docket"""
@@ -78,6 +87,16 @@
self._index_end = new_size
self._dirty = True
+ @property
+ def data_end(self):
+ return self._data_end
+
+ @data_end.setter
+ def data_end(self, new_size):
+ if new_size != self._data_end:
+ self._data_end = new_size
+ self._dirty = True
+
def write(self, transaction, pending=False, stripping=False):
"""write the modification of disk if any
@@ -102,15 +121,19 @@
def _serialize(self, pending=False):
if pending:
official_index_end = self._initial_index_end
+ official_data_end = self._initial_data_end
else:
official_index_end = self._index_end
+ official_data_end = self._data_end
# this assert should be True as long as we have a single index filename
- assert official_index_end <= self._index_end
+ assert official_data_end <= self._data_end
data = (
self._version_header,
official_index_end,
self._index_end,
+ official_data_end,
+ self._data_end,
)
return S_HEADER.pack(*data)
@@ -127,12 +150,18 @@
def parse_docket(revlog, data, use_pending=False):
"""given some docket data return a docket object for the given revlog"""
header = S_HEADER.unpack(data[: S_HEADER.size])
- version_header, index_size, pending_index_size = header
+ version_header = header[0]
+ index_size = header[1]
+ pending_index_size = header[2]
+ data_size = header[3]
+ pending_data_size = header[4]
docket = RevlogDocket(
revlog,
use_pending=use_pending,
version_header=version_header,
index_end=index_size,
pending_index_end=pending_index_size,
+ data_end=data_size,
+ pending_data_end=pending_data_size,
)
return docket