view mercurial/revlogutils/docket.py @ 47242:4abd474a10af

revlogv2: also keep track for the size of the "data" file This is useful to make sure we always start writing at the right location, without effort. Differential Revision: https://phab.mercurial-scm.org/D10632
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 03 May 2021 12:35:35 +0200
parents 2219853a1503
children ff9fd7107d11
line wrap: on
line source

# docket - code related to revlog "docket"
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

### Revlog docket file
#
# The revlog is stored on disk using multiple files:
#
# * a small docket file, containing metadata and a pointer,
#
# * an index file, containing fixed width information about revisions,
#
# * a data file, containing variable width data for these revisions,

from __future__ import absolute_import

import struct

from .. import (
    error,
)

from . import (
    constants,
)

# Docket format
#
# The docket is a fixed-size header: the revlog index header followed by four
# integer fields, in this order:
#
# * 4 bytes: revlog version
#          |   This is mandatory as docket must be compatible with the previous
#          |   revlog index header.
# * 8 bytes: size of index-data
# * 8 bytes: pending size of index-data
# * 8 bytes: size of data
# * 8 bytes: pending size of data
#
# NOTE(review): every size field is packed with the `L` struct code. `L` is
# only 8 bytes wide in native mode on some platforms; if
# `constants.INDEX_HEADER.format` starts with an explicit byte-order character
# (e.g. `>`), `L` is 4 bytes and the widths listed above do not match what is
# actually written (`Q` is the 8-byte code). Confirm against
# `constants.INDEX_HEADER` before relying on the sizes documented here.
S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLL')


class RevlogDocket(object):
    """In-memory representation of the docket file of a revlog.

    For both the index file and the data file the docket tracks two sizes:
    the "official" size and the "pending" size.  A docket created with
    `use_pending=True` exposes the pending sizes and refuses to be written
    back; otherwise it exposes the official sizes.
    """

    def __init__(
        self,
        revlog,
        use_pending=False,
        version_header=None,
        index_end=0,
        pending_index_end=0,
        data_end=0,
        pending_data_end=0,
    ):
        """Create a docket bound to `revlog`.

        `revlog` provides the radix, the docket file path and the opener.
        `version_header` is stored as-is and emitted first when serializing.
        `index_end`/`data_end` are the official sizes, `pending_index_end`/
        `pending_data_end` the pending ones; each official size must not
        exceed its pending counterpart.
        """
        self._version_header = version_header
        # a docket reading pending data must never be written back
        self._read_only = bool(use_pending)
        self._dirty = False
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener

        # these asserts should be True as long as we have a single index
        # filename
        assert index_end <= pending_index_end
        assert data_end <= pending_data_end

        # remember the sizes the docket was created with
        self._initial_index_end, self._pending_index_end = (
            index_end,
            pending_index_end,
        )
        self._initial_data_end, self._pending_data_end = (
            data_end,
            pending_data_end,
        )

        # pick the "current" sizes according to the requested view
        self._index_end = pending_index_end if use_pending else index_end
        self._data_end = pending_data_end if use_pending else data_end

    def index_filepath(self):
        """return the path of the index file this docket currently points to"""
        # very simplistic version at first
        radix = self._radix
        return b"%s.idx" % radix

    @property
    def index_end(self):
        """current size of the index data"""
        return self._index_end

    @index_end.setter
    def index_end(self, new_size):
        # only flag the docket as dirty when the value actually changes
        if new_size == self._index_end:
            return
        self._index_end = new_size
        self._dirty = True

    @property
    def data_end(self):
        """current size of the data file"""
        return self._data_end

    @data_end.setter
    def data_end(self, new_size):
        # only flag the docket as dirty when the value actually changes
        if new_size == self._data_end:
            return
        self._data_end = new_size
        self._dirty = True

    def write(self, transaction, pending=False, stripping=False):
        """write the modifications to disk, if any

        This makes the new content visible to all processes.

        Return False when there was nothing to write, True when the docket
        file was rewritten.  Raise `error.ProgrammingError` when the docket
        is dirty but was created with `use_pending=True`.

        Unless `stripping` is set, the docket file is registered for backup
        with `transaction` before being overwritten.  When `pending` is set
        the docket stays dirty afterward.
        """
        if not self._dirty:
            return False

        if self._read_only:
            msg = b'writing read-only docket: %s' % self._path
            raise error.ProgrammingError(msg)

        if not stripping:
            # XXX we could leverage the docket while stripping. However it
            # is not powerful enough at the time of this comment
            transaction.addbackup(self._path, location=b'store')

        with self._opener(self._path, mode=b'w', atomictemp=True) as fp:
            payload = self._serialize(pending=pending)
            fp.write(payload)

        # after a pending write, the final data still has to be written
        # eventually, so the docket must remain dirty in that case
        self._dirty = pending
        return True

    def _serialize(self, pending=False):
        """return the packed docket content

        With `pending` set, the official sizes emitted are the ones the
        docket was created with; otherwise they are the current sizes.  The
        current sizes are always emitted in the pending slots.
        """
        official_index_end = (
            self._initial_index_end if pending else self._index_end
        )
        official_data_end = (
            self._initial_data_end if pending else self._data_end
        )

        # this assert should be True as long as we have a single index filename
        assert official_data_end <= self._data_end

        return S_HEADER.pack(
            self._version_header,
            official_index_end,
            self._index_end,
            official_data_end,
            self._data_end,
        )


def default_docket(revlog, version_header):
    """return a brand new docket for `revlog`, or None

    A docket is only created when the low 16 bits of `version_header` match
    `constants.REVLOGV2`; any other version yields None.  The new docket is
    flagged dirty so that its next `write` call actually emits it.
    """
    revlog_version = version_header & 0xFFFF
    if revlog_version == constants.REVLOGV2:
        new_docket = RevlogDocket(revlog, version_header=version_header)
        new_docket._dirty = True
        return new_docket
    return None


def parse_docket(revlog, data, use_pending=False):
    """build the docket object of `revlog` from raw docket `data`

    Only the first `S_HEADER.size` bytes of `data` are decoded.  The
    `use_pending` flag is forwarded to the `RevlogDocket` constructor.
    """
    raw_header = data[: S_HEADER.size]
    (
        version_header,
        index_size,
        pending_index_size,
        data_size,
        pending_data_size,
    ) = S_HEADER.unpack(raw_header)
    return RevlogDocket(
        revlog,
        use_pending=use_pending,
        version_header=version_header,
        index_end=index_size,
        pending_index_end=pending_index_size,
        data_end=data_size,
        pending_data_end=pending_data_size,
    )