view mercurial/pure/base85.py @ 45871:a985c4fb23ca

transaction: change list of journal entries into a dictionary The transaction object used to keep a mapping table of path names to journal entries and a list of journal entries consisting of path and file offset to truncate on rollback. The offsets are used in three cases. repair.strip and rollback process all of them in one go, but they care about the order. For them, it is perfectly reasonable to read the journal back from disk as both operations already involve at least one system call per journal entry. The other consumer is the revlog logic for moving from inline to external data storage. It doesn't care about the order of the journal and just needs to original offset stored. Further optimisations are possible here to move the in-memory journal to a set(), but without memoisation of the original revlog size this could turn it into O(n^2) behavior in worst case when many revlogs need to migrated. Differential Revision: https://phab.mercurial-scm.org/D9277
author Joerg Sonnenberger <joerg@bec.de>
date Sat, 07 Nov 2020 21:34:09 +0100
parents 4cd911040ba5
children 6000f5b25c9b
line wrap: on
line source

# base85.py: pure python base85 codec
#
# Copyright (C) 2009 Brendan Cully <brendan@kublai.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct

from .. import pycompat

_b85chars = pycompat.bytestr(
    b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef"
    b"ghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"
)
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
_b85dec = {}


def _mkb85dec():
    for i, c in enumerate(_b85chars):
        _b85dec[c] = i


def b85encode(text, pad=False):
    """encode text in base85 format"""
    l = len(text)
    r = l % 4
    if r:
        text += b'\0' * (4 - r)
    longs = len(text) >> 2
    words = struct.unpack(b'>%dL' % longs, text)

    out = b''.join(
        _b85chars[(word // 52200625) % 85]
        + _b85chars2[(word // 7225) % 7225]
        + _b85chars2[word % 7225]
        for word in words
    )

    if pad:
        return out

    # Trim padding
    olen = l % 4
    if olen:
        olen += 1
    olen += l // 4 * 5
    return out[:olen]


def b85decode(text):
    """decode base85-encoded text"""
    if not _b85dec:
        _mkb85dec()

    l = len(text)
    out = []
    for i in range(0, len(text), 5):
        chunk = text[i : i + 5]
        chunk = pycompat.bytestr(chunk)
        acc = 0
        for j, c in enumerate(chunk):
            try:
                acc = acc * 85 + _b85dec[c]
            except KeyError:
                raise ValueError(
                    'bad base85 character at position %d' % (i + j)
                )
        if acc > 4294967295:
            raise ValueError('Base85 overflow in hunk starting at byte %d' % i)
        out.append(acc)

    # Pad final chunk if necessary
    cl = l % 5
    if cl:
        acc *= 85 ** (5 - cl)
        if cl > 1:
            acc += 0xFFFFFF >> (cl - 2) * 8
        out[-1] = acc

    out = struct.pack(b'>%dL' % (len(out)), *out)
    if cl:
        out = out[: -(5 - cl)]

    return out