view mercurial/pure/base85.py @ 42437:9609430d3625

rust-filepatterns: use bytes instead of String In my initial patch, I introduced an unnecessary hard constraint on UTF-8 filenames and patterns which I forgot to remove. Although the performance penalty for using String might be negligible, we don't want to break compatibility with non-UTF-8 encodings for no reason. Moreover, this change allows for a cleaner Rust core API. This patch introduces a new utils module that is used with this fix. Finally, PatternError was not put inside the Python module generated by Rust, which would have raised a NameError. Differential Revision: https://phab.mercurial-scm.org/D6485
author Raphaël Gomès <rgomes@octobus.net>
date Thu, 06 Jun 2019 15:30:56 +0200
parents 80301c90a2dc
children 2372284d9457
line wrap: on
line source

# base85.py: pure python base85 codec
#
# Copyright (C) 2009 Brendan Cully <brendan@kublai.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct

from .. import pycompat

_b85chars = pycompat.bytestr("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef"
                             "ghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
_b85dec = {}

def _mkb85dec():
    for i, c in enumerate(_b85chars):
        _b85dec[c] = i

def b85encode(text, pad=False):
    """encode text in base85 format"""
    l = len(text)
    r = l % 4
    if r:
        text += '\0' * (4 - r)
    longs = len(text) >> 2
    words = struct.unpack('>%dL' % (longs), text)

    out = ''.join(_b85chars[(word // 52200625) % 85] +
                  _b85chars2[(word // 7225) % 7225] +
                  _b85chars2[word % 7225]
                  for word in words)

    if pad:
        return out

    # Trim padding
    olen = l % 4
    if olen:
        olen += 1
    olen += l // 4 * 5
    return out[:olen]

def b85decode(text):
    """decode base85-encoded text"""
    if not _b85dec:
        _mkb85dec()

    l = len(text)
    out = []
    for i in range(0, len(text), 5):
        chunk = text[i:i + 5]
        chunk = pycompat.bytestr(chunk)
        acc = 0
        for j, c in enumerate(chunk):
            try:
                acc = acc * 85 + _b85dec[c]
            except KeyError:
                raise ValueError('bad base85 character at position %d'
                                 % (i + j))
        if acc > 4294967295:
            raise ValueError('Base85 overflow in hunk starting at byte %d' % i)
        out.append(acc)

    # Pad final chunk if necessary
    cl = l % 5
    if cl:
        acc *= 85 ** (5 - cl)
        if cl > 1:
            acc += 0xffffff >> (cl - 2) * 8
        out[-1] = acc

    out = struct.pack('>%dL' % (len(out)), *out)
    if cl:
        out = out[:-(5 - cl)]

    return out