view mercurial/pure/base85.py @ 24018:26d6a6a78c1d

obsolete: use parsers.fm1readmarker if it exists for a ~38% perf win This moves perfloadmarkers on my linux workstation (63494 markers from mpm, crew, and myself) performance from ! wall 0.357657 comb 0.360000 user 0.350000 sys 0.010000 (best of 28) to ! wall 0.222345 comb 0.220000 user 0.210000 sys 0.010000 (best of 41) which is a pretty good improvement. On my BSD machine, which is ancient and slow, before: ! wall 3.584964 comb 3.578125 user 3.539062 sys 0.039062 (best of 3) after: ! wall 2.267974 comb 2.265625 user 2.195312 sys 0.070312 (best of 5) I feel like we could do better by moving the whole generator function into C, but I didn't want to tackle that right away.
author Augie Fackler <augie@google.com>
date Tue, 20 Jan 2015 13:38:07 -0500
parents 20a9d823f242
children 9007f697e8ef
line wrap: on
line source

# base85.py: pure python base85 codec
#
# Copyright (C) 2009 Brendan Cully <brendan@kublai.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import struct

_b85chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
            "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
_b85dec = {}

def _mkb85dec():
    for i, c in enumerate(_b85chars):
        _b85dec[c] = i

def b85encode(text, pad=False):
    """encode text in base85 format"""
    l = len(text)
    r = l % 4
    if r:
        text += '\0' * (4 - r)
    longs = len(text) >> 2
    words = struct.unpack('>%dL' % (longs), text)

    out = ''.join(_b85chars[(word // 52200625) % 85] +
                  _b85chars2[(word // 7225) % 7225] +
                  _b85chars2[word % 7225]
                  for word in words)

    if pad:
        return out

    # Trim padding
    olen = l % 4
    if olen:
        olen += 1
    olen += l // 4 * 5
    return out[:olen]

def b85decode(text):
    """decode base85-encoded text"""
    if not _b85dec:
        _mkb85dec()

    l = len(text)
    out = []
    for i in range(0, len(text), 5):
        chunk = text[i:i + 5]
        acc = 0
        for j, c in enumerate(chunk):
            try:
                acc = acc * 85 + _b85dec[c]
            except KeyError:
                raise ValueError('bad base85 character at position %d'
                                 % (i + j))
        if acc > 4294967295:
            raise ValueError('Base85 overflow in hunk starting at byte %d' % i)
        out.append(acc)

    # Pad final chunk if necessary
    cl = l % 5
    if cl:
        acc *= 85 ** (5 - cl)
        if cl > 1:
            acc += 0xffffff >> (cl - 2) * 8
        out[-1] = acc

    out = struct.pack('>%dL' % (len(out)), *out)
    if cl:
        out = out[:-(5 - cl)]

    return out