view mercurial/base85.c @ 16777:058e14da7044

graphlog: turn getlogrevs() into a generator This improves the poor "time to first changeset" compared to the original log command. When running: $ hg log -u user log will enumerate the changelog and display matching revisions when they are found. But: $ hg log -G -u user will first find all revisions matching the user then start to display them. Initially, I considered turning revset.match() into a generator. This is doable but requires a fair amount of work. Instead, cmdutil.increasingwindows() is reused to call the revset matcher repeatedly. This has the nice properties of: - Let us reorder the windows after filtering, which is necessary as the matcher can reorder inputs but is an internal detail not a feature. - Let us feed the matcher with windows in changelog order, which is good for performances. - Have a generator designed for log-like commands, returning small windows at first then batching larger ones. I feel that calling the matcher multiple times is correct, at least with the revsets involved in getlogrevs() because they are: - stateless (no limit()) - respecting f(a|b) = f(a) | f(b), though I have no valid argument about that. Known issues compared to log code: - Calling the revset matcher multiple times can be slow when revset functions have to create expensive data structure for filtering. This will be addressed in a followup. - Predicate combinations like "--user foo --user bar" or "--user foo and --branch bar" are inherently slower because all input revision are checked against the first condition, then against the second, and so forth. log would enumerate the input revisions once and check each of them once against all conditions, which is faster. There are solutions but nothing cheap to implement. Some numbers against mozilla repository: first line total * hg log -u rnewman /Users/pmezard/bin/hg-2.2 0.148s 7.293s /Users/pmezard/bin/hgdev 0.132s 5.747s * hg log -u rnewman -u girard /Users/pmezard/bin/hg-2.2 0.146s 7.323s /Users/pmezard/bin/hgdev 0.136s 11.096s * hg log -l 10 /Users/pmezard/bin/hg-2.2 0.137s 0.153s /Users/pmezard/bin/hgdev 0.128s 0.144s * hg log -l 10 -u rnewman /Users/pmezard/bin/hg-2.2 0.146s 0.265s /Users/pmezard/bin/hgdev 0.133s 0.236s * hg log -b GECKO193a2_20100228_RELBRANCH /Users/pmezard/bin/hg-2.2 2.332s 6.618s /Users/pmezard/bin/hgdev 1.972s 5.543s * hg log xulrunner /Users/pmezard/bin/hg-2.2 5.829s 5.958s /Users/pmezard/bin/hgdev 0.194s 6.017s * hg log --follow xulrunner/build.mk /Users/pmezard/bin/hg-2.2 0.353s 0.438s /Users/pmezard/bin/hgdev 0.394s 0.580s * hg log -u girard tools /Users/pmezard/bin/hg-2.2 5.853s 6.012s /Users/pmezard/bin/hgdev 0.195s 6.030s * hg log -b COMM2000_20110314_RELBRANCH --copies /Users/pmezard/bin/hg-2.2 2.231s 6.653s /Users/pmezard/bin/hgdev 1.897s 5.585s * hg log --follow /Users/pmezard/bin/hg-2.2 0.137s 14.140s /Users/pmezard/bin/hgdev 0.381s 44.246s * hg log --follow -r 80000:90000 /Users/pmezard/bin/hg-2.2 0.127s 1.611s /Users/pmezard/bin/hgdev 0.147s 1.847s * hg log --follow -r 90000:80000 /Users/pmezard/bin/hg-2.2 0.130s 1.702s /Users/pmezard/bin/hgdev 0.368s 6.106s * hg log --follow -r 80000:90000 js/src/jsproxy.cpp /Users/pmezard/bin/hg-2.2 0.343s 0.388s /Users/pmezard/bin/hgdev 0.437s 0.631s * hg log --follow -r 90000:80000 js/src/jsproxy.cpp /Users/pmezard/bin/hg-2.2 0.342s 0.389s /Users/pmezard/bin/hgdev 0.442s 0.628s
author Patrick Mezard <patrick@mezard.eu>
date Tue, 08 May 2012 22:43:44 +0200
parents a8065323c003
children 1b9d54c00d50
line wrap: on
line source

/*
 base85 codec

 Copyright 2006 Brendan Cully <brendan@kublai.com>

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.

 Largely based on git's implementation
*/

#include <Python.h>

#include "util.h"

static const char b85chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
static char b85dec[256];

static void
b85prep(void)
{
	int i;

	memset(b85dec, 0, sizeof(b85dec));
	for (i = 0; i < sizeof(b85chars); i++)
		b85dec[(int)(b85chars[i])] = i + 1;
}

static PyObject *
b85encode(PyObject *self, PyObject *args)
{
	const unsigned char *text;
	PyObject *out;
	char *dst;
	int len, olen, i;
	unsigned int acc, val, ch;
	int pad = 0;

	if (!PyArg_ParseTuple(args, "s#|i", &text, &len, &pad))
		return NULL;

	if (pad)
		olen = ((len + 3) / 4 * 5) - 3;
	else {
		olen = len % 4;
		if (olen)
			olen++;
		olen += len / 4 * 5;
	}
	if (!(out = PyBytes_FromStringAndSize(NULL, olen + 3)))
		return NULL;

	dst = PyBytes_AsString(out);

	while (len) {
		acc = 0;
		for (i = 24; i >= 0; i -= 8) {
			ch = *text++;
			acc |= ch << i;
			if (--len == 0)
				break;
		}
		for (i = 4; i >= 0; i--) {
			val = acc % 85;
			acc /= 85;
			dst[i] = b85chars[val];
		}
		dst += 5;
	}

	if (!pad)
		_PyBytes_Resize(&out, olen);

	return out;
}

static PyObject *
b85decode(PyObject *self, PyObject *args)
{
	PyObject *out;
	const char *text;
	char *dst;
	int len, i, j, olen, c, cap;
	unsigned int acc;

	if (!PyArg_ParseTuple(args, "s#", &text, &len))
		return NULL;

	olen = len / 5 * 4;
	i = len % 5;
	if (i)
		olen += i - 1;
	if (!(out = PyBytes_FromStringAndSize(NULL, olen)))
		return NULL;

	dst = PyBytes_AsString(out);

	i = 0;
	while (i < len)
	{
		acc = 0;
		cap = len - i - 1;
		if (cap > 4)
			cap = 4;
		for (j = 0; j < cap; i++, j++)
		{
			c = b85dec[(int)*text++] - 1;
			if (c < 0)
				return PyErr_Format(
					PyExc_ValueError,
					"bad base85 character at position %d", i);
			acc = acc * 85 + c;
		}
		if (i++ < len)
		{
			c = b85dec[(int)*text++] - 1;
			if (c < 0)
				return PyErr_Format(
					PyExc_ValueError,
					"bad base85 character at position %d", i);
			/* overflow detection: 0xffffffff == "|NsC0",
			 * "|NsC" == 0x03030303 */
			if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c)
				return PyErr_Format(
					PyExc_ValueError,
					"bad base85 sequence at position %d", i);
			acc += c;
		}

		cap = olen < 4 ? olen : 4;
		olen -= cap;
		for (j = 0; j < 4 - cap; j++)
			acc *= 85;
		if (cap && cap < 4)
			acc += 0xffffff >> (cap - 1) * 8;
		for (j = 0; j < cap; j++)
		{
			acc = (acc << 8) | (acc >> 24);
			*dst++ = acc;
		}
	}

	return out;
}

static char base85_doc[] = "Base85 Data Encoding";

static PyMethodDef methods[] = {
	{"b85encode", b85encode, METH_VARARGS,
	 "Encode text in base85.\n\n"
	 "If the second parameter is true, pad the result to a multiple of "
	 "five characters.\n"},
	{"b85decode", b85decode, METH_VARARGS, "Decode base85 text.\n"},
	{NULL, NULL}
};

#ifdef IS_PY3K
static struct PyModuleDef base85_module = {
	PyModuleDef_HEAD_INIT,
	"base85",
	base85_doc,
	-1,
	methods
};

PyMODINIT_FUNC PyInit_base85(void)
{
	b85prep();

	return PyModule_Create(&base85_module);
}
#else
PyMODINIT_FUNC initbase85(void)
{
	Py_InitModule3("base85", methods, base85_doc);

	b85prep();
}
#endif