view mercurial/cext/revlog.c @ 45432:f52b0297acc8

commit: fix a wrong argument name in documentation Differential Revision: https://phab.mercurial-scm.org/D8985
author Pulkit Goyal <7895pulkit@gmail.com>
date Thu, 03 Sep 2020 14:14:40 +0530
parents a6fde9d789d9
children 4404f129341e
line wrap: on
line source

/*
 parsers.c - efficient content parsing

 Copyright 2008 Matt Mackall <mpm@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include "bitmanipulation.h"
#include "charencode.h"
#include "revlog.h"
#include "util.h"

#ifdef IS_PY3K
/* The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 support is fully
 * supported and proper types are used in the extensions themselves. */
#define PyInt_Check PyLong_Check
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AsLong PyLong_AsLong
#endif

typedef struct indexObjectStruct indexObject;

typedef struct {
	int children[16];
} nodetreenode;

typedef struct {
	int abi_version;
	Py_ssize_t (*index_length)(const indexObject *);
	const char *(*index_node)(indexObject *, Py_ssize_t);
	int (*index_parents)(PyObject *, int, int *);
} Revlog_CAPI;

/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	indexObject *index;
	nodetreenode *nodes;
	unsigned length;   /* # nodes in use */
	unsigned capacity; /* # nodes allocated */
	int depth;         /* maximum depth of tree */
	int splits;        /* # splits performed */
} nodetree;

typedef struct {
	PyObject_HEAD /* ; */
	    nodetree nt;
} nodetreeObject;

/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * end.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
struct indexObjectStruct {
	PyObject_HEAD
	    /* Type-specific fields go here. */
	    PyObject *data;     /* raw bytes of index */
	Py_buffer buf;          /* buffer of data */
	PyObject **cache;       /* cached tuples */
	const char **offsets;   /* populated on demand */
	Py_ssize_t raw_length;  /* original number of elements */
	Py_ssize_t length;      /* current number of elements */
	PyObject *added;        /* populated on demand */
	PyObject *headrevs;     /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;            /* base-16 trie */
	int ntinitialized;      /* 0 or 1 */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;
};

static Py_ssize_t index_length(const indexObject *self)
{
	if (self->added == NULL)
		return self->length;
	return self->length + PyList_GET_SIZE(self->added);
}

static PyObject *nullentry = NULL;
static const char nullid[20] = {0};
static const Py_ssize_t nullrev = -1;

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

static int index_find_node(indexObject *self, const char *node,
                           Py_ssize_t nodelen);

#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
#else
static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;

static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	Py_INCREF(dict);

	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
		                "could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}

/*
 * Return a pointer to the beginning of a RevlogNG record.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (self->inlined && pos > 0) {
		if (self->offsets == NULL) {
			Py_ssize_t ret;
			self->offsets = PyMem_Malloc(self->raw_length *
			                             sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			ret = inline_scan(self, self->offsets);
			if (ret == -1) {
				return NULL;
			};
		}
		return self->offsets[pos];
	}

	return (const char *)(self->buf.buf) + pos * v1_hdrsize;
}

/*
 * Get parents of the given rev.
 *
 * The specified rev must be valid and must not be nullrev. A returned
 * parent revision may be nullrev, but is guaranteed to be in valid range.
 */
static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
                                    int maxrev)
{
	if (rev >= self->length) {
		long tmp;
		PyObject *tuple =
		    PyList_GET_ITEM(self->added, rev - self->length);
		if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
			return -1;
		}
		ps[0] = (int)tmp;
		if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
			return -1;
		}
		ps[1] = (int)tmp;
	} else {
		const char *data = index_deref(self, rev);
		ps[0] = getbe32(data + 24);
		ps[1] = getbe32(data + 28);
	}
	/* If index file is corrupted, ps[] may point to invalid revisions. So
	 * there is a risk of buffer overflow to trust them unconditionally. */
	if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}

/*
 * Get parents of the given rev.
 *
 * If the specified rev is out of range, IndexError will be raised. If the
 * revlog entry is corrupted, ValueError may be raised.
 *
 * Returns 0 on success or -1 on failure.
 */
static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
{
	int tiprev;
	if (!op || !HgRevlogIndex_Check(op) || !ps) {
		PyErr_BadInternalCall();
		return -1;
	}
	tiprev = (int)index_length((indexObject *)op) - 1;
	if (rev < -1 || rev > tiprev) {
		PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
		return -1;
	} else if (rev == -1) {
		ps[0] = ps[1] = -1;
		return 0;
	} else {
		return index_get_parents((indexObject *)op, rev, ps, tiprev);
	}
}

static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	uint64_t offset;
	if (rev == nullrev) {
		return 0;
	}
	if (rev >= self->length) {
		PyObject *tuple;
		PyObject *pylong;
		PY_LONG_LONG tmp;
		tuple = PyList_GET_ITEM(self->added, rev - self->length);
		pylong = PyTuple_GET_ITEM(tuple, 0);
		tmp = PyLong_AsLongLong(pylong);
		if (tmp == -1 && PyErr_Occurred()) {
			return -1;
		}
		if (tmp < 0) {
			PyErr_Format(PyExc_OverflowError,
			             "revlog entry size out of bound (%lld)",
			             (long long)tmp);
			return -1;
		}
		offset = (uint64_t)tmp;
	} else {
		const char *data = index_deref(self, rev);
		offset = getbe32(data + 4);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high = getbe32(data);
			offset |= ((uint64_t)offset_high) << 32;
		}
	}
	return (int64_t)(offset >> 16);
}

static inline int index_get_length(indexObject *self, Py_ssize_t rev)
{
	if (rev == nullrev) {
		return 0;
	}
	if (rev >= self->length) {
		PyObject *tuple;
		PyObject *pylong;
		long ret;
		tuple = PyList_GET_ITEM(self->added, rev - self->length);
		pylong = PyTuple_GET_ITEM(tuple, 1);
		ret = PyInt_AsLong(pylong);
		if (ret == -1 && PyErr_Occurred()) {
			return -1;
		}
		if (ret < 0 || ret > (long)INT_MAX) {
			PyErr_Format(PyExc_OverflowError,
			             "revlog entry size out of bound (%ld)",
			             ret);
			return -1;
		}
		return (int)ret;
	} else {
		const char *data = index_deref(self, rev);
		int tmp = (int)getbe32(data + 8);
		if (tmp < 0) {
			PyErr_Format(PyExc_OverflowError,
			             "revlog entry size out of bound (%d)",
			             tmp);
			return -1;
		}
		return tmp;
	}
}

/*
 * RevlogNG format (all in big endian, data may be inlined):
 *    6 bytes: offset
 *    2 bytes: flags
 *    4 bytes: compressed length
 *    4 bytes: uncompressed length
 *    4 bytes: base revision
 *    4 bytes: link revision
 *    4 bytes: parent 1 revision
 *    4 bytes: parent 2 revision
 *   32 bytes: nodeid (only 20 bytes used)
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);
	PyObject *entry;

	if (pos == nullrev) {
		Py_INCREF(nullentry);
		return nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	if (pos >= self->length) {
		PyObject *obj;
		obj = PyList_GET_ITEM(self->added, pos - self->length);
		Py_INCREF(obj);
		return obj;
	}

	if (self->cache) {
		if (self->cache[pos]) {
			Py_INCREF(self->cache[pos]);
			return self->cache[pos];
		}
	} else {
		self->cache = calloc(self->raw_length, sizeof(PyObject *));
		if (self->cache == NULL)
			return PyErr_NoMemory();
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	if (pos == 0) /* mask out version number for the first entry */
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                      base_rev, link_rev, parent_1, parent_2, c_node_id,
	                      (Py_ssize_t)20);

	if (entry) {
		PyObject_GC_UnTrack(entry);
		Py_INCREF(entry);
	}

	self->cache[pos] = entry;

	return entry;
}

/*
 * Return the 20-byte SHA of the node corresponding to the given rev.
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);
	const char *data;

	if (pos == nullrev)
		return nullid;

	if (pos >= length)
		return NULL;

	if (pos >= self->length) {
		PyObject *tuple, *str;
		tuple = PyList_GET_ITEM(self->added, pos - self->length);
		str = PyTuple_GetItem(tuple, 7);
		return str ? PyBytes_AS_STRING(str) : NULL;
	}

	data = index_deref(self, pos);
	return data ? data + 32 : NULL;
}

/*
 * Return the 20-byte SHA of the node corresponding to the given rev. The
 * rev is assumed to be existing. If not, an exception is set.
 */
static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
{
	const char *node = index_node(self, pos);
	if (node == NULL) {
		PyErr_Format(PyExc_IndexError, "could not access rev %d",
		             (int)pos);
	}
	return node;
}

static int nt_insert(nodetree *self, const char *node, int rev);

static int node_check(PyObject *obj, char **node)
{
	Py_ssize_t nodelen;
	if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
		return -1;
	if (nodelen == 20)
		return 0;
	PyErr_SetString(PyExc_ValueError, "20-byte hash required");
	return -1;
}

static PyObject *index_append(indexObject *self, PyObject *obj)
{
	char *node;
	Py_ssize_t len;

	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
		PyErr_SetString(PyExc_TypeError, "8-tuple required");
		return NULL;
	}

	if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
		return NULL;

	len = index_length(self);

	if (self->added == NULL) {
		self->added = PyList_New(0);
		if (self->added == NULL)
			return NULL;
	}

	if (PyList_Append(self->added, obj) == -1)
		return NULL;

	if (self->ntinitialized)
		nt_insert(&self->nt, node, (int)len);

	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}

static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

#define istat(__n, __d)                                                        \
	do {                                                                   \
		s = PyBytes_FromString(__d);                                   \
		t = PyInt_FromSsize_t(self->__n);                              \
		if (!s || !t)                                                  \
			goto bail;                                             \
		if (PyDict_SetItem(obj, s, t) == -1)                           \
			goto bail;                                             \
		Py_CLEAR(s);                                                   \
		Py_CLEAR(t);                                                   \
	} while (0)

	if (self->added) {
		Py_ssize_t len = PyList_GET_SIZE(self->added);
		s = PyBytes_FromString("index entries added");
		t = PyInt_FromSsize_t(len);
		if (!s || !t)
			goto bail;
		if (PyDict_SetItem(obj, s, t) == -1)
			goto bail;
		Py_CLEAR(s);
		Py_CLEAR(t);
	}

	if (self->raw_length != self->length)
		istat(raw_length, "revs on disk");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}

/*
 * When we cache a list, we want to be sure the caller can't mutate
 * the cached copy.
 */
static PyObject *list_copy(PyObject *list)
{
	Py_ssize_t len = PyList_GET_SIZE(list);
	PyObject *newlist = PyList_New(len);
	Py_ssize_t i;

	if (newlist == NULL)
		return NULL;

	for (i = 0; i < len; i++) {
		PyObject *obj = PyList_GET_ITEM(list, i);
		Py_INCREF(obj);
		PyList_SET_ITEM(newlist, i, obj);
	}

	return newlist;
}

static int check_filter(PyObject *filter, Py_ssize_t arg)
{
	if (filter) {
		PyObject *arglist, *result;
		int isfiltered;

		arglist = Py_BuildValue("(n)", arg);
		if (!arglist) {
			return -1;
		}

		result = PyEval_CallObject(filter, arglist);
		Py_DECREF(arglist);
		if (!result) {
			return -1;
		}

		/* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
		 * same as this function, so we can just return it directly.*/
		isfiltered = PyObject_IsTrue(result);
		Py_DECREF(result);
		return isfiltered;
	} else {
		return 0;
	}
}

static inline void set_phase_from_parents(char *phases, int parent_1,
                                          int parent_2, Py_ssize_t i)
{
	if (parent_1 >= 0 && phases[parent_1] > phases[i])
		phases[i] = phases[parent_1];
	if (parent_2 >= 0 && phases[parent_2] > phases[i])
		phases[i] = phases[parent_2];
}

static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev) */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}

static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
                             char phase)
{
	Py_ssize_t len = index_length(self);
	PyObject *item;
	PyObject *iterator;
	int rev, minrev = -1;
	char *node;

	if (!PySet_Check(roots)) {
		PyErr_SetString(PyExc_TypeError,
		                "roots must be a set of nodes");
		return -2;
	}
	iterator = PyObject_GetIter(roots);
	if (iterator == NULL)
		return -2;
	while ((item = PyIter_Next(iterator))) {
		if (node_check(item, &node) == -1)
			goto failed;
		rev = index_find_node(self, node, 20);
		/* null is implicitly public, so negative is invalid */
		if (rev < 0 || rev >= len)
			goto failed;
		phases[rev] = phase;
		if (minrev == -1 || minrev > rev)
			minrev = rev;
		Py_DECREF(item);
	}
	Py_DECREF(iterator);
	return minrev;
failed:
	Py_DECREF(iterator);
	Py_DECREF(item);
	return -2;
}

static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	/* 0: public (untracked), 1: draft, 2: secret, 32: archive,
	   96: internal */
	static const char trackedphases[] = {1, 2, 32, 96};
	PyObject *roots = Py_None;
	PyObject *phasesetsdict = NULL;
	PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
	Py_ssize_t len = index_length(self);
	char *phases = NULL;
	int minphaserev = -1, rev, i;
	const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));

	if (!PyArg_ParseTuple(args, "O", &roots))
		return NULL;
	if (roots == NULL || !PyDict_Check(roots)) {
		PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
		return NULL;
	}

	phases = calloc(len, 1);
	if (phases == NULL) {
		PyErr_NoMemory();
		return NULL;
	}

	for (i = 0; i < numphases; ++i) {
		PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
		PyObject *phaseroots = NULL;
		if (pyphase == NULL)
			goto release;
		phaseroots = PyDict_GetItem(roots, pyphase);
		Py_DECREF(pyphase);
		if (phaseroots == NULL)
			continue;
		rev = add_roots_get_min(self, phaseroots, phases,
		                        trackedphases[i]);
		if (rev == -2)
			goto release;
		if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
			minphaserev = rev;
	}

	for (i = 0; i < numphases; ++i) {
		phasesets[i] = PySet_New(NULL);
		if (phasesets[i] == NULL)
			goto release;
	}

	if (minphaserev == -1)
		minphaserev = len;
	for (rev = minphaserev; rev < len; ++rev) {
		PyObject *pyphase = NULL;
		PyObject *pyrev = NULL;
		int parents[2];
		/*
		 * The parent lookup could be skipped for phaseroots, but
		 * phase --force would historically not recompute them
		 * correctly, leaving descendents with a lower phase around.
		 * As such, unconditionally recompute the phase.
		 */
		if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
			goto release;
		set_phase_from_parents(phases, parents[0], parents[1], rev);
		switch (phases[rev]) {
		case 0:
			continue;
		case 1:
			pyphase = phasesets[0];
			break;
		case 2:
			pyphase = phasesets[1];
			break;
		case 32:
			pyphase = phasesets[2];
			break;
		case 96:
			pyphase = phasesets[3];
			break;
		default:
			/* this should never happen since the phase number is
			 * specified by this function. */
			PyErr_SetString(PyExc_SystemError,
			                "bad phase number in internal list");
			goto release;
		}
		pyrev = PyInt_FromLong(rev);
		if (pyrev == NULL)
			goto release;
		if (PySet_Add(pyphase, pyrev) == -1) {
			Py_DECREF(pyrev);
			goto release;
		}
		Py_DECREF(pyrev);
	}

	phasesetsdict = _dict_new_presized(numphases);
	if (phasesetsdict == NULL)
		goto release;
	for (i = 0; i < numphases; ++i) {
		PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
		if (pyphase == NULL)
			goto release;
		if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
		    -1) {
			Py_DECREF(pyphase);
			goto release;
		}
		Py_DECREF(phasesets[i]);
		phasesets[i] = NULL;
	}

	return Py_BuildValue("nN", len, phasesetsdict);

release:
	for (i = 0; i < numphases; ++i)
		Py_XDECREF(phasesets[i]);
	Py_XDECREF(phasesetsdict);

	free(phases);
	return NULL;
}

static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	if (len == 0) {
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}

/**
 * Obtain the base revision index entry.
 *
 * Callers must ensure that rev >= 0 or illegal memory access may occur.
 */
static inline int index_baserev(indexObject *self, int rev)
{
	const char *data;
	int result;

	if (rev >= self->length) {
		PyObject *tuple =
		    PyList_GET_ITEM(self->added, rev - self->length);
		long ret;
		if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
			return -2;
		}
		result = (int)ret;
	} else {
		data = index_deref(self, rev);
		if (data == NULL) {
			return -2;
		}

		result = getbe32(data + 16);
	}
	if (result > rev) {
		PyErr_Format(
		    PyExc_ValueError,
		    "corrupted revlog, revision base above revision: %d, %d",
		    rev, result);
		return -2;
	}
	if (result < -1) {
		PyErr_Format(
		    PyExc_ValueError,
		    "corrupted revlog, revision base out of range: %d, %d", rev,
		    result);
		return -2;
	}
	return result;
}

/**
 * Find if a revision is a snapshot or not
 *
 * Only relevant for sparse-revlog case.
 * Callers must ensure that rev is in a valid range.
 */
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	Py_ssize_t base;
	while (rev >= 0) {
		base = (Py_ssize_t)index_baserev(self, rev);
		if (base == rev) {
			base = -1;
		}
		if (base == -2) {
			assert(PyErr_Occurred());
			return -1;
		}
		if (base == -1) {
			return 1;
		}
		if (index_get_parents(self, rev, ps, (int)rev) < 0) {
			assert(PyErr_Occurred());
			return -1;
		};
		if (base == ps[0] || base == ps[1]) {
			return 0;
		}
		rev = base;
	}
	return rev == -1;
}

static PyObject *index_issnapshot(indexObject *self, PyObject *value)
{
	long rev;
	int issnap;
	Py_ssize_t length = index_length(self);

	if (!pylong_to_long(value, &rev)) {
		return NULL;
	}
	if (rev < -1 || rev >= length) {
		PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
		             rev);
		return NULL;
	};
	issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
	if (issnap < 0) {
		return NULL;
	};
	return PyBool_FromLong((long)issnap);
}

static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
{
	Py_ssize_t start_rev;
	PyObject *cache;
	Py_ssize_t base;
	Py_ssize_t rev;
	PyObject *key = NULL;
	PyObject *value = NULL;
	const Py_ssize_t length = index_length(self);
	if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
		return NULL;
	}
	for (rev = start_rev; rev < length; rev++) {
		int issnap;
		PyObject *allvalues = NULL;
		issnap = index_issnapshotrev(self, rev);
		if (issnap < 0) {
			goto bail;
		}
		if (issnap == 0) {
			continue;
		}
		base = (Py_ssize_t)index_baserev(self, rev);
		if (base == rev) {
			base = -1;
		}
		if (base == -2) {
			assert(PyErr_Occurred());
			goto bail;
		}
		key = PyInt_FromSsize_t(base);
		allvalues = PyDict_GetItem(cache, key);
		if (allvalues == NULL && PyErr_Occurred()) {
			goto bail;
		}
		if (allvalues == NULL) {
			int r;
			allvalues = PyList_New(0);
			if (!allvalues) {
				goto bail;
			}
			r = PyDict_SetItem(cache, key, allvalues);
			Py_DECREF(allvalues);
			if (r < 0) {
				goto bail;
			}
		}
		value = PyInt_FromSsize_t(rev);
		if (PyList_Append(allvalues, value)) {
			goto bail;
		}
		Py_CLEAR(key);
		Py_CLEAR(value);
	}
	Py_RETURN_NONE;
bail:
	Py_XDECREF(key);
	Py_XDECREF(value);
	return NULL;
}

static PyObject *index_deltachain(indexObject *self, PyObject *args)
{
	int rev, generaldelta;
	PyObject *stoparg;
	int stoprev, iterrev, baserev = -1;
	int stopped;
	PyObject *chain = NULL, *result = NULL;
	const Py_ssize_t length = index_length(self);

	if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
		return NULL;
	}

	if (PyInt_Check(stoparg)) {
		stoprev = (int)PyInt_AsLong(stoparg);
		if (stoprev == -1 && PyErr_Occurred()) {
			return NULL;
		}
	} else if (stoparg == Py_None) {
		stoprev = -2;
	} else {
		PyErr_SetString(PyExc_ValueError,
		                "stoprev must be integer or None");
		return NULL;
	}

	if (rev < 0 || rev >= length) {
		PyErr_SetString(PyExc_ValueError, "revlog index out of range");
		return NULL;
	}

	chain = PyList_New(0);
	if (chain == NULL) {
		return NULL;
	}

	baserev = index_baserev(self, rev);

	/* This should never happen. */
	if (baserev <= -2) {
		/* Error should be set by index_deref() */
		assert(PyErr_Occurred());
		goto bail;
	}

	iterrev = rev;

	while (iterrev != baserev && iterrev != stoprev) {
		PyObject *value = PyInt_FromLong(iterrev);
		if (value == NULL) {
			goto bail;
		}
		if (PyList_Append(chain, value)) {
			Py_DECREF(value);
			goto bail;
		}
		Py_DECREF(value);

		if (generaldelta) {
			iterrev = baserev;
		} else {
			iterrev--;
		}

		if (iterrev < 0) {
			break;
		}

		if (iterrev >= length) {
			PyErr_SetString(PyExc_IndexError,
			                "revision outside index");
			return NULL;
		}

		baserev = index_baserev(self, iterrev);

		/* This should never happen. */
		if (baserev <= -2) {
			/* Error should be set by index_deref() */
			assert(PyErr_Occurred());
			goto bail;
		}
	}

	if (iterrev == stoprev) {
		stopped = 1;
	} else {
		PyObject *value = PyInt_FromLong(iterrev);
		if (value == NULL) {
			goto bail;
		}
		if (PyList_Append(chain, value)) {
			Py_DECREF(value);
			goto bail;
		}
		Py_DECREF(value);

		stopped = 0;
	}

	if (PyList_Reverse(chain)) {
		goto bail;
	}

	result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
	Py_DECREF(chain);
	return result;

bail:
	Py_DECREF(chain);
	return NULL;
}

static inline int64_t
index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
{
	int64_t start_offset;
	int64_t end_offset;
	int end_size;
	start_offset = index_get_start(self, start_rev);
	if (start_offset < 0) {
		return -1;
	}
	end_offset = index_get_start(self, end_rev);
	if (end_offset < 0) {
		return -1;
	}
	end_size = index_get_length(self, end_rev);
	if (end_size < 0) {
		return -1;
	}
	if (end_offset < start_offset) {
		PyErr_Format(PyExc_ValueError,
		             "corrupted revlog index: inconsistent offset "
		             "between revisions (%zd) and (%zd)",
		             start_rev, end_rev);
		return -1;
	}
	return (end_offset - start_offset) + (int64_t)end_size;
}

/* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
                              Py_ssize_t startidx, Py_ssize_t endidx)
{
	int length;
	while (endidx > 1 && endidx > startidx) {
		length = index_get_length(self, revs[endidx - 1]);
		if (length < 0) {
			return -1;
		}
		if (length != 0) {
			break;
		}
		endidx -= 1;
	}
	return endidx;
}

struct Gap {
	int64_t size;
	Py_ssize_t idx;
};

static int gap_compare(const void *left, const void *right)
{
	const struct Gap *l_left = ((const struct Gap *)left);
	const struct Gap *l_right = ((const struct Gap *)right);
	if (l_left->size < l_right->size) {
		return -1;
	} else if (l_left->size > l_right->size) {
		return 1;
	}
	return 0;
}
static int Py_ssize_t_compare(const void *left, const void *right)
{
	const Py_ssize_t l_left = *(const Py_ssize_t *)left;
	const Py_ssize_t l_right = *(const Py_ssize_t *)right;
	if (l_left < l_right) {
		return -1;
	} else if (l_left > l_right) {
		return 1;
	}
	return 0;
}

static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}

static inline int nt_level(const char *node, Py_ssize_t level)
{
	int v = node[level >> 1];
	if (!(level & 1))
		v >>= 4;
	return v & 0xf;
}

/*
 * Return values:
 *
 *   -4: match is ambiguous (multiple candidates)
 *   -2: not found
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
		return -1;

	if (hex)
		maxlevel = nodelen > 40 ? 40 : (int)nodelen;
	else
		maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}

static int nt_new(nodetree *self)
{
	if (self->length == self->capacity) {
		unsigned newcapacity;
		nodetreenode *newnodes;
		newcapacity = self->capacity * 2;
		if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
			PyErr_SetString(PyExc_MemoryError,
			                "overflow in nt_new");
			return -1;
		}
		newnodes =
		    realloc(self->nodes, newcapacity * sizeof(nodetreenode));
		if (newnodes == NULL) {
			PyErr_SetString(PyExc_MemoryError, "out of memory");
			return -1;
		}
		self->capacity = newcapacity;
		self->nodes = newnodes;
		memset(&self->nodes[self->length], 0,
		       sizeof(nodetreenode) * (self->capacity - self->length));
	}
	return self->length++;
}

static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, 20)) {
				n->children[k] = -rev - 2;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			level += 1;
			off = v;
		}
	}

	return -1;
}

static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
{
	Py_ssize_t rev;
	const char *node;
	Py_ssize_t length;
	if (!PyArg_ParseTuple(args, "n", &rev))
		return NULL;
	length = index_length(self->nt.index);
	if (rev < 0 || rev >= length) {
		PyErr_SetString(PyExc_ValueError, "revlog index out of range");
		return NULL;
	}
	node = index_node_existing(self->nt.index, rev);
	if (nt_insert(&self->nt, node, (int)rev) == -1)
		return NULL;
	Py_RETURN_NONE;
}

static int nt_delete_node(nodetree *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set
	 */
	return nt_insert(self, node, -2);
}

static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
	/* Initialize before overflow-checking to avoid nt_dealloc() crash. */
	self->nodes = NULL;

	self->index = index;
	/* The input capacity is in terms of revisions, while the field is in
	 * terms of nodetree nodes. */
	self->capacity = (capacity < 4 ? 4 : capacity / 2);
	self->depth = 0;
	self->splits = 0;
	if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
		PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
		return -1;
	}
	self->nodes = calloc(self->capacity, sizeof(nodetreenode));
	if (self->nodes == NULL) {
		PyErr_NoMemory();
		return -1;
	}
	self->length = 1;
	return 0;
}

static int ntobj_init(nodetreeObject *self, PyObject *args)
{
	PyObject *index;
	unsigned capacity;
	if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
	                      &capacity))
		return -1;
	Py_INCREF(index);
	return nt_init(&self->nt, (indexObject *)index, capacity);
}

static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}

/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 *   -3: error (exception set)
 *   -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 40; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, 20) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}

static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int length;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(val, &node) == -1)
		return NULL;

	length = nt_shortest(&self->nt, node);
	if (length == -3)
		return NULL;
	if (length == -2) {
		raise_revlog_error();
		return NULL;
	}
	return PyInt_FromLong(length);
}

static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}

static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}

static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};

static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};

static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}

/*
 * Return values:
 *
 *   -3: error (exception set)
 *   -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node,
                           Py_ssize_t nodelen)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(&self->nt, node, nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups.  This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (nt_insert(&self->nt, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				break;
			}
		}
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}

static PyObject *index_getitem(indexObject *self, PyObject *value)
{
	char *node;
	int rev;

	if (PyInt_Check(value)) {
		long idx;
		if (!pylong_to_long(value, &idx)) {
			return NULL;
		}
		return index_get(self, idx);
	}

	if (node_check(value, &node) == -1)
		return NULL;
	rev = index_find_node(self, node, 20);
	if (rev >= -1)
		return PyInt_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	return NULL;
}

/*
 * Fully populate the radix tree.
 */
static int index_populate_nt(indexObject *self)
{
	int rev;
	if (self->ntrev > 0) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -1;
			if (nt_insert(&self->nt, n, rev) == -1)
				return -1;
		}
		self->ntrev = -1;
	}
	return 0;
}

static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 40) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, 20);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, 20);
}

static PyObject *index_shortest(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int length;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(val, &node) == -1)
		return NULL;

	self->ntlookups++;
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	length = nt_shortest(&self->nt, node);
	if (length == -3)
		return NULL;
	if (length == -2) {
		raise_revlog_error();
		return NULL;
	}
	return PyInt_FromLong(length);
}

static PyObject *index_m_get(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int rev;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(val, &node) == -1)
		return NULL;
	rev = index_find_node(self, node, 20);
	if (rev == -3)
		return NULL;
	if (rev == -2)
		Py_RETURN_NONE;
	return PyInt_FromLong(rev);
}

static int index_contains(indexObject *self, PyObject *value)
{
	char *node;

	if (PyInt_Check(value)) {
		long rev;
		if (!pylong_to_long(value, &rev)) {
			return -1;
		}
		return rev >= -1 && rev < index_length(self);
	}

	if (node_check(value, &node) == -1)
		return -1;

	switch (index_find_node(self, node, 20)) {
	case -3:
		return -1;
	case -2:
		return 0;
	default:
		return 1;
	}
}

static PyObject *index_m_has_node(indexObject *self, PyObject *args)
{
	int ret = index_contains(self, args);
	if (ret < 0)
		return NULL;
	return PyBool_FromLong((long)ret);
}

static PyObject *index_m_rev(indexObject *self, PyObject *val)
{
	char *node;
	int rev;

	if (node_check(val, &node) == -1)
		return NULL;
	rev = index_find_node(self, node, 20);
	if (rev >= -1)
		return PyInt_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	return NULL;
}

typedef uint64_t bitmask;

/*
 * Given a disjoint set of revs, return all candidates for the
 * greatest common ancestor. In revset notation, this is the set
 * "heads(::a and ::b and ...)"
 */
static PyObject *find_gca_candidates(indexObject *self, const int *revs,
                                     int revcount)
{
	const bitmask allseen = (1ull << revcount) - 1;
	const bitmask poison = 1ull << revcount;
	PyObject *gca = PyList_New(0);
	int i, v, interesting;
	int maxrev = -1;
	bitmask sp;
	bitmask *seen;

	if (gca == NULL)
		return PyErr_NoMemory();

	for (i = 0; i < revcount; i++) {
		if (revs[i] > maxrev)
			maxrev = revs[i];
	}

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		Py_DECREF(gca);
		return PyErr_NoMemory();
	}

	for (i = 0; i < revcount; i++)
		seen[revs[i]] = 1ull << i;

	interesting = revcount;

	for (v = maxrev; v >= 0 && interesting; v--) {
		bitmask sv = seen[v];
		int parents[2];

		if (!sv)
			continue;

		if (sv < poison) {
			interesting -= 1;
			if (sv == allseen) {
				PyObject *obj = PyInt_FromLong(v);
				if (obj == NULL)
					goto bail;
				if (PyList_Append(gca, obj) == -1) {
					Py_DECREF(obj);
					goto bail;
				}
				sv |= poison;
				for (i = 0; i < revcount; i++) {
					if (revs[i] == v)
						goto done;
				}
			}
		}
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			if (p == -1)
				continue;
			sp = seen[p];
			if (sv < poison) {
				if (sp == 0) {
					seen[p] = sv;
					interesting++;
				} else if (sp != sv)
					seen[p] |= sv;
			} else {
				if (sp && sp < poison)
					interesting--;
				seen[p] = sv;
			}
		}
	}

done:
	free(seen);
	return gca;
bail:
	free(seen);
	Py_XDECREF(gca);
	return NULL;
}

/*
 * Given a disjoint set of revs, return the subset with the longest
 * path to the root.
 */
static PyObject *find_deepest(indexObject *self, PyObject *revs)
{
	const Py_ssize_t revcount = PyList_GET_SIZE(revs);
	static const Py_ssize_t capacity = 24;
	int *depth, *interesting = NULL;
	int i, j, v, ninteresting;
	PyObject *dict = NULL, *keys = NULL;
	long *seen = NULL;
	int maxrev = -1;
	long final;

	if (revcount > capacity) {
		PyErr_Format(PyExc_OverflowError,
		             "bitset size (%ld) > capacity (%ld)",
		             (long)revcount, (long)capacity);
		return NULL;
	}

	for (i = 0; i < revcount; i++) {
		int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
		if (n > maxrev)
			maxrev = n;
	}

	depth = calloc(sizeof(*depth), maxrev + 1);
	if (depth == NULL)
		return PyErr_NoMemory();

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
	if (interesting == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	if (PyList_Sort(revs) == -1)
		goto bail;

	for (i = 0; i < revcount; i++) {
		int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
		long b = 1l << i;
		depth[n] = 1;
		seen[n] = b;
		interesting[b] = 1;
	}

	/* invariant: ninteresting is the number of non-zero entries in
	 * interesting. */
	ninteresting = (int)revcount;

	for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
		int dv = depth[v];
		int parents[2];
		long sv;

		if (dv == 0)
			continue;

		sv = seen[v];
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			long sp;
			int dp;

			if (p == -1)
				continue;

			dp = depth[p];
			sp = seen[p];
			if (dp <= dv) {
				depth[p] = dv + 1;
				if (sp != sv) {
					interesting[sv] += 1;
					seen[p] = sv;
					if (sp) {
						interesting[sp] -= 1;
						if (interesting[sp] == 0)
							ninteresting -= 1;
					}
				}
			} else if (dv == dp - 1) {
				long nsp = sp | sv;
				if (nsp == sp)
					continue;
				seen[p] = nsp;
				interesting[sp] -= 1;
				if (interesting[sp] == 0)
					ninteresting -= 1;
				if (interesting[nsp] == 0)
					ninteresting += 1;
				interesting[nsp] += 1;
			}
		}
		interesting[sv] -= 1;
		if (interesting[sv] == 0)
			ninteresting -= 1;
	}

	final = 0;
	j = ninteresting;
	for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
		if (interesting[i] == 0)
			continue;
		final |= i;
		j -= 1;
	}
	if (final == 0) {
		keys = PyList_New(0);
		goto bail;
	}

	dict = PyDict_New();
	if (dict == NULL)
		goto bail;

	for (i = 0; i < revcount; i++) {
		PyObject *key;

		if ((final & (1 << i)) == 0)
			continue;

		key = PyList_GET_ITEM(revs, i);
		Py_INCREF(key);
		Py_INCREF(Py_None);
		if (PyDict_SetItem(dict, key, Py_None) == -1) {
			Py_DECREF(key);
			Py_DECREF(Py_None);
			goto bail;
		}
	}

	keys = PyDict_Keys(dict);

bail:
	free(depth);
	free(seen);
	free(interesting);
	Py_XDECREF(dict);

	return keys;
}

/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0;
	int revcount = 0;
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self);

	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyInt_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
			                "arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyInt_AsLong(obj);
		Py_DECREF(obj);
		if (val == -1) {
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError, "index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		} else
			repeat |= x;
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
			             "bitset size (%d) > capacity (%d)",
			             revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	if (revcount == 1) {
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyInt_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}

/*
 * Given a (possibly overlapping) set of revs, return the greatest
 * common ancestors: those with the longest path to the root.
 */
static PyObject *index_ancestors(indexObject *self, PyObject *args)
{
	PyObject *ret;
	PyObject *gca = index_commonancestorsheads(self, args);
	if (gca == NULL)
		return NULL;

	if (PyList_GET_SIZE(gca) <= 1) {
		return gca;
	}

	ret = find_deepest(self, gca);
	Py_DECREF(gca);
	return ret;
}

/*
 * Invalidate any trie entries introduced by added revs.
 */
static void index_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len = PyList_GET_SIZE(self->added);

	for (i = start; i < len; i++) {
		PyObject *tuple = PyList_GET_ITEM(self->added, i);
		PyObject *node = PyTuple_GET_ITEM(tuple, 7);

		nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
	}

	if (start == 0)
		Py_CLEAR(self->added);
}

/*
 * Delete a numeric range of revs, which must be at the end of the
 * range.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

/* Argument changed from PySliceObject* to PyObject* in Python 3. */
#ifdef IS_PY3K
	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
#else
	if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
	                         &step, &slicelength) < 0)
#endif
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		if (self->ntinitialized) {
			Py_ssize_t i;

			for (i = start; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			if (self->added)
				index_invalidate_added(self, 0);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		} else if (self->added) {
			Py_CLEAR(self->added);
		}

		self->length = start;
		if (start < self->raw_length) {
			if (self->cache) {
				Py_ssize_t i;
				for (i = start; i < self->raw_length; i++)
					Py_CLEAR(self->cache[i]);
			}
			self->raw_length = start;
		}
		goto done;
	}

	if (self->ntinitialized) {
		index_invalidate_added(self, start - self->length);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	}
	if (self->added)
		ret = PyList_SetSlice(self->added, start - self->length,
		                      PyList_GET_SIZE(self->added), NULL);
done:
	Py_CLEAR(self->headrevs);
	return ret;
}

/*
 * Supported ops:
 *
 * slice deletion
 * string assignment (extend node->rev mapping)
 * string deletion (shrink node->rev mapping)
 */
static int index_assign_subscript(indexObject *self, PyObject *item,
                                  PyObject *value)
{
	char *node;
	long rev;

	if (PySlice_Check(item) && value == NULL)
		return index_slice_del(self, item);

	if (node_check(item, &node) == -1)
		return -1;

	if (value == NULL)
		return self->ntinitialized ? nt_delete_node(&self->nt, node)
		                           : 0;
	rev = PyInt_AsLong(value);
	if (rev > INT_MAX || rev < 0) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "rev out of range");
		return -1;
	}

	if (index_init_nt(self) == -1)
		return -1;
	return nt_insert(&self->nt, node, (int)rev);
}

/*
 * Find all RevlogNG entries in an index that has inline data. Update
 * the optional "offsets" table with those entries.
 */
static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
{
	const char *data = (const char *)self->buf.buf;
	Py_ssize_t pos = 0;
	Py_ssize_t end = self->buf.len;
	long incr = v1_hdrsize;
	Py_ssize_t len = 0;

	while (pos + v1_hdrsize <= end && pos >= 0) {
		uint32_t comp_len;
		/* 3rd element of header is length of compressed inline data */
		comp_len = getbe32(data + pos + 8);
		incr = v1_hdrsize + comp_len;
		if (offsets)
			offsets[len] = data + pos;
		len++;
		pos += incr;
	}

	if (pos != end) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
		return -1;
	}

	return len;
}

static int index_init(indexObject *self, PyObject *args)
{
	PyObject *data_obj, *inlined_obj;
	Py_ssize_t size;

	/* Initialize before argument-checking to avoid index_dealloc() crash.
	 */
	self->raw_length = 0;
	self->added = NULL;
	self->cache = NULL;
	self->data = NULL;
	memset(&self->buf, 0, sizeof(self->buf));
	self->headrevs = NULL;
	self->filteredrevs = Py_None;
	Py_INCREF(Py_None);
	self->ntinitialized = 0;
	self->offsets = NULL;

	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
		return -1;
	if (!PyObject_CheckBuffer(data_obj)) {
		PyErr_SetString(PyExc_TypeError,
		                "data does not support buffer interface");
		return -1;
	}

	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
		return -1;
	size = self->buf.len;

	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
	self->data = data_obj;

	self->ntlookups = self->ntmisses = 0;
	self->ntrev = -1;
	Py_INCREF(self->data);

	if (self->inlined) {
		Py_ssize_t len = inline_scan(self, NULL);
		if (len == -1)
			goto bail;
		self->raw_length = len;
		self->length = len;
	} else {
		if (size % v1_hdrsize) {
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
			goto bail;
		}
		self->raw_length = size / v1_hdrsize;
		self->length = self->raw_length;
	}

	return 0;
bail:
	return -1;
}

static PyObject *index_nodemap(indexObject *self)
{
	Py_INCREF(self);
	return (PyObject *)self;
}

static void _index_clearcaches(indexObject *self)
{
	if (self->cache) {
		Py_ssize_t i;

		for (i = 0; i < self->raw_length; i++)
			Py_CLEAR(self->cache[i]);
		free(self->cache);
		self->cache = NULL;
	}
	if (self->offsets) {
		PyMem_Free((void *)self->offsets);
		self->offsets = NULL;
	}
	if (self->ntinitialized) {
		nt_dealloc(&self->nt);
	}
	self->ntinitialized = 0;
	Py_CLEAR(self->headrevs);
}

static PyObject *index_clearcaches(indexObject *self)
{
	_index_clearcaches(self);
	self->ntrev = -1;
	self->ntlookups = self->ntmisses = 0;
	Py_RETURN_NONE;
}

static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	Py_XDECREF(self->added);
	PyObject_Del(self);
}

static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};

static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};

static PyMethodDef index_methods[] = {
    {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
     "return the gca set of the given revs"},
    {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
     METH_VARARGS,
     "return the heads of the common ancestors of the given revs"},
    {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
     "clear the index caches"},
    {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
    {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
     "return `rev` associated with a node or None"},
    {"has_node", (PyCFunction)index_m_has_node, METH_O,
     "return True if the node exist in the index"},
    {"rev", (PyCFunction)index_m_rev, METH_O,
     "return `rev` associated with a node or raise RevlogError"},
    {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
     "compute phases"},
    {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
     "reachableroots"},
    {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
     "get head revisions"}, /* Can do filtering since 3.2 */
    {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
     "get filtered head revisions"}, /* Can always do filtering */
    {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
     "True if the object is a snapshot"},
    {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
     "Gather snapshot data in a cache dict"},
    {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
     "determine revisions with deltas to reconstruct fulltext"},
    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
    {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
    {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
     "match a potentially ambiguous node ID"},
    {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
    {NULL} /* Sentinel */
};

static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};

PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    0,                             /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};

/*
 * returns a tuple of the form (index, index, cache) with elements as
 * follows:
 *
 * index: an index object that lazily parses RevlogNG records
 * cache: if data is inlined, a tuple (0, index_file_content), else None
 *        index_file_content could be a string, or a buffer
 *
 * added complications are for backwards compatibility
 */
PyObject *parse_index2(PyObject *self, PyObject *args)
{
	PyObject *cache = NULL;
	indexObject *idx;
	int ret;

	idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
	if (idx == NULL)
		goto bail;

	ret = index_init(idx, args);
	if (ret == -1)
		goto bail;

	if (idx->inlined) {
		cache = Py_BuildValue("iO", 0, idx->data);
		if (cache == NULL)
			goto bail;
	} else {
		cache = Py_None;
		Py_INCREF(cache);
	}

	return Py_BuildValue("NN", idx, cache);

bail:
	Py_XDECREF(idx);
	Py_XDECREF(cache);
	return NULL;
}

static Revlog_CAPI CAPI = {
    /* increment the abi_version field upon each change in the Revlog_CAPI
       struct or in the ABI of the listed functions */
    2,
    index_length,
    index_node,
    HgRevlogIndex_GetParents,
};

void revlog_module_init(PyObject *mod)
{
	PyObject *caps = NULL;
	HgRevlogIndex_Type.tp_new = PyType_GenericNew;
	if (PyType_Ready(&HgRevlogIndex_Type) < 0)
		return;
	Py_INCREF(&HgRevlogIndex_Type);
	PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);

	nodetreeType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&nodetreeType) < 0)
		return;
	Py_INCREF(&nodetreeType);
	PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

	if (!nullentry) {
		nullentry =
		    Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
		                  -1, -1, -1, nullid, (Py_ssize_t)20);
	}
	if (nullentry)
		PyObject_GC_UnTrack(nullentry);

	caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
	if (caps != NULL)
		PyModule_AddObject(mod, "revlog_CAPI", caps);
}