view mercurial/cext/manifest.c @ 45053:cc2572923ea3

revlog: avoid hard-coded hash sizes Differential Revision: https://phab.mercurial-scm.org/D8683
author Joerg Sonnenberger <joerg@bec.de>
date Mon, 06 Jul 2020 14:30:13 +0200
parents 0b0e72b5d551
children d0ef8c1dddd4
line wrap: on
line source

/*
 * manifest.c - manifest type that does on-demand parsing.
 *
 * Copyright 2015, Google Inc.
 *
 * This software may be used and distributed according to the terms of
 * the GNU General Public License, incorporated herein by reference.
 */
#include <Python.h>

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "charencode.h"
#include "util.h"

#define DEFAULT_LINES 100000

typedef struct {
	char *start;
	Py_ssize_t len; /* length of line including terminal newline */
	char hash_suffix;
	bool from_malloc;
	bool deleted;
} line;

typedef struct {
	PyObject_HEAD
	PyObject *pydata;
	line *lines;
	int numlines; /* number of line entries */
	int livelines; /* number of non-deleted lines */
	int maxlines; /* allocated number of lines */
	bool dirty;
} lazymanifest;

#define MANIFEST_OOM -1
#define MANIFEST_NOT_SORTED -2
#define MANIFEST_MALFORMED -3
#define MANIFEST_BOGUS_FILENAME -4
#define MANIFEST_TOO_SHORT_LINE -5

/* get the length of the path for a line */
static Py_ssize_t pathlen(line *l)
{
	const char *end = memchr(l->start, '\0', l->len);
	return (end) ? (Py_ssize_t)(end - l->start) : l->len;
}

/* get the node value of a single line */
static PyObject *nodeof(line *l)
{
	char *s = l->start;
	Py_ssize_t llen = pathlen(l);
	Py_ssize_t hlen = l->len - llen - 2;
	Py_ssize_t hlen_raw = 20;
	PyObject *hash;
	if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
		PyErr_SetString(PyExc_ValueError, "manifest line too short");
		return NULL;
	}
	switch (hlen) {
	case 40: /* sha1 */
	case 41: /* sha1 with cruft for a merge */
		break;
	case 64: /* new hash */
	case 65: /* new hash with cruft for a merge */
		hlen_raw = 32;
		break;
	default:
		PyErr_SetString(PyExc_ValueError, "invalid node length in manifest");
		return NULL;
	}
	hash = unhexlify(s + llen + 1, hlen_raw * 2);
	if (!hash) {
		return NULL;
	}
	if (l->hash_suffix != '\0') {
		char newhash[33];
		memcpy(newhash, PyBytes_AsString(hash), hlen_raw);
		Py_DECREF(hash);
		newhash[hlen_raw] = l->hash_suffix;
		hash = PyBytes_FromStringAndSize(newhash, hlen_raw+1);
	}
	return hash;
}

/* get the node hash and flags of a line as a tuple */
static PyObject *hashflags(line *l)
{
	char *s = l->start;
	Py_ssize_t plen = pathlen(l);
	PyObject *hash = nodeof(l);
	ssize_t hlen;
	Py_ssize_t hplen, flen;
	PyObject *flags;
	PyObject *tup;

	if (!hash)
		return NULL;
	/* hash is either 20 or 21 bytes for an old hash, so we use a
	   ternary here to get the "real" hexlified sha length. */
	hlen = PyBytes_GET_SIZE(hash) < 22 ? 40 : 64;
	/* 1 for null byte, 1 for newline */
	hplen = plen + hlen + 2;
	flen = l->len - hplen;

	flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
	if (!flags) {
		Py_DECREF(hash);
		return NULL;
	}
	tup = PyTuple_Pack(2, hash, flags);
	Py_DECREF(flags);
	Py_DECREF(hash);
	return tup;
}

/* if we're about to run out of space in the line index, add more */
static bool realloc_if_full(lazymanifest *self)
{
	if (self->numlines == self->maxlines) {
		self->maxlines *= 2;
		self->lines = realloc(self->lines, self->maxlines * sizeof(line));
	}
	return !!self->lines;
}

/*
 * Find the line boundaries in the manifest that 'data' points to and store
 * information about each line in 'self'.
 */
static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
{
	char *prev = NULL;
	while (len > 0) {
		line *l;
		char *next;
		if (*data == '\0') {
			/* It's implausible there's no filename, don't
			 * even bother looking for the newline. */
			return MANIFEST_BOGUS_FILENAME;
		}
		next = memchr(data, '\n', len);
		if (!next) {
			return MANIFEST_MALFORMED;
		}
		if ((next - data) < 42) {
			/* We should have at least 42 bytes in a line:
			   1 byte filename
			   1 NUL
			   40 bytes of hash
			   so we can give up here.
			*/
			return MANIFEST_TOO_SHORT_LINE;
		}
		next++; /* advance past newline */
		if (prev && strcmp(prev, data) > -1) {
			/* This data isn't sorted, so we have to abort. */
			return MANIFEST_NOT_SORTED;
		}
		if (!realloc_if_full(self)) {
			return MANIFEST_OOM; /* no memory */
		}
		l = self->lines + ((self->numlines)++);
		l->start = data;
		l->len = next - data;
		l->hash_suffix = '\0';
		l->from_malloc = false;
		l->deleted = false;
		len = len - l->len;
		prev = data;
		data = next;
	}
	self->livelines = self->numlines;
	return 0;
}

static void lazymanifest_init_early(lazymanifest *self)
{
	self->pydata = NULL;
	self->lines = NULL;
	self->numlines = 0;
	self->maxlines = 0;
}

static int lazymanifest_init(lazymanifest *self, PyObject *args)
{
	char *data;
	Py_ssize_t len;
	int err, ret;
	PyObject *pydata;

	lazymanifest_init_early(self);
	if (!PyArg_ParseTuple(args, "S", &pydata)) {
		return -1;
	}
	err = PyBytes_AsStringAndSize(pydata, &data, &len);

	self->dirty = false;
	if (err == -1)
		return -1;
	self->pydata = pydata;
	Py_INCREF(self->pydata);
	Py_BEGIN_ALLOW_THREADS
	self->lines = malloc(DEFAULT_LINES * sizeof(line));
	self->maxlines = DEFAULT_LINES;
	self->numlines = 0;
	if (!self->lines)
		ret = MANIFEST_OOM;
	else
		ret = find_lines(self, data, len);
	Py_END_ALLOW_THREADS
	switch (ret) {
	case 0:
		break;
	case MANIFEST_OOM:
		PyErr_NoMemory();
		break;
	case MANIFEST_NOT_SORTED:
		PyErr_Format(PyExc_ValueError,
			     "Manifest lines not in sorted order.");
		break;
	case MANIFEST_MALFORMED:
		PyErr_Format(PyExc_ValueError,
			     "Manifest did not end in a newline.");
		break;
	case MANIFEST_BOGUS_FILENAME:
		PyErr_Format(
			PyExc_ValueError,
			"Manifest had an entry with a zero-length filename.");
		break;
	case MANIFEST_TOO_SHORT_LINE:
		PyErr_Format(
			PyExc_ValueError,
			"Manifest had implausibly-short line.");
		break;
	default:
		PyErr_Format(PyExc_ValueError,
			     "Unknown problem parsing manifest.");
	}
	return ret == 0 ? 0 : -1;
}

static void lazymanifest_dealloc(lazymanifest *self)
{
	/* free any extra lines we had to allocate */
	int i;
	for (i = 0; self->lines && (i < self->numlines); i++) {
		if (self->lines[i].from_malloc) {
			free(self->lines[i].start);
		}
	}
	free(self->lines);
	self->lines = NULL;
	if (self->pydata) {
		Py_DECREF(self->pydata);
		self->pydata = NULL;
	}
	PyObject_Del(self);
}

/* iteration support */

typedef struct {
	PyObject_HEAD lazymanifest *m;
	Py_ssize_t pos;
} lmIter;

static void lmiter_dealloc(PyObject *o)
{
	lmIter *self = (lmIter *)o;
	Py_DECREF(self->m);
	PyObject_Del(self);
}

static line *lmiter_nextline(lmIter *self)
{
	do {
		self->pos++;
		if (self->pos >= self->m->numlines) {
			return NULL;
		}
		/* skip over deleted manifest entries */
	} while (self->m->lines[self->pos].deleted);
	return self->m->lines + self->pos;
}

static PyObject *lmiter_iterentriesnext(PyObject *o)
{
	Py_ssize_t pl;
	line *l;
	Py_ssize_t consumed;
	PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
	l = lmiter_nextline((lmIter *)o);
	if (!l) {
		goto done;
	}
	pl = pathlen(l);
	path = PyBytes_FromStringAndSize(l->start, pl);
	hash = nodeof(l);
	if (!path || !hash) {
		goto done;
	}
	consumed = pl + 41;
	flags = PyBytes_FromStringAndSize(l->start + consumed,
					   l->len - consumed - 1);
	if (!flags) {
		goto done;
	}
	ret = PyTuple_Pack(3, path, hash, flags);
done:
	Py_XDECREF(path);
	Py_XDECREF(hash);
	Py_XDECREF(flags);
	return ret;
}

#ifdef IS_PY3K
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
	| Py_TPFLAGS_HAVE_ITER
#endif

static PyTypeObject lazymanifestEntriesIterator = {
	PyVarObject_HEAD_INIT(NULL, 0) /* header */
	"parsers.lazymanifest.entriesiterator", /*tp_name */
	sizeof(lmIter),                  /*tp_basicsize */
	0,                               /*tp_itemsize */
	lmiter_dealloc,                  /*tp_dealloc */
	0,                               /*tp_print */
	0,                               /*tp_getattr */
	0,                               /*tp_setattr */
	0,                               /*tp_compare */
	0,                               /*tp_repr */
	0,                               /*tp_as_number */
	0,                               /*tp_as_sequence */
	0,                               /*tp_as_mapping */
	0,                               /*tp_hash */
	0,                               /*tp_call */
	0,                               /*tp_str */
	0,                               /*tp_getattro */
	0,                               /*tp_setattro */
	0,                               /*tp_as_buffer */
	LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
	"Iterator for 3-tuples in a lazymanifest.",  /* tp_doc */
	0,                               /* tp_traverse */
	0,                               /* tp_clear */
	0,                               /* tp_richcompare */
	0,                               /* tp_weaklistoffset */
	PyObject_SelfIter,               /* tp_iter: __iter__() method */
	lmiter_iterentriesnext,          /* tp_iternext: next() method */
};

static PyObject *lmiter_iterkeysnext(PyObject *o)
{
	Py_ssize_t pl;
	line *l = lmiter_nextline((lmIter *)o);
	if (!l) {
		return NULL;
	}
	pl = pathlen(l);
	return PyBytes_FromStringAndSize(l->start, pl);
}

#ifdef IS_PY3K
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
	| Py_TPFLAGS_HAVE_ITER
#endif

static PyTypeObject lazymanifestKeysIterator = {
	PyVarObject_HEAD_INIT(NULL, 0) /* header */
	"parsers.lazymanifest.keysiterator", /*tp_name */
	sizeof(lmIter),                  /*tp_basicsize */
	0,                               /*tp_itemsize */
	lmiter_dealloc,                  /*tp_dealloc */
	0,                               /*tp_print */
	0,                               /*tp_getattr */
	0,                               /*tp_setattr */
	0,                               /*tp_compare */
	0,                               /*tp_repr */
	0,                               /*tp_as_number */
	0,                               /*tp_as_sequence */
	0,                               /*tp_as_mapping */
	0,                               /*tp_hash */
	0,                               /*tp_call */
	0,                               /*tp_str */
	0,                               /*tp_getattro */
	0,                               /*tp_setattro */
	0,                               /*tp_as_buffer */
	LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
	"Keys iterator for a lazymanifest.",  /* tp_doc */
	0,                               /* tp_traverse */
	0,                               /* tp_clear */
	0,                               /* tp_richcompare */
	0,                               /* tp_weaklistoffset */
	PyObject_SelfIter,               /* tp_iter: __iter__() method */
	lmiter_iterkeysnext,             /* tp_iternext: next() method */
};

static lazymanifest *lazymanifest_copy(lazymanifest *self);

static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
{
	lmIter *i = NULL;
	lazymanifest *t = lazymanifest_copy(self);
	if (!t) {
		PyErr_NoMemory();
		return NULL;
	}
	i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
	if (i) {
		i->m = t;
		i->pos = -1;
	} else {
		Py_DECREF(t);
		PyErr_NoMemory();
	}
	return (PyObject *)i;
}

static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
{
	lmIter *i = NULL;
	lazymanifest *t = lazymanifest_copy(self);
	if (!t) {
		PyErr_NoMemory();
		return NULL;
	}
	i = PyObject_New(lmIter, &lazymanifestKeysIterator);
	if (i) {
		i->m = t;
		i->pos = -1;
	} else {
		Py_DECREF(t);
		PyErr_NoMemory();
	}
	return (PyObject *)i;
}

/* __getitem__ and __setitem__ support */

static Py_ssize_t lazymanifest_size(lazymanifest *self)
{
	return self->livelines;
}

static int linecmp(const void *left, const void *right)
{
	return strcmp(((const line *)left)->start,
		      ((const line *)right)->start);
}

static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
{
	line needle;
	line *hit;
	if (!PyBytes_Check(key)) {
		PyErr_Format(PyExc_TypeError,
			     "getitem: manifest keys must be a string.");
		return NULL;
	}
	needle.start = PyBytes_AsString(key);
	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
		      &linecmp);
	if (!hit || hit->deleted) {
		PyErr_Format(PyExc_KeyError, "No such manifest entry.");
		return NULL;
	}
	return hashflags(hit);
}

static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
{
	line needle;
	line *hit;
	if (!PyBytes_Check(key)) {
		PyErr_Format(PyExc_TypeError,
			     "delitem: manifest keys must be a string.");
		return -1;
	}
	needle.start = PyBytes_AsString(key);
	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
		      &linecmp);
	if (!hit || hit->deleted) {
		PyErr_Format(PyExc_KeyError,
			     "Tried to delete nonexistent manifest entry.");
		return -1;
	}
	self->dirty = true;
	hit->deleted = true;
	self->livelines--;
	return 0;
}

/* Do a binary search for the insertion point for new, creating the
 * new entry if needed. */
static int internalsetitem(lazymanifest *self, line *new)
{
	int start = 0, end = self->numlines;
	while (start < end) {
		int pos = start + (end - start) / 2;
		int c = linecmp(new, self->lines + pos);
		if (c < 0)
			end = pos;
		else if (c > 0)
			start = pos + 1;
		else {
			if (self->lines[pos].deleted)
				self->livelines++;
			if (self->lines[pos].from_malloc)
				free(self->lines[pos].start);
			start = pos;
			goto finish;
		}
	}
	/* being here means we need to do an insert */
	if (!realloc_if_full(self)) {
		PyErr_NoMemory();
		return -1;
	}
	memmove(self->lines + start + 1, self->lines + start,
		(self->numlines - start) * sizeof(line));
	self->numlines++;
	self->livelines++;
finish:
	self->lines[start] = *new;
	self->dirty = true;
	return 0;
}

static int lazymanifest_setitem(
	lazymanifest *self, PyObject *key, PyObject *value)
{
	char *path;
	Py_ssize_t plen;
	PyObject *pyhash;
	Py_ssize_t hlen;
	char *hash;
	PyObject *pyflags;
	char *flags;
	Py_ssize_t flen;
	Py_ssize_t dlen;
	char *dest;
	int i;
	line new;
	if (!PyBytes_Check(key)) {
		PyErr_Format(PyExc_TypeError,
			     "setitem: manifest keys must be a string.");
		return -1;
	}
	if (!value) {
		return lazymanifest_delitem(self, key);
	}
	if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
		PyErr_Format(PyExc_TypeError,
			     "Manifest values must be a tuple of (node, flags).");
		return -1;
	}
	if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
		return -1;
	}

	pyhash = PyTuple_GetItem(value, 0);
	if (!PyBytes_Check(pyhash)) {
		PyErr_Format(PyExc_TypeError,
			     "node must be a 20-byte string");
		return -1;
	}
	hlen = PyBytes_Size(pyhash);
	/* Some parts of the codebase try and set 21 or 22
	 * byte "hash" values in order to perturb things for
	 * status. We have to preserve at least the 21st
	 * byte. Sigh. If there's a 22nd byte, we drop it on
	 * the floor, which works fine.
	 */
	if (hlen != 20 && hlen != 21 && hlen != 22) {
		PyErr_Format(PyExc_TypeError,
			     "node must be a 20-byte string");
		return -1;
	}
	hash = PyBytes_AsString(pyhash);

	pyflags = PyTuple_GetItem(value, 1);
	if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
		PyErr_Format(PyExc_TypeError,
			     "flags must a 0 or 1 byte string");
		return -1;
	}
	if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
		return -1;
	}
	/* one null byte and one newline */
	dlen = plen + 41 + flen + 1;
	dest = malloc(dlen);
	if (!dest) {
		PyErr_NoMemory();
		return -1;
	}
	memcpy(dest, path, plen + 1);
	for (i = 0; i < 20; i++) {
		/* Cast to unsigned, so it will not get sign-extended when promoted
		 * to int (as is done when passing to a variadic function)
		 */
		sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
	}
	memcpy(dest + plen + 41, flags, flen);
	dest[plen + 41 + flen] = '\n';
	new.start = dest;
	new.len = dlen;
	new.hash_suffix = '\0';
	if (hlen > 20) {
		new.hash_suffix = hash[20];
	}
	new.from_malloc = true;     /* is `start` a pointer we allocated? */
	new.deleted = false;        /* is this entry deleted? */
	if (internalsetitem(self, &new)) {
		return -1;
	}
	return 0;
}

static PyMappingMethods lazymanifest_mapping_methods = {
	(lenfunc)lazymanifest_size,             /* mp_length */
	(binaryfunc)lazymanifest_getitem,       /* mp_subscript */
	(objobjargproc)lazymanifest_setitem,    /* mp_ass_subscript */
};

/* sequence methods (important or __contains__ builds an iterator) */

static int lazymanifest_contains(lazymanifest *self, PyObject *key)
{
	line needle;
	line *hit;
	if (!PyBytes_Check(key)) {
		/* Our keys are always strings, so if the contains
		 * check is for a non-string, just return false. */
		return 0;
	}
	needle.start = PyBytes_AsString(key);
	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
		      &linecmp);
	if (!hit || hit->deleted) {
		return 0;
	}
	return 1;
}

static PySequenceMethods lazymanifest_seq_meths = {
	(lenfunc)lazymanifest_size, /* sq_length */
	0, /* sq_concat */
	0, /* sq_repeat */
	0, /* sq_item */
	0, /* sq_slice */
	0, /* sq_ass_item */
	0, /* sq_ass_slice */
	(objobjproc)lazymanifest_contains, /* sq_contains */
	0, /* sq_inplace_concat */
	0, /* sq_inplace_repeat */
};


/* Other methods (copy, diff, etc) */
static PyTypeObject lazymanifestType;

/* If the manifest has changes, build the new manifest text and reindex it. */
static int compact(lazymanifest *self)
{
	int i;
	ssize_t need = 0;
	char *data;
	line *src, *dst;
	PyObject *pydata;
	if (!self->dirty)
		return 0;
	for (i = 0; i < self->numlines; i++) {
		if (!self->lines[i].deleted) {
			need += self->lines[i].len;
		}
	}
	pydata = PyBytes_FromStringAndSize(NULL, need);
	if (!pydata)
		return -1;
	data = PyBytes_AsString(pydata);
	if (!data) {
		return -1;
	}
	src = self->lines;
	dst = self->lines;
	for (i = 0; i < self->numlines; i++, src++) {
		char *tofree = NULL;
		if (src->from_malloc) {
			tofree = src->start;
		}
		if (!src->deleted) {
			memcpy(data, src->start, src->len);
			*dst = *src;
			dst->start = data;
			dst->from_malloc = false;
			data += dst->len;
			dst++;
		}
		free(tofree);
	}
	Py_DECREF(self->pydata);
	self->pydata = pydata;
	self->numlines = self->livelines;
	self->dirty = false;
	return 0;
}

static PyObject *lazymanifest_text(lazymanifest *self)
{
	if (compact(self) != 0) {
		PyErr_NoMemory();
		return NULL;
	}
	Py_INCREF(self->pydata);
	return self->pydata;
}

static lazymanifest *lazymanifest_copy(lazymanifest *self)
{
	lazymanifest *copy = NULL;
	if (compact(self) != 0) {
		goto nomem;
	}
	copy = PyObject_New(lazymanifest, &lazymanifestType);
	if (!copy) {
		goto nomem;
	}
	lazymanifest_init_early(copy);
	copy->numlines = self->numlines;
	copy->livelines = self->livelines;
	copy->dirty = false;
	copy->lines = malloc(self->maxlines *sizeof(line));
	if (!copy->lines) {
		goto nomem;
	}
	memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
	copy->maxlines = self->maxlines;
	copy->pydata = self->pydata;
	Py_INCREF(copy->pydata);
	return copy;
nomem:
	PyErr_NoMemory();
	Py_XDECREF(copy);
	return NULL;
}

static lazymanifest *lazymanifest_filtercopy(
	lazymanifest *self, PyObject *matchfn)
{
	lazymanifest *copy = NULL;
	int i;
	if (!PyCallable_Check(matchfn)) {
		PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
		return NULL;
	}
	/* compact ourselves first to avoid double-frees later when we
	 * compact tmp so that it doesn't have random pointers to our
	 * underlying from_malloc-data (self->pydata is safe) */
	if (compact(self) != 0) {
		goto nomem;
	}
	copy = PyObject_New(lazymanifest, &lazymanifestType);
	if (!copy) {
		goto nomem;
	}
	lazymanifest_init_early(copy);
	copy->dirty = true;
	copy->lines = malloc(self->maxlines * sizeof(line));
	if (!copy->lines) {
		goto nomem;
	}
	copy->maxlines = self->maxlines;
	copy->numlines = 0;
	copy->pydata = self->pydata;
	Py_INCREF(copy->pydata);
	for (i = 0; i < self->numlines; i++) {
		PyObject *arglist = NULL, *result = NULL;
		arglist = Py_BuildValue(PY23("(s)", "(y)"),
					self->lines[i].start);
		if (!arglist) {
			goto bail;
		}
		result = PyObject_CallObject(matchfn, arglist);
		Py_DECREF(arglist);
		/* if the callback raised an exception, just let it
		 * through and give up */
		if (!result) {
			goto bail;
		}
		if (PyObject_IsTrue(result)) {
			assert(!(self->lines[i].from_malloc));
			copy->lines[copy->numlines++] = self->lines[i];
		}
		Py_DECREF(result);
	}
	copy->livelines = copy->numlines;
	return copy;
nomem:
	PyErr_NoMemory();
bail:
	Py_XDECREF(copy);
	return NULL;
}

static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
{
	lazymanifest *other;
	PyObject *pyclean = NULL;
	bool listclean;
	PyObject *emptyTup = NULL, *ret = NULL;
	PyObject *es;
	int sneedle = 0, oneedle = 0;
	if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
		return NULL;
	}
	listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
	es = PyBytes_FromString("");
	if (!es) {
		goto nomem;
	}
	emptyTup = PyTuple_Pack(2, Py_None, es);
	Py_DECREF(es);
	if (!emptyTup) {
		goto nomem;
	}
	ret = PyDict_New();
	if (!ret) {
		goto nomem;
	}
	while (sneedle != self->numlines || oneedle != other->numlines) {
		line *left = self->lines + sneedle;
		line *right = other->lines + oneedle;
		int result;
		PyObject *key;
		PyObject *outer;
		/* If we're looking at a deleted entry and it's not
		 * the end of the manifest, just skip it. */
		if (sneedle < self->numlines && left->deleted) {
			sneedle++;
			continue;
		}
		if (oneedle < other->numlines && right->deleted) {
			oneedle++;
			continue;
		}
		/* if we're at the end of either manifest, then we
		 * know the remaining items are adds so we can skip
		 * the strcmp. */
		if (sneedle == self->numlines) {
			result = 1;
		} else if (oneedle == other->numlines) {
			result = -1;
		} else {
			result = linecmp(left, right);
		}
		key = result <= 0 ?
			PyBytes_FromString(left->start) :
			PyBytes_FromString(right->start);
		if (!key)
			goto nomem;
		if (result < 0) {
			PyObject *l = hashflags(left);
			if (!l) {
				goto nomem;
			}
			outer = PyTuple_Pack(2, l, emptyTup);
			Py_DECREF(l);
			if (!outer) {
				goto nomem;
			}
			PyDict_SetItem(ret, key, outer);
			Py_DECREF(outer);
			sneedle++;
		} else if (result > 0) {
			PyObject *r = hashflags(right);
			if (!r) {
				goto nomem;
			}
			outer = PyTuple_Pack(2, emptyTup, r);
			Py_DECREF(r);
			if (!outer) {
				goto nomem;
			}
			PyDict_SetItem(ret, key, outer);
			Py_DECREF(outer);
			oneedle++;
		} else {
			/* file exists in both manifests */
			if (left->len != right->len
			    || memcmp(left->start, right->start, left->len)
			    || left->hash_suffix != right->hash_suffix) {
				PyObject *l = hashflags(left);
				PyObject *r;
				if (!l) {
					goto nomem;
				}
				r = hashflags(right);
				if (!r) {
					Py_DECREF(l);
					goto nomem;
				}
				outer = PyTuple_Pack(2, l, r);
				Py_DECREF(l);
				Py_DECREF(r);
				if (!outer) {
					goto nomem;
				}
				PyDict_SetItem(ret, key, outer);
				Py_DECREF(outer);
			} else if (listclean) {
				PyDict_SetItem(ret, key, Py_None);
			}
			sneedle++;
			oneedle++;
		}
		Py_DECREF(key);
	}
	Py_DECREF(emptyTup);
	return ret;
nomem:
	PyErr_NoMemory();
	Py_XDECREF(ret);
	Py_XDECREF(emptyTup);
	return NULL;
}

static PyMethodDef lazymanifest_methods[] = {
	{"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
	 "Iterate over file names in this lazymanifest."},
	{"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
	 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
	{"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
	 "Make a copy of this lazymanifest."},
	{"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
	 "Make a copy of this manifest filtered by matchfn."},
	{"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
	 "Compare this lazymanifest to another one."},
	{"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
	 "Encode this manifest to text."},
	{NULL},
};

#ifdef IS_PY3K
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
#endif

static PyTypeObject lazymanifestType = {
	PyVarObject_HEAD_INIT(NULL, 0) /* header */
	"parsers.lazymanifest",                           /* tp_name */
	sizeof(lazymanifest),                             /* tp_basicsize */
	0,                                                /* tp_itemsize */
	(destructor)lazymanifest_dealloc,                 /* tp_dealloc */
	0,                                                /* tp_print */
	0,                                                /* tp_getattr */
	0,                                                /* tp_setattr */
	0,                                                /* tp_compare */
	0,                                                /* tp_repr */
	0,                                                /* tp_as_number */
	&lazymanifest_seq_meths,                          /* tp_as_sequence */
	&lazymanifest_mapping_methods,                    /* tp_as_mapping */
	0,                                                /* tp_hash */
	0,                                                /* tp_call */
	0,                                                /* tp_str */
	0,                                                /* tp_getattro */
	0,                                                /* tp_setattro */
	0,                                                /* tp_as_buffer */
	LAZYMANIFEST_TPFLAGS,                             /* tp_flags */
	"TODO(augie)",                                    /* tp_doc */
	0,                                                /* tp_traverse */
	0,                                                /* tp_clear */
	0,                                                /* tp_richcompare */
	0,                                             /* tp_weaklistoffset */
	(getiterfunc)lazymanifest_getkeysiter,                /* tp_iter */
	0,                                                /* tp_iternext */
	lazymanifest_methods,                             /* tp_methods */
	0,                                                /* tp_members */
	0,                                                /* tp_getset */
	0,                                                /* tp_base */
	0,                                                /* tp_dict */
	0,                                                /* tp_descr_get */
	0,                                                /* tp_descr_set */
	0,                                                /* tp_dictoffset */
	(initproc)lazymanifest_init,                      /* tp_init */
	0,                                                /* tp_alloc */
};

void manifest_module_init(PyObject * mod)
{
	lazymanifestType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&lazymanifestType) < 0)
		return;
	Py_INCREF(&lazymanifestType);

	PyModule_AddObject(mod, "lazymanifest",
			   (PyObject *)&lazymanifestType);
}