mercurial/manifest.c
changeset 24214 a5f1bccd2996
child 24228 542c891274b2
equal deleted inserted replaced
24213:e0c1328df872 24214:a5f1bccd2996
       
     1 /*
       
     2  * manifest.c - manifest type that does on-demand parsing.
       
     3  *
       
     4  * Copyright 2015, Google Inc.
       
     5  *
       
     6  * This software may be used and distributed according to the terms of
       
     7  * the GNU General Public License, incorporated herein by reference.
       
     8  */
       
     9 #include <assert.h>
       
    10 #include <string.h>
       
    11 #include <stdlib.h>
       
    12 
       
    13 #include <Python.h>
       
    14 
       
    15 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
       
    16 #ifdef _MSC_VER
       
    17 #define true 1
       
    18 #define false 0
       
    19 typedef unsigned char bool;
       
    20 #else
       
    21 #include <stdbool.h>
       
    22 #endif
       
    23 
       
    24 #define DEFAULT_LINES 100000
       
    25 
       
    26 typedef struct {
       
    27 	char *start;
       
    28 	Py_ssize_t len; /* length of line including terminal newline */
       
    29 	char hash_suffix;
       
    30 	bool from_malloc;
       
    31 	bool deleted;
       
    32 } line;
       
    33 
       
    34 typedef struct {
       
    35 	PyObject_HEAD
       
    36 	PyObject *pydata;
       
    37 	line *lines;
       
    38 	int numlines; /* number of line entries */
       
    39 	int livelines; /* number of non-deleted lines */
       
    40 	int maxlines; /* allocated number of lines */
       
    41 	bool dirty;
       
    42 } lazymanifest;
       
    43 
       
    44 #define MANIFEST_OOM -1
       
    45 #define MANIFEST_NOT_SORTED -2
       
    46 #define MANIFEST_MALFORMED -3
       
    47 
       
    48 /* defined in parsers.c */
       
    49 PyObject *unhexlify(const char *str, int len);
       
    50 
       
    51 /* get the length of the path for a line */
       
    52 static size_t pathlen(line *l) {
       
    53 	return strlen(l->start);
       
    54 }
       
    55 
       
    56 /* get the node value of a single line */
       
    57 static PyObject *nodeof(line *l) {
       
    58 	char *s = l->start;
       
    59 	ssize_t llen = pathlen(l);
       
    60 	PyObject *hash = unhexlify(s + llen + 1, 40);
       
    61 	if (!hash) {
       
    62 		return NULL;
       
    63 	}
       
    64 	if (l->hash_suffix != '\0') {
       
    65 		char newhash[21];
       
    66 		memcpy(newhash, PyString_AsString(hash), 20);
       
    67 		Py_DECREF(hash);
       
    68 		newhash[20] = l->hash_suffix;
       
    69 		hash = PyString_FromStringAndSize(newhash, 21);
       
    70 	}
       
    71 	return hash;
       
    72 }
       
    73 
       
    74 /* get the node hash and flags of a line as a tuple */
       
    75 static PyObject *hashflags(line *l)
       
    76 {
       
    77 	char *s = l->start;
       
    78 	size_t plen = pathlen(l);
       
    79 	PyObject *hash = nodeof(l);
       
    80 
       
    81 	/* 40 for hash, 1 for null byte, 1 for newline */
       
    82 	size_t hplen = plen + 42;
       
    83 	Py_ssize_t flen = l->len - hplen;
       
    84 	PyObject *flags;
       
    85 	PyObject *tup;
       
    86 
       
    87 	if (!hash)
       
    88 		return NULL;
       
    89 	flags = PyString_FromStringAndSize(s + hplen - 1, flen);
       
    90 	if (!flags) {
       
    91 		Py_DECREF(hash);
       
    92 		return NULL;
       
    93 	}
       
    94 	tup = PyTuple_Pack(2, hash, flags);
       
    95 	Py_DECREF(flags);
       
    96 	Py_DECREF(hash);
       
    97 	return tup;
       
    98 }
       
    99 
       
   100 /* if we're about to run out of space in the line index, add more */
       
   101 static bool realloc_if_full(lazymanifest *self)
       
   102 {
       
   103 	if (self->numlines == self->maxlines) {
       
   104 		self->maxlines *= 2;
       
   105 		self->lines = realloc(self->lines, self->maxlines * sizeof(line));
       
   106 	}
       
   107 	return self->lines;
       
   108 }
       
   109 
       
   110 /*
       
   111  * Find the line boundaries in the manifest that 'data' points to and store
       
   112  * information about each line in 'self'.
       
   113  */
       
   114 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
       
   115 {
       
   116 	char *prev = NULL;
       
   117 	while (len > 0) {
       
   118 		line *l;
       
   119 		char *next = memchr(data, '\n', len);
       
   120 		if (!next) {
       
   121 			return MANIFEST_MALFORMED;
       
   122 		}
       
   123 		next++; /* advance past newline */
       
   124 		if (!realloc_if_full(self)) {
       
   125 			return MANIFEST_OOM; /* no memory */
       
   126 		}
       
   127 		if (prev && strcmp(prev, data) > -1) {
       
   128 			/* This data isn't sorted, so we have to abort. */
       
   129 			return MANIFEST_NOT_SORTED;
       
   130 		}
       
   131 		l = self->lines + ((self->numlines)++);
       
   132 		l->start = data;
       
   133 		l->len = next - data;
       
   134 		l->hash_suffix = '\0';
       
   135 		l->from_malloc = false;
       
   136 		l->deleted = false;
       
   137 		len = len - l->len;
       
   138 		prev = data;
       
   139 		data = next;
       
   140 	}
       
   141 	self->livelines = self->numlines;
       
   142 	return 0;
       
   143 }
       
   144 
       
   145 static int lazymanifest_init(lazymanifest *self, PyObject *args)
       
   146 {
       
   147 	char *data;
       
   148 	Py_ssize_t len;
       
   149 	int err, ret;
       
   150 	PyObject *pydata;
       
   151 	if (!PyArg_ParseTuple(args, "S", &pydata)) {
       
   152 		return -1;
       
   153 	}
       
   154 	err = PyString_AsStringAndSize(pydata, &data, &len);
       
   155 
       
   156 	self->dirty = false;
       
   157 	if (err == -1)
       
   158 		return -1;
       
   159 	self->pydata = pydata;
       
   160 	Py_INCREF(self->pydata);
       
   161 	Py_BEGIN_ALLOW_THREADS
       
   162 	self->lines = malloc(DEFAULT_LINES * sizeof(line));
       
   163 	self->maxlines = DEFAULT_LINES;
       
   164 	self->numlines = 0;
       
   165 	if (!self->lines)
       
   166 		ret = MANIFEST_OOM;
       
   167 	else
       
   168 		ret = find_lines(self, data, len);
       
   169 	Py_END_ALLOW_THREADS
       
   170 	switch (ret) {
       
   171 	case 0:
       
   172 		break;
       
   173 	case MANIFEST_OOM:
       
   174 		PyErr_NoMemory();
       
   175 		break;
       
   176 	case MANIFEST_NOT_SORTED:
       
   177 		PyErr_Format(PyExc_ValueError,
       
   178 			     "Manifest lines not in sorted order.");
       
   179 		break;
       
   180 	case MANIFEST_MALFORMED:
       
   181 		PyErr_Format(PyExc_ValueError,
       
   182 			     "Manifest did not end in a newline.");
       
   183 		break;
       
   184 	default:
       
   185 		PyErr_Format(PyExc_ValueError,
       
   186 			     "Unknown problem parsing manifest.");
       
   187 	}
       
   188 	return ret == 0 ? 0 : -1;
       
   189 }
       
   190 
       
   191 static void lazymanifest_dealloc(lazymanifest *self)
       
   192 {
       
   193 	/* free any extra lines we had to allocate */
       
   194 	int i;
       
   195 	for (i = 0; i < self->numlines; i++) {
       
   196 		if (self->lines[i].from_malloc) {
       
   197 			free(self->lines[i].start);
       
   198 		}
       
   199 	}
       
   200 	if (self->lines) {
       
   201 		free(self->lines);
       
   202 		self->lines = NULL;
       
   203 	}
       
   204 	if (self->pydata) {
       
   205 		Py_DECREF(self->pydata);
       
   206 		self->pydata = NULL;
       
   207 	}
       
   208 	PyObject_Del(self);
       
   209 }
       
   210 
       
   211 /* iteration support */
       
   212 
       
   213 typedef struct {
       
   214 	PyObject_HEAD lazymanifest *m;
       
   215 	Py_ssize_t pos;
       
   216 } lmIter;
       
   217 
       
   218 static void lmiter_dealloc(PyObject *o)
       
   219 {
       
   220 	lmIter *self = (lmIter *)o;
       
   221 	Py_DECREF(self->m);
       
   222 	PyObject_Del(self);
       
   223 }
       
   224 
       
   225 static PyObject *lmiter_iternext(PyObject *o)
       
   226 {
       
   227 	size_t pl;
       
   228 	line *l;
       
   229 	Py_ssize_t consumed;
       
   230 	PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
       
   231 	lmIter *self = (lmIter *)o;
       
   232 	do {
       
   233 		self->pos++;
       
   234 		if (self->pos >= self->m->numlines) {
       
   235 			goto bail;
       
   236 		}
       
   237 		/* skip over deleted manifest entries */
       
   238 	} while (self->m->lines[self->pos].deleted);
       
   239 	l = self->m->lines + self->pos;
       
   240 	pl = pathlen(l);
       
   241 	path = PyString_FromStringAndSize(l->start, pl);
       
   242 	hash = nodeof(l);
       
   243 	consumed = pl + 41;
       
   244 	flags = PyString_FromStringAndSize(l->start + consumed,
       
   245 									   l->len - consumed - 1);
       
   246 	if (!flags) {
       
   247 		goto bail;
       
   248 	}
       
   249 	ret = PyTuple_Pack(3, path, hash, flags);
       
   250  bail:
       
   251 	Py_XDECREF(path);
       
   252 	Py_XDECREF(hash);
       
   253 	Py_XDECREF(flags);
       
   254 	return ret;
       
   255 }
       
   256 
       
   257 static PyTypeObject lazymanifestIterator = {
       
   258 	PyObject_HEAD_INIT(NULL)
       
   259 	0,                               /*ob_size */
       
   260 	"parsers.lazymanifest.iterator", /*tp_name */
       
   261 	sizeof(lmIter),                  /*tp_basicsize */
       
   262 	0,                               /*tp_itemsize */
       
   263 	lmiter_dealloc,                  /*tp_dealloc */
       
   264 	0,                               /*tp_print */
       
   265 	0,                               /*tp_getattr */
       
   266 	0,                               /*tp_setattr */
       
   267 	0,                               /*tp_compare */
       
   268 	0,                               /*tp_repr */
       
   269 	0,                               /*tp_as_number */
       
   270 	0,                               /*tp_as_sequence */
       
   271 	0,                               /*tp_as_mapping */
       
   272 	0,                               /*tp_hash */
       
   273 	0,                               /*tp_call */
       
   274 	0,                               /*tp_str */
       
   275 	0,                               /*tp_getattro */
       
   276 	0,                               /*tp_setattro */
       
   277 	0,                               /*tp_as_buffer */
       
   278 	/* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
       
   279 	   use tp_iter and tp_iternext fields. */
       
   280 	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
       
   281 	"Iterator for a lazymanifest.",  /* tp_doc */
       
   282 	0,                               /* tp_traverse */
       
   283 	0,                               /* tp_clear */
       
   284 	0,                               /* tp_richcompare */
       
   285 	0,                               /* tp_weaklistoffset */
       
   286 	PyObject_SelfIter,               /* tp_iter: __iter__() method */
       
   287 	lmiter_iternext,                 /* tp_iternext: next() method */
       
   288 };
       
   289 
       
   290 static lazymanifest *lazymanifest_copy(lazymanifest *self);
       
   291 
       
   292 static PyObject *lazymanifest_getiter(lazymanifest *self)
       
   293 {
       
   294 	lmIter *i = NULL;
       
   295 	lazymanifest *t = lazymanifest_copy(self);
       
   296 	if (!t) {
       
   297 		PyErr_NoMemory();
       
   298 		return NULL;
       
   299 	}
       
   300 	i = PyObject_New(lmIter, &lazymanifestIterator);
       
   301 	if (i) {
       
   302 		i->m = t;
       
   303 		i->pos = -1;
       
   304 	} else {
       
   305 		Py_DECREF(t);
       
   306 		PyErr_NoMemory();
       
   307 	}
       
   308 	return (PyObject *)i;
       
   309 }
       
   310 
       
   311 /* __getitem__ and __setitem__ support */
       
   312 
       
   313 static Py_ssize_t lazymanifest_size(lazymanifest *self)
       
   314 {
       
   315 	return self->livelines;
       
   316 }
       
   317 
       
   318 static int linecmp(const void *left, const void *right)
       
   319 {
       
   320 	return strcmp(((const line *)left)->start,
       
   321 		      ((const line *)right)->start);
       
   322 }
       
   323 
       
   324 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
       
   325 {
       
   326 	line needle;
       
   327 	line *hit;
       
   328 	if (!PyString_Check(key)) {
       
   329 		PyErr_Format(PyExc_TypeError,
       
   330 			     "getitem: manifest keys must be a string.");
       
   331 		return NULL;
       
   332 	}
       
   333 	needle.start = PyString_AsString(key);
       
   334 	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
       
   335 		      &linecmp);
       
   336 	if (!hit || hit->deleted) {
       
   337 		PyErr_Format(PyExc_KeyError, "No such manifest entry.");
       
   338 		return NULL;
       
   339 	}
       
   340 	return hashflags(hit);
       
   341 }
       
   342 
       
   343 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
       
   344 {
       
   345 	line needle;
       
   346 	line *hit;
       
   347 	if (!PyString_Check(key)) {
       
   348 		PyErr_Format(PyExc_TypeError,
       
   349 			     "delitem: manifest keys must be a string.");
       
   350 		return -1;
       
   351 	}
       
   352 	needle.start = PyString_AsString(key);
       
   353 	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
       
   354 		      &linecmp);
       
   355 	if (!hit || hit->deleted) {
       
   356 		PyErr_Format(PyExc_KeyError,
       
   357 			     "Tried to delete nonexistent manifest entry.");
       
   358 		return -1;
       
   359 	}
       
   360 	self->dirty = true;
       
   361 	hit->deleted = true;
       
   362 	self->livelines--;
       
   363 	return 0;
       
   364 }
       
   365 
       
   366 static int lazymanifest_setitem(
       
   367 	lazymanifest *self, PyObject *key, PyObject *value)
       
   368 {
       
   369 	char *path;
       
   370 	Py_ssize_t plen;
       
   371 	PyObject *pyhash;
       
   372 	Py_ssize_t hlen;
       
   373 	char *hash;
       
   374 	PyObject *pyflags;
       
   375 	char *flags;
       
   376 	Py_ssize_t flen;
       
   377 	size_t dlen;
       
   378 	char *dest;
       
   379 	int i;
       
   380 	line new;
       
   381 	line *hit;
       
   382 	if (!PyString_Check(key)) {
       
   383 		PyErr_Format(PyExc_TypeError,
       
   384 			     "setitem: manifest keys must be a string.");
       
   385 		return -1;
       
   386 	}
       
   387 	if (!value) {
       
   388 		return lazymanifest_delitem(self, key);
       
   389 	}
       
   390 	if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
       
   391 		PyErr_Format(PyExc_TypeError,
       
   392 			     "Manifest values must be a tuple of (node, flags).");
       
   393 		return -1;
       
   394 	}
       
   395 	if (PyString_AsStringAndSize(key, &path, &plen) == -1) {
       
   396 		return -1;
       
   397 	}
       
   398 
       
   399 	pyhash = PyTuple_GetItem(value, 0);
       
   400 	if (!PyString_Check(pyhash)) {
       
   401 		PyErr_Format(PyExc_TypeError,
       
   402 			     "node must be a 20-byte string");
       
   403 		return -1;
       
   404 	}
       
   405 	hlen = PyString_Size(pyhash);
       
   406 	/* Some parts of the codebase try and set 21 or 22
       
   407 	 * byte "hash" values in order to perturb things for
       
   408 	 * status. We have to preserve at least the 21st
       
   409 	 * byte. Sigh. If there's a 22nd byte, we drop it on
       
   410 	 * the floor, which works fine.
       
   411 	 */
       
   412 	if (hlen != 20 && hlen != 21 && hlen != 22) {
       
   413 		PyErr_Format(PyExc_TypeError,
       
   414 			     "node must be a 20-byte string");
       
   415 		return -1;
       
   416 	}
       
   417 	hash = PyString_AsString(pyhash);
       
   418 
       
   419 	pyflags = PyTuple_GetItem(value, 1);
       
   420 	if (!PyString_Check(pyflags) || PyString_Size(pyflags) > 1) {
       
   421 		PyErr_Format(PyExc_TypeError,
       
   422 			     "flags must a 0 or 1 byte string");
       
   423 		return -1;
       
   424 	}
       
   425 	if (PyString_AsStringAndSize(pyflags, &flags, &flen) == -1) {
       
   426 		return -1;
       
   427 	}
       
   428 	/* one null byte and one newline */
       
   429 	dlen = plen + 41 + flen + 1;
       
   430 	dest = malloc(dlen);
       
   431 	if (!dest) {
       
   432 		PyErr_NoMemory();
       
   433 		return -1;
       
   434 	}
       
   435 	memcpy(dest, path, plen + 1);
       
   436 	for (i = 0; i < 20; i++) {
       
   437 		sprintf(dest + plen + 1 + (i * 2), "%02hhx", hash[i]);
       
   438 	}
       
   439 	memcpy(dest + plen + 41, flags, flen);
       
   440 	dest[plen + 41 + flen] = '\n';
       
   441 	new.start = dest;
       
   442 	new.len = dlen;
       
   443 	new.hash_suffix = '\0';
       
   444 	if (hlen > 20) {
       
   445 		new.hash_suffix = hash[20];
       
   446 	}
       
   447 	new.from_malloc = true;     /* is `start` a pointer we allocated? */
       
   448 	new.deleted = false;        /* is this entry deleted? */
       
   449 	hit = bsearch(&new, self->lines, self->numlines,
       
   450 		      sizeof(line), &linecmp);
       
   451 	self->dirty = true;
       
   452 	if (hit) {
       
   453 		/* updating a line we already had */
       
   454 		if (hit->from_malloc) {
       
   455 			free(hit->start);
       
   456 		}
       
   457 		if (hit->deleted) {
       
   458 			self->livelines++;
       
   459 		}
       
   460 		*hit = new;
       
   461 	} else {
       
   462 		/* new line */
       
   463 		if (!realloc_if_full(self)) {
       
   464 			PyErr_NoMemory();
       
   465 			return -1;
       
   466 		}
       
   467 		self->lines[self->numlines++] = new;
       
   468 		self->livelines++;
       
   469 		/* TODO: do a binary search and insert rather than
       
   470 		 * append and qsort. Also offer a batch-insert
       
   471 		 * interface, which should be a nice little
       
   472 		 * performance win.
       
   473 		 */
       
   474 		qsort(self->lines, self->numlines, sizeof(line), &linecmp);
       
   475 	}
       
   476 	return 0;
       
   477 }
       
   478 
       
   479 static PyMappingMethods lazymanifest_mapping_methods = {
       
   480 	(lenfunc)lazymanifest_size,             /* mp_length */
       
   481 	(binaryfunc)lazymanifest_getitem,       /* mp_subscript */
       
   482 	(objobjargproc)lazymanifest_setitem,    /* mp_ass_subscript */
       
   483 };
       
   484 
       
   485 /* sequence methods (important or __contains__ builds an iterator */
       
   486 
       
   487 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
       
   488 {
       
   489 	line needle;
       
   490 	line *hit;
       
   491 	if (!PyString_Check(key)) {
       
   492 		/* Our keys are always strings, so if the contains
       
   493 		 * check is for a non-string, just return false. */
       
   494 		return 0;
       
   495 	}
       
   496 	needle.start = PyString_AsString(key);
       
   497 	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
       
   498 		      &linecmp);
       
   499 	if (!hit || hit->deleted) {
       
   500 		return 0;
       
   501 	}
       
   502 	return 1;
       
   503 }
       
   504 
       
   505 static PySequenceMethods lazymanifest_seq_meths = {
       
   506 	(lenfunc)lazymanifest_size, /* sq_length */
       
   507 	0, /* sq_concat */
       
   508 	0, /* sq_repeat */
       
   509 	0, /* sq_item */
       
   510 	0, /* sq_slice */
       
   511 	0, /* sq_ass_item */
       
   512 	0, /* sq_ass_slice */
       
   513 	(objobjproc)lazymanifest_contains, /* sq_contains */
       
   514 	0, /* sq_inplace_concat */
       
   515 	0, /* sq_inplace_repeat */
       
   516 };
       
   517 
       
   518 
       
   519 /* Other methods (copy, diff, etc) */
       
   520 static PyTypeObject lazymanifestType;
       
   521 
       
   522 /* If the manifest has changes, build the new manifest text and reindex it. */
       
   523 static int compact(lazymanifest *self) {
       
   524 	int i;
       
   525 	ssize_t need = 0;
       
   526 	char *data;
       
   527 	line *src, *dst;
       
   528 	PyObject *pydata;
       
   529 	if (!self->dirty)
       
   530 		return 0;
       
   531 	for (i = 0; i < self->numlines; i++) {
       
   532 		if (!self->lines[i].deleted) {
       
   533 			need += self->lines[i].len;
       
   534 		}
       
   535 	}
       
   536 	pydata = PyString_FromStringAndSize(NULL, need);
       
   537 	if (!pydata)
       
   538 		return -1;
       
   539 	data = PyString_AsString(pydata);
       
   540 	if (!data) {
       
   541 		return -1;
       
   542 	}
       
   543 	src = self->lines;
       
   544 	dst = self->lines;
       
   545 	for (i = 0; i < self->numlines; i++, src++) {
       
   546 		char *tofree = NULL;
       
   547 		if (src->from_malloc) {
       
   548 			tofree = src->start;
       
   549 		}
       
   550 		if (!src->deleted) {
       
   551 			memcpy(data, src->start, src->len);
       
   552 			*dst = *src;
       
   553 			dst->start = data;
       
   554 			dst->from_malloc = false;
       
   555 			data += dst->len;
       
   556 			dst++;
       
   557 		}
       
   558 		free(tofree);
       
   559 	}
       
   560 	Py_DECREF(self->pydata);
       
   561 	self->pydata = pydata;
       
   562 	self->numlines = self->livelines;
       
   563 	self->dirty = false;
       
   564 	return 0;
       
   565 }
       
   566 
       
   567 static PyObject *lazymanifest_text(lazymanifest *self)
       
   568 {
       
   569 	if (compact(self) != 0) {
       
   570 		PyErr_NoMemory();
       
   571 		return NULL;
       
   572 	}
       
   573 	Py_INCREF(self->pydata);
       
   574 	return self->pydata;
       
   575 }
       
   576 
       
   577 static lazymanifest *lazymanifest_copy(lazymanifest *self)
       
   578 {
       
   579 	lazymanifest *copy = NULL;
       
   580 	if (compact(self) != 0) {
       
   581 		goto nomem;
       
   582 	}
       
   583 	copy = PyObject_New(lazymanifest, &lazymanifestType);
       
   584 	if (!copy) {
       
   585 		goto nomem;
       
   586 	}
       
   587 	copy->numlines = self->numlines;
       
   588 	copy->livelines = self->livelines;
       
   589 	copy->dirty = false;
       
   590 	copy->lines = malloc(self->maxlines *sizeof(line));
       
   591 	if (!copy->lines) {
       
   592 		goto nomem;
       
   593 	}
       
   594 	memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
       
   595 	copy->maxlines = self->maxlines;
       
   596 	copy->pydata = self->pydata;
       
   597 	Py_INCREF(copy->pydata);
       
   598 	return copy;
       
   599  nomem:
       
   600 	PyErr_NoMemory();
       
   601 	Py_XDECREF(copy);
       
   602 	return NULL;
       
   603 }
       
   604 
       
   605 static lazymanifest *lazymanifest_filtercopy(
       
   606 	lazymanifest *self, PyObject *matchfn)
       
   607 {
       
   608 	lazymanifest *copy = NULL;
       
   609 	int i;
       
   610 	if (!PyCallable_Check(matchfn)) {
       
   611 		PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
       
   612 		return NULL;
       
   613 	}
       
   614 	/* compact ourselves first to avoid double-frees later when we
       
   615 	 * compact tmp so that it doesn't have random pointers to our
       
   616 	 * underlying from_malloc-data (self->pydata is safe) */
       
   617 	if (compact(self) != 0) {
       
   618 		goto nomem;
       
   619 	}
       
   620 	copy = PyObject_New(lazymanifest, &lazymanifestType);
       
   621 	copy->dirty = true;
       
   622 	copy->lines = malloc(self->maxlines * sizeof(line));
       
   623 	if (!copy->lines) {
       
   624 		goto nomem;
       
   625 	}
       
   626 	copy->maxlines = self->maxlines;
       
   627 	copy->numlines = 0;
       
   628 	copy->pydata = self->pydata;
       
   629 	Py_INCREF(self->pydata);
       
   630 	for (i = 0; i < self->numlines; i++) {
       
   631 		PyObject *arg = PyString_FromString(self->lines[i].start);
       
   632 		PyObject *arglist = PyTuple_Pack(1, arg);
       
   633 		PyObject *result = PyObject_CallObject(matchfn, arglist);
       
   634 		Py_DECREF(arglist);
       
   635 		Py_DECREF(arg);
       
   636 		/* if the callback raised an exception, just let it
       
   637 		 * through and give up */
       
   638 		if (!result) {
       
   639 			free(copy->lines);
       
   640 			Py_DECREF(self->pydata);
       
   641 			return NULL;
       
   642 		}
       
   643 		if (PyObject_IsTrue(result)) {
       
   644 			assert(!(self->lines[i].from_malloc));
       
   645 			copy->lines[copy->numlines++] = self->lines[i];
       
   646 		}
       
   647 		Py_DECREF(result);
       
   648 	}
       
   649 	copy->livelines = copy->numlines;
       
   650 	return copy;
       
   651  nomem:
       
   652 	PyErr_NoMemory();
       
   653 	Py_XDECREF(copy);
       
   654 	return NULL;
       
   655 }
       
   656 
       
   657 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
       
   658 {
       
   659 	lazymanifest *other;
       
   660 	PyObject *pyclean = NULL;
       
   661 	bool listclean;
       
   662 	PyObject *emptyTup = NULL, *ret = NULL;
       
   663 	PyObject *es;
       
   664 	int sneedle = 0, oneedle = 0;
       
   665 	if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
       
   666 		return NULL;
       
   667 	}
       
   668 	listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
       
   669 	es = PyString_FromString("");
       
   670 	if (!es) {
       
   671 		goto nomem;
       
   672 	}
       
   673 	emptyTup = PyTuple_Pack(2, Py_None, es);
       
   674 	Py_DECREF(es);
       
   675 	if (!emptyTup) {
       
   676 		goto nomem;
       
   677 	}
       
   678 	ret = PyDict_New();
       
   679 	if (!ret) {
       
   680 		goto nomem;
       
   681 	}
       
   682 	while (sneedle != self->numlines || oneedle != other->numlines) {
       
   683 		line *left = self->lines + sneedle;
       
   684 		line *right = other->lines + oneedle;
       
   685 		int result;
       
   686 		PyObject *key;
       
   687 		PyObject *outer;
       
   688 		/* If we're looking at a deleted entry and it's not
       
   689 		 * the end of the manifest, just skip it. */
       
   690 		if (left->deleted && sneedle < self->numlines) {
       
   691 			sneedle++;
       
   692 			continue;
       
   693 		}
       
   694 		if (right->deleted && oneedle < other->numlines) {
       
   695 			oneedle++;
       
   696 			continue;
       
   697 		}
       
   698 		/* if we're at the end of either manifest, then we
       
   699 		 * know the remaining items are adds so we can skip
       
   700 		 * the strcmp. */
       
   701 		if (sneedle == self->numlines) {
       
   702 			result = 1;
       
   703 		} else if (oneedle == other->numlines) {
       
   704 			result = -1;
       
   705 		} else {
       
   706 			result = linecmp(left, right);
       
   707 		}
       
   708 		key = result <= 0 ?
       
   709 			PyString_FromString(left->start) :
       
   710 			PyString_FromString(right->start);
       
   711 		if (!key)
       
   712 			goto nomem;
       
   713 		if (result < 0) {
       
   714 			PyObject *l = hashflags(left);
       
   715 			if (!l) {
       
   716 				goto nomem;
       
   717 			}
       
   718 			outer = PyTuple_Pack(2, l, emptyTup);
       
   719 			Py_DECREF(l);
       
   720 			if (!outer) {
       
   721 				goto nomem;
       
   722 			}
       
   723 			PyDict_SetItem(ret, key, outer);
       
   724 			Py_DECREF(outer);
       
   725 			sneedle++;
       
   726 		} else if (result > 0) {
       
   727 			PyObject *r = hashflags(right);
       
   728 			if (!r) {
       
   729 				goto nomem;
       
   730 			}
       
   731 			outer = PyTuple_Pack(2, emptyTup, r);
       
   732 			Py_DECREF(r);
       
   733 			if (!outer) {
       
   734 				goto nomem;
       
   735 			}
       
   736 			PyDict_SetItem(ret, key, outer);
       
   737 			Py_DECREF(outer);
       
   738 			oneedle++;
       
   739 		} else {
       
   740 			/* file exists in both manifests */
       
   741 			if (left->len != right->len
       
   742 			    || memcmp(left->start, right->start, left->len)
       
   743 			    || left->hash_suffix != right->hash_suffix) {
       
   744 				PyObject *l = hashflags(left);
       
   745 				PyObject *r;
       
   746 				if (!l) {
       
   747 					goto nomem;
       
   748 				}
       
   749 				r = hashflags(right);
       
   750 				if (!r) {
       
   751 					Py_DECREF(l);
       
   752 					goto nomem;
       
   753 				}
       
   754 				outer = PyTuple_Pack(2, l, r);
       
   755 				Py_DECREF(l);
       
   756 				Py_DECREF(r);
       
   757 				if (!outer) {
       
   758 					goto nomem;
       
   759 				}
       
   760 				PyDict_SetItem(ret, key, outer);
       
   761 				Py_DECREF(outer);
       
   762 			} else if (listclean) {
       
   763 				PyDict_SetItem(ret, key, Py_None);
       
   764 			}
       
   765 			sneedle++;
       
   766 			oneedle++;
       
   767 		}
       
   768 		Py_DECREF(key);
       
   769 	}
       
   770 	Py_DECREF(emptyTup);
       
   771 	return ret;
       
   772  nomem:
       
   773 	PyErr_NoMemory();
       
   774 	Py_XDECREF(ret);
       
   775 	Py_XDECREF(emptyTup);
       
   776 	return NULL;
       
   777 }
       
   778 
       
   779 static PyMethodDef lazymanifest_methods[] = {
       
   780 	{"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
       
   781 	 "Make a copy of this lazymanifest."},
       
   782 	{"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
       
   783 	 "Make a copy of this manifest filtered by matchfn."},
       
   784 	{"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
       
   785 	 "Compare this lazymanifest to another one."},
       
   786 	{"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
       
   787 	 "Encode this manifest to text."},
       
   788 	{NULL},
       
   789 };
       
   790 
       
   791 static PyTypeObject lazymanifestType = {
       
   792 	PyObject_HEAD_INIT(NULL)
       
   793 	0,                                                /* ob_size */
       
   794 	"parsers.lazymanifest",                           /* tp_name */
       
   795 	sizeof(lazymanifest),                             /* tp_basicsize */
       
   796 	0,                                                /* tp_itemsize */
       
   797 	(destructor)lazymanifest_dealloc,                 /* tp_dealloc */
       
   798 	0,                                                /* tp_print */
       
   799 	0,                                                /* tp_getattr */
       
   800 	0,                                                /* tp_setattr */
       
   801 	0,                                                /* tp_compare */
       
   802 	0,                                                /* tp_repr */
       
   803 	0,                                                /* tp_as_number */
       
   804 	&lazymanifest_seq_meths,                          /* tp_as_sequence */
       
   805 	&lazymanifest_mapping_methods,                    /* tp_as_mapping */
       
   806 	0,                                                /* tp_hash */
       
   807 	0,                                                /* tp_call */
       
   808 	0,                                                /* tp_str */
       
   809 	0,                                                /* tp_getattro */
       
   810 	0,                                                /* tp_setattro */
       
   811 	0,                                                /* tp_as_buffer */
       
   812 	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN, /* tp_flags */
       
   813 	"TODO(augie)",                                    /* tp_doc */
       
   814 	0,                                                /* tp_traverse */
       
   815 	0,                                                /* tp_clear */
       
   816 	0,                                                /* tp_richcompare */
       
   817 	0,                                             /* tp_weaklistoffset */
       
   818 	(getiterfunc)lazymanifest_getiter,                /* tp_iter */
       
   819 	0,                                                /* tp_iternext */
       
   820 	lazymanifest_methods,                             /* tp_methods */
       
   821 	0,                                                /* tp_members */
       
   822 	0,                                                /* tp_getset */
       
   823 	0,                                                /* tp_base */
       
   824 	0,                                                /* tp_dict */
       
   825 	0,                                                /* tp_descr_get */
       
   826 	0,                                                /* tp_descr_set */
       
   827 	0,                                                /* tp_dictoffset */
       
   828 	(initproc)lazymanifest_init,                      /* tp_init */
       
   829 	0,                                                /* tp_alloc */
       
   830 };
       
   831 
       
   832 void manifest_module_init(PyObject * mod)
       
   833 {
       
   834 	lazymanifestType.tp_new = PyType_GenericNew;
       
   835 	if (PyType_Ready(&lazymanifestType) < 0)
       
   836 		return;
       
   837 	Py_INCREF(&lazymanifestType);
       
   838 
       
   839 	PyModule_AddObject(mod, "lazymanifest",
       
   840 			   (PyObject *)&lazymanifestType);
       
   841 }