Fill in the uncompressed size during revlog.addgroup
This uses code from Matt to calculate the size change that would
result from applying a delta, so that an accurate running total of
the text size can be kept during revlog.addgroup.
--- a/mercurial/mdiff.py Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/mdiff.py Tue Apr 04 16:38:44 2006 -0400
@@ -192,4 +192,5 @@
return mpatch.patches(a, [bin])
patches = mpatch.patches
+patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
--- a/mercurial/mpatch.c Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/mpatch.c Tue Apr 04 16:38:44 2006 -0400
@@ -354,8 +354,44 @@
return result;
}
+/*
+ * calculate size of a patched file directly
+ *
+ * Python arguments: (orig, bin) -- orig is the length of the text the
+ * patch applies to, bin is the binary patch. bin is read as a sequence of
+ * fragments, each a 12-byte header of three big-endian 32-bit integers
+ * (start, end, len) followed by len bytes of replacement data. Bytes
+ * [start, end) of the original are replaced by len new bytes.
+ *
+ * Returns the resulting length as a Python integer. Raises mpatch_Error
+ * if a fragment header is truncated, its range is inverted, or its data
+ * would extend past the end of the patch.
+ */
+static PyObject *
+patchedsize(PyObject *self, PyObject *args)
+{
+	long orig, start, end, len, outlen = 0, last = 0;
+	int patchlen;
+	char *bin, *binend;
+	uint32_t decode[3]; /* aligned scratch copy of one fragment header */
+
+	if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
+		return NULL;
+
+	binend = bin + patchlen;
+
+	/*
+	 * Each early break below happens before bin is advanced, so
+	 * bin < binend still holds and the check after the loop reports
+	 * the patch as undecodable.
+	 */
+	while (bin < binend) {
+		/* a truncated header would make memcpy read past the patch */
+		if (binend - bin < 12)
+			break;
+		memcpy(decode, bin, 12);
+		start = ntohl(decode[0]);
+		end = ntohl(decode[1]);
+		len = ntohl(decode[2]);
+		/* reject inverted ranges and data running off the end */
+		if (start > end || len < 0 || len > binend - bin - 12)
+			break;
+		bin += 12 + len;
+		/* bytes between the previous fragment and this one are kept */
+		outlen += start - last;
+		last = end;
+		outlen += len;
+	}
+
+	if (bin != binend) {
+		if (!PyErr_Occurred())
+			PyErr_SetString(mpatch_Error, "patch cannot be decoded");
+		return NULL;
+	}
+
+	/* whatever follows the last fragment is carried over unchanged */
+	outlen += orig - last;
+	return Py_BuildValue("l", outlen);
+}
+
static PyMethodDef methods[] = {
{"patches", patches, METH_VARARGS, "apply a series of patches\n"},
+ {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
{NULL, NULL}
};
--- a/mercurial/revlog.py Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/revlog.py Tue Apr 04 16:38:44 2006 -0400
@@ -342,8 +342,40 @@
if self.version != 0:
return self.ngoffset(self.index[rev][0])
return self.index[rev][0]
+
def end(self, rev): return self.start(rev) + self.length(rev)
+ def size(self, rev):
+ """return the length of the uncompressed text for a given revision"""
+ l = -1
+ if self.version != 0:
+ l = self.index[rev][2]
+ if l >= 0:
+ return l
+
+ t = self.revision(self.node(rev))
+ return len(t)
+
+ # alternate implementation, The advantage to this code is it
+ # will be faster for a single revision. But, the results are not
+ # cached, so finding the size of every revision will be slower.
+ """
+ if self.cache and self.cache[1] == rev:
+ return len(self.cache[2])
+
+ base = self.base(rev)
+ if self.cache and self.cache[1] >= base and self.cache[1] < rev:
+ base = self.cache[1]
+ text = self.cache[2]
+ else:
+ text = self.revision(self.node(base))
+
+ l = len(text)
+ for x in xrange(base + 1, rev + 1):
+ l = mdiff.patchedsize(l, self.chunk(x))
+ return l
+ """
+
def length(self, rev):
if rev < 0:
return 0
@@ -904,7 +936,7 @@
node = None
base = prev = -1
- start = end = measure = 0
+ start = end = textlen = 0
if r:
end = self.end(t)
@@ -949,8 +981,9 @@
if chain == prev:
tempd = compress(delta)
cdelta = tempd[0] + tempd[1]
+ textlen = mdiff.patchedsize(textlen, delta)
- if chain != prev or (end - start + len(cdelta)) > measure * 2:
+ if chain != prev or (end - start + len(cdelta)) > textlen * 2:
# flush our writes here so we can read it in revision
if dfh:
dfh.flush()
@@ -960,12 +993,12 @@
chk = self.addrevision(text, transaction, link, p1, p2)
if chk != node:
raise RevlogError(_("consistency error adding group"))
- measure = len(text)
+ textlen = len(text)
else:
if self.version == 0:
e = (end, len(cdelta), base, link, p1, p2, node)
else:
- e = (self.offset_type(end, 0), len(cdelta), -1, base,
+ e = (self.offset_type(end, 0), len(cdelta), textlen, base,
link, self.rev(p1), self.rev(p2), node)
self.index.append(e)
self.nodemap[node] = r