changeset 2078:441ea218414e

Fill in the uncompressed size during revlog.addgroup This uses code from Matt to calculate the size change that would result from applying a delta to keep an accurate running total of the text size during revlog.addgroup
author mason@suse.com
date Tue, 04 Apr 2006 16:38:44 -0400
parents 4d0700ae0991
children ee96ca273f32
files mercurial/mdiff.py mercurial/mpatch.c mercurial/revlog.py
diffstat 3 files changed, 74 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/mdiff.py	Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/mdiff.py	Tue Apr 04 16:38:44 2006 -0400
@@ -192,4 +192,5 @@
     return mpatch.patches(a, [bin])
 
 patches = mpatch.patches
+patchedsize = mpatch.patchedsize
 textdiff = bdiff.bdiff
--- a/mercurial/mpatch.c	Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/mpatch.c	Tue Apr 04 16:38:44 2006 -0400
@@ -354,8 +354,44 @@
 	return result;
 }
 
+/* calculate size of a patched file directly */
+static PyObject *
+patchedsize(PyObject *self, PyObject *args)
+{
+	long orig, start, end, len, outlen = 0, last = 0;
+	int patchlen;
+	char *bin, *binend;
+	char decode[12]; /* for dealing with alignment issues */
+
+	if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
+		return NULL;
+
+	binend = bin + patchlen;
+
+	while (bin < binend) {
+		memcpy(decode, bin, 12);
+		start = ntohl(*(uint32_t *)decode);
+		end = ntohl(*(uint32_t *)(decode + 4));
+		len = ntohl(*(uint32_t *)(decode + 8));
+		bin += 12 + len;
+		outlen += start - last;
+		last = end;
+		outlen += len;
+	}
+
+	if (bin != binend) {
+		if (!PyErr_Occurred())
+			PyErr_SetString(mpatch_Error, "patch cannot be decoded");
+		return NULL;
+	}
+
+	outlen += orig - last;
+	return Py_BuildValue("l", outlen);
+}
+
 static PyMethodDef methods[] = {
 	{"patches", patches, METH_VARARGS, "apply a series of patches\n"},
+	{"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
 	{NULL, NULL}
 };
 
--- a/mercurial/revlog.py	Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/revlog.py	Tue Apr 04 16:38:44 2006 -0400
@@ -342,8 +342,40 @@
         if self.version != 0:
             return self.ngoffset(self.index[rev][0])
         return self.index[rev][0]
+
     def end(self, rev): return self.start(rev) + self.length(rev)
 
+    def size(self, rev):
+        """return the length of the uncompressed text for a given revision"""
+        l = -1
+        if self.version != 0:
+            l = self.index[rev][2]
+        if l >= 0:
+            return l
+
+        t = self.revision(self.node(rev))
+        return len(t)
+
+        # alternate implementation, The advantage to this code is it
+        # will be faster for a single revision.  But, the results are not
+        # cached, so finding the size of every revision will be slower.
+        """
+        if self.cache and self.cache[1] == rev:
+            return len(self.cache[2])
+
+        base = self.base(rev)
+        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
+            base = self.cache[1]
+            text = self.cache[2]
+        else:
+            text = self.revision(self.node(base))
+
+        l = len(text)
+        for x in xrange(base + 1, rev + 1):
+            l = mdiff.patchedsize(l, self.chunk(x))
+        return l
+        """
+
     def length(self, rev):
         if rev < 0:
             return 0
@@ -904,7 +936,7 @@
         node = None
 
         base = prev = -1
-        start = end = measure = 0
+        start = end = textlen = 0
         if r:
             end = self.end(t)
 
@@ -949,8 +981,9 @@
             if chain == prev:
                 tempd = compress(delta)
                 cdelta = tempd[0] + tempd[1]
+                textlen = mdiff.patchedsize(textlen, delta)
 
-            if chain != prev or (end - start + len(cdelta)) > measure * 2:
+            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                 # flush our writes here so we can read it in revision
                 if dfh:
                     dfh.flush()
@@ -960,12 +993,12 @@
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError(_("consistency error adding group"))
-                measure = len(text)
+                textlen = len(text)
             else:
                 if self.version == 0:
                     e = (end, len(cdelta), base, link, p1, p2, node)
                 else:
-                    e = (self.offset_type(end, 0), len(cdelta), -1, base,
+                    e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                          link, self.rev(p1), self.rev(p2), node)
                 self.index.append(e)
                 self.nodemap[node] = r