Fill in the uncompressed size during revlog.addgroup
author mason@suse.com
Tue, 04 Apr 2006 16:38:44 -0400
changeset 2078 441ea218414e
parent 2077 4d0700ae0991
child 2079 ee96ca273f32
Fill in the uncompressed size during revlog.addgroup. This uses code from Matt to calculate the size change that would result from applying a delta, in order to keep an accurate running total of the text size during revlog.addgroup.
mercurial/mdiff.py
mercurial/mpatch.c
mercurial/revlog.py
--- a/mercurial/mdiff.py	Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/mdiff.py	Tue Apr 04 16:38:44 2006 -0400
@@ -192,4 +192,5 @@
     return mpatch.patches(a, [bin])
 
 patches = mpatch.patches
+patchedsize = mpatch.patchedsize
 textdiff = bdiff.bdiff
--- a/mercurial/mpatch.c	Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/mpatch.c	Tue Apr 04 16:38:44 2006 -0400
@@ -354,8 +354,44 @@
 	return result;
 }
 
+/*
+ * calculate size of a patched file directly
+ *
+ * Python arguments: the original text length (long) and a binary patch
+ * (string).  The patch is walked as a sequence of fragments, each made of
+ * three 32-bit network-order integers (start, end, len) followed by len
+ * bytes of replacement data.  Returns the resulting length as a Python
+ * integer, or NULL with mpatch_Error set when the patch cannot be decoded
+ * (truncated header, data running past the end, or start > end).
+ */
+static PyObject *
+patchedsize(PyObject *self, PyObject *args)
+{
+	long orig, start, end, len, outlen = 0, last = 0;
+	int patchlen;
+	char *bin, *binend;
+	uint32_t hdr[3]; /* aligned copy of one 12-byte fragment header */
+
+	if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
+		return NULL;
+
+	binend = bin + patchlen;
+
+	/* only decode a header when all 12 of its bytes are in the buffer */
+	while (binend - bin >= 12) {
+		memcpy(hdr, bin, sizeof(hdr));
+		start = ntohl(hdr[0]);
+		end = ntohl(hdr[1]);
+		len = ntohl(hdr[2]);
+
+		/*
+		 * Reject fragments that are inconsistent or whose data would
+		 * extend past the buffer; bin is left short of binend so the
+		 * check below reports the error.
+		 */
+		if (start > end || len < 0 || len > binend - bin - 12)
+			break;
+
+		bin += 12 + len;
+		outlen += start - last;
+		last = end;
+		outlen += len;
+	}
+
+	if (bin != binend) {
+		if (!PyErr_Occurred())
+			PyErr_SetString(mpatch_Error, "patch cannot be decoded");
+		return NULL;
+	}
+
+	/* account for the untouched tail of the original text */
+	outlen += orig - last;
+	return Py_BuildValue("l", outlen);
+}
+
 static PyMethodDef methods[] = {
 	{"patches", patches, METH_VARARGS, "apply a series of patches\n"},
+	{"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
 	{NULL, NULL}
 };
 
--- a/mercurial/revlog.py	Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/revlog.py	Tue Apr 04 16:38:44 2006 -0400
@@ -342,8 +342,40 @@
         if self.version != 0:
             return self.ngoffset(self.index[rev][0])
         return self.index[rev][0]
+
     def end(self, rev): return self.start(rev) + self.length(rev)
 
+    def size(self, rev):
+        """return the length of the uncompressed text for a given revision
+
+        For a revlog whose version is not 0, the value stored at
+        position 2 of the index entry is returned when it is
+        non-negative.  Otherwise the text is obtained through
+        self.revision() and its length is returned.
+        """
+        l = -1
+        if self.version != 0:
+            l = self.index[rev][2]
+        if l >= 0:
+            return l
+
+        # no usable stored size: fetch the full text and measure it
+        t = self.revision(self.node(rev))
+        return len(t)
+
+        # Alternate implementation, kept below as an unreachable string
+        # literal after the return above.  Its advantage is that it will
+        # be faster for a single revision.  But the results are not
+        # cached, so finding the size of every revision will be slower.
+        """
+        if self.cache and self.cache[1] == rev:
+            return len(self.cache[2])
+
+        base = self.base(rev)
+        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
+            base = self.cache[1]
+            text = self.cache[2]
+        else:
+            text = self.revision(self.node(base))
+
+        l = len(text)
+        for x in xrange(base + 1, rev + 1):
+            l = mdiff.patchedsize(l, self.chunk(x))
+        return l
+        """
+
     def length(self, rev):
         if rev < 0:
             return 0
@@ -904,7 +936,7 @@
         node = None
 
         base = prev = -1
-        start = end = measure = 0
+        start = end = textlen = 0
         if r:
             end = self.end(t)
 
@@ -949,8 +981,9 @@
             if chain == prev:
                 tempd = compress(delta)
                 cdelta = tempd[0] + tempd[1]
+                textlen = mdiff.patchedsize(textlen, delta)
 
-            if chain != prev or (end - start + len(cdelta)) > measure * 2:
+            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                 # flush our writes here so we can read it in revision
                 if dfh:
                     dfh.flush()
@@ -960,12 +993,12 @@
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError(_("consistency error adding group"))
-                measure = len(text)
+                textlen = len(text)
             else:
                 if self.version == 0:
                     e = (end, len(cdelta), base, link, p1, p2, node)
                 else:
-                    e = (self.offset_type(end, 0), len(cdelta), -1, base,
+                    e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                          link, self.rev(p1), self.rev(p2), node)
                 self.index.append(e)
                 self.nodemap[node] = r