lazyparser speed ups
author mpm@selenic.com
Mon, 13 Jun 2005 12:01:12 -0800
changeset 323 c6f0673ab7e9
parent 322 a0acae914e95
child 324 ce81bdd91d06
lazyparser speed ups -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 lazyparser speed ups When we do __contains__ on a map, we might as well load the whole index. Not doing this was slowing down finding new changesets by a factor of 20. When we do a full load, we also attempt to replace the revlog's index and nodemap with normal Python objects to avoid the lazymap overhead. manifest hash: 9b2b20aacc508f9027d115426c63a381d28e5485 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.0 (GNU/Linux) iD8DBQFCreYIywK+sNU5EO8RAoNHAJ9+LmXqsTQb9Bh3mZHq0A0VfQOleQCffHmn jC/O0vnfx5FCRsX2bUFG794= =BDTz -----END PGP SIGNATURE-----
mercurial/revlog.py
--- a/mercurial/revlog.py	Mon Jun 13 11:58:33 2005 -0800
+++ b/mercurial/revlog.py	Mon Jun 13 12:01:12 2005 -0800
@@ -43,17 +43,28 @@
 indexformat = ">4l20s20s20s"
 
 class lazyparser:
-    def __init__(self, data):
+    def __init__(self, data, revlog):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
+        self.all = 0
+        self.revlog = revlog
 
-    def load(self, pos):
-        block = pos / 1000
-        i = block * 1000
-        end = min(self.l, i + 1000)
+    def load(self, pos=None):
+        if self.all: return
+        if pos is not None:
+            block = pos / 1000
+            i = block * 1000
+            end = min(self.l, i + 1000)
+        else:
+            self.all = 1
+            i = 0
+            end = self.l
+            self.revlog.index = self.index
+            self.revlog.nodemap = self.map
+            
         while i < end:
             d = self.data[i * self.s: (i + 1) * self.s]
             e = struct.unpack(indexformat, d)
@@ -78,16 +89,14 @@
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
+        if self.p.all: return
         n = self.p.data.find(key)
         if n < 0: raise KeyError("node " + hex(key))
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
-        try:
-            self[key]
-            return True
-        except KeyError:
-            return False
+        self.p.load()
+        return key in self.p.map
     def __iter__(self):
         for i in xrange(self.p.l):
             try:
@@ -121,7 +130,7 @@
 
         if len(i) > 10000:
             # big index, let's parse it on demand
-            parser = lazyparser(i)
+            parser = lazyparser(i, self)
             self.index = lazyindex(parser)
             self.nodemap = lazymap(parser)
         else: