--- a/mercurial/revlog.py Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/revlog.py Tue Apr 04 16:47:12 2006 -0400
@@ -64,6 +64,7 @@
raise RevlogError(_("unknown compression type %r") % t)
indexformatv0 = ">4l20s20s20s"
+v0shaoffset = 56
# index ng:
# 6 bytes offset
# 2 bytes flags
@@ -75,48 +76,135 @@
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
+ngshaoffset = 32
versionformat = ">i"
class lazyparser(object):
"""
this class avoids the need to parse the entirety of large indices
-
- By default we parse and load 1000 entries at a time.
-
- If no position is specified, we load the whole index, and replace
- the lazy objects in revlog with the underlying objects for
- efficiency in cases where we look at most of the nodes.
"""
- def __init__(self, data, revlog, indexformat):
- self.data = data
+ def __init__(self, dataf, size, indexformat, shaoffset):
+ self.dataf = dataf
+ self.format = indexformat
self.s = struct.calcsize(indexformat)
self.indexformat = indexformat
- self.l = len(data)/self.s
+ self.datasize = size
+ self.l = size/self.s
self.index = [None] * self.l
self.map = {nullid: -1}
+ self.allmap = 0
self.all = 0
- self.revlog = revlog
+ self.mapfind_count = 0
+ self.shaoffset = shaoffset
- def load(self, pos=None):
+ def loadmap(self):
+ """
+ during a commit, we need to make sure the rev being added is
+ not a duplicate. This requires loading the entire index,
+ which is fairly slow. loadmap can load up just the node map,
+ which takes much less time.
+ """
+ if self.allmap: return
+ start = 0
+ end = self.datasize
+ self.allmap = 1
+ cur = 0
+ count = 0
+ blocksize = self.s * 256
+ self.dataf.seek(0)
+ while cur < end:
+ data = self.dataf.read(blocksize)
+ off = 0
+ for x in xrange(256):
+ n = data[off + self.shaoffset:off + self.shaoffset + 20]
+ self.map[n] = count
+ count += 1
+ if count >= self.l:
+ break
+ off += self.s
+ cur += blocksize
+
+ def loadblock(self, blockstart, blocksize, data=None):
if self.all: return
- if pos is not None:
- block = pos / 1000
- i = block * 1000
- end = min(self.l, i + 1000)
+ if data is None:
+ self.dataf.seek(blockstart)
+ data = self.dataf.read(blocksize)
+ lend = len(data) / self.s
+ i = blockstart / self.s
+ off = 0
+ for x in xrange(lend):
+ if self.index[i + x] == None:
+ b = data[off : off + self.s]
+ e = struct.unpack(self.format, b)
+ self.index[i + x] = e
+ self.map[e[-1]] = i + x
+ off += self.s
+
+ def findnode(self, node):
+ """search backwards through the index file for a specific node"""
+ if self.allmap: return None
+
+ # hg log will cause many many searches for the manifest
+ # nodes. After we get called a few times, just load the whole
+ # thing.
+ if self.mapfind_count > 8:
+ self.loadmap()
+ if node in self.map:
+ return node
+ return None
+ self.mapfind_count += 1
+ last = self.l - 1
+ while self.index[last] != None:
+ if last == 0:
+ self.all = 1
+ self.allmap = 1
+ return None
+ last -= 1
+ end = (last + 1) * self.s
+ blocksize = self.s * 256
+ while end >= 0:
+ start = max(end - blocksize, 0)
+ self.dataf.seek(start)
+ data = self.dataf.read(end - start)
+ findend = end - start
+ while True:
+            # we're searching backwards, so we have to make sure
+ # we don't find a changeset where this node is a parent
+ off = data.rfind(node, 0, findend)
+ findend = off
+ if off >= 0:
+ i = off / self.s
+ off = i * self.s
+ n = data[off + self.shaoffset:off + self.shaoffset + 20]
+ if n == node:
+ self.map[n] = i + start / self.s
+ return node
+ else:
+ break
+ end -= blocksize
+ return None
+
+ def loadindex(self, i=None, end=None):
+ if self.all: return
+ all = False
+ if i == None:
+ blockstart = 0
+ blocksize = (512 / self.s) * self.s
+ end = self.datasize
+ all = True
else:
- self.all = 1
- i = 0
- end = self.l
- self.revlog.index = self.index
- self.revlog.nodemap = self.map
-
- while i < end:
- if not self.index[i]:
- d = self.data[i * self.s: (i + 1) * self.s]
- e = struct.unpack(self.indexformat, d)
- self.index[i] = e
- self.map[e[-1]] = i
- i += 1
+ if end:
+ blockstart = i * self.s
+ end = end * self.s
+ blocksize = end - blockstart
+ else:
+ blockstart = (i & ~(32)) * self.s
+ blocksize = self.s * 64
+ end = blockstart + blocksize
+ while blockstart < end:
+ self.loadblock(blockstart, blocksize)
+ blockstart += blocksize
+ if all: self.all = True
class lazyindex(object):
"""a lazy version of the index array"""
@@ -127,7 +215,7 @@
def load(self, pos):
if pos < 0:
pos += len(self.p.index)
- self.p.load(pos)
+ self.p.loadindex(pos)
return self.p.index[pos]
def __getitem__(self, pos):
return self.p.index[pos] or self.load(pos)
@@ -143,14 +231,13 @@
def __init__(self, parser):
self.p = parser
def load(self, key):
- if self.p.all: return
- n = self.p.data.find(key)
- if n < 0:
+ n = self.p.findnode(key)
+ if n == None:
raise KeyError(key)
- pos = n / self.p.s
- self.p.load(pos)
def __contains__(self, key):
- self.p.load()
+ if key in self.p.map:
+ return True
+ self.p.loadmap()
return key in self.p.map
def __iter__(self):
yield nullid
@@ -158,7 +245,7 @@
try:
yield self.p.index[i][-1]
except:
- self.p.load(i)
+ self.p.loadindex(i)
yield self.p.index[i][-1]
def __getitem__(self, key):
try:
@@ -222,7 +309,8 @@
v = self.defversion
try:
f = self.opener(self.indexfile)
- i = f.read()
+ i = f.read(4)
+ f.seek(0)
except IOError, inst:
if inst.errno != errno.ENOENT:
raise
@@ -241,7 +329,7 @@
return
self.indexstat = st
if len(i) > 0:
- v = struct.unpack(versionformat, i[:4])[0]
+ v = struct.unpack(versionformat, i)[0]
flags = v & ~0xFFFF
fmt = v & 0xFFFF
if fmt == 0:
@@ -258,16 +346,19 @@
self.version = v
if v == 0:
self.indexformat = indexformatv0
+ shaoffset = v0shaoffset
else:
self.indexformat = indexformatng
+ shaoffset = ngshaoffset
if i:
if not self.inlinedata() and st and st.st_size > 10000:
# big index, let's parse it on demand
- parser = lazyparser(i, self, self.indexformat)
+ parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
self.index = lazyindex(parser)
self.nodemap = lazymap(parser)
else:
+ i = f.read()
self.parseindex(i)
if self.inlinedata():
# we've already got the entire data file read in, save it
@@ -312,11 +403,23 @@
def offset_type(self, offset, type):
return long(long(offset) << 16 | type)
+ def loadindex(self, start, end):
+ """load a block of indexes all at once from the lazy parser"""
+ if isinstance(self.index, lazyindex):
+ self.index.p.loadindex(start, end)
+
def loadindexmap(self):
"""loads both the map and the index from the lazy parser"""
if isinstance(self.index, lazyindex):
p = self.index.p
- p.load()
+ p.loadindex()
+ self.nodemap = p.map
+
+ def loadmap(self):
+ """loads the map from the lazy parser"""
+ if isinstance(self.nodemap, lazymap):
+ self.nodemap.p.loadmap()
+ self.nodemap = self.nodemap.p.map
def inlinedata(self): return self.version & REVLOGNGINLINEDATA
def tip(self): return self.node(len(self.index) - 1)
@@ -690,7 +793,9 @@
if self.cache and self.cache[1] >= base and self.cache[1] < rev:
base = self.cache[1]
text = self.cache[2]
+ self.loadindex(base, rev + 1)
else:
+ self.loadindex(base, rev + 1)
text = self.chunk(base, df=df)
bins = []