# HG changeset patch
# User Dirkjan Ochtman
# Date 1241814910 -7200
# Node ID b7017097a4ec45af1779728dcac0d3ef1779d076
# Parent f2559645643ac990c0e4c4be59b613c7c92fa14a
# Parent 6b8513f8274aa7b273e758d36bf18d3a61b43c60
merge with mpm

diff -r f2559645643a -r b7017097a4ec mercurial/revlog.py
--- a/mercurial/revlog.py Mon May 04 18:04:41 2009 +0900
+++ b/mercurial/revlog.py Fri May 08 22:35:10 2009 +0200
@@ -31,6 +31,8 @@
 REVLOG_DEFAULT_FORMAT = REVLOGNG
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 
+_prereadsize = 1048576
+
 RevlogError = error.RevlogError
 LookupError = error.LookupError
 
@@ -315,12 +317,13 @@
     def __init__(self):
         self.size = struct.calcsize(indexformatv0)
 
-    def parseindex(self, fp, inline):
+    def parseindex(self, fp, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
-        data = fp.read()
+        if len(data) < _prereadsize:
+            data += fp.read() # read the rest
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
@@ -358,13 +361,15 @@
     def __init__(self):
         self.size = struct.calcsize(indexformatng)
 
-    def parseindex(self, fp, inline):
+    def parseindex(self, fp, data, inline):
         try:
-            size = util.fstat(fp).st_size
+            size = len(data)
+            if size == _prereadsize:
+                size = util.fstat(fp).st_size
         except AttributeError:
             size = 0
 
-        if util.openhardlinks() and not inline and size > 1000000:
+        if util.openhardlinks() and not inline and size > _prereadsize:
             # big index, let's parse it on demand
             parser = lazyparser(fp, size)
             index = lazyindex(parser)
@@ -375,7 +380,6 @@
             index[0] = e
             return index, nodemap, None
 
-        data = fp.read()
         # call the C implementation to parse the index data
         index, nodemap, cache = parsers.parse_index(data, inline)
         return index, nodemap, cache
@@ -422,7 +426,7 @@
         self.datafile = indexfile[:-2] + ".d"
         self.opener = opener
         self._cache = None
-        self._chunkcache = None
+        self._chunkcache = (0, '')
         self.nodemap = {nullid: nullrev}
         self.index = []
 
@@ -432,13 +436,12 @@
             if v & REVLOGNG:
                 v |= REVLOGNGINLINEDATA
 
-        i = ""
+        i = ''
         try:
             f = self.opener(self.indexfile)
-            i = f.read(4)
-            f.seek(0)
+            i = f.read(_prereadsize)
             if len(i) > 0:
-                v = struct.unpack(versionformat, i)[0]
+                v = struct.unpack(versionformat, i[:4])[0]
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
@@ -462,10 +465,12 @@
             self._io = revlogoldio()
         if i:
             try:
-                d = self._io.parseindex(f, self._inline)
+                d = self._io.parseindex(f, i, self._inline)
             except (ValueError, IndexError), e:
                 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
             self.index, self.nodemap, self._chunkcache = d
+            if not self._chunkcache:
+                self._chunkcache = (0, '')
 
         # add the magic null revision at -1 (if it hasn't been done already)
         if (self.index == [] or isinstance(self.index, lazyindex) or
@@ -907,42 +912,56 @@
         p1, p2 = self.parents(node)
         return hash(text, p1, p2) != node
 
+    def _addchunk(self, offset, data):
+        o, d = self._chunkcache
+        # try to add to existing cache
+        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
+            self._chunkcache = o, d + data
+        else:
+            self._chunkcache = offset, data
+
+    def _loadchunk(self, offset, length, df=None):
+        if not df:
+            if self._inline:
+                df = self.opener(self.indexfile)
+            else:
+                df = self.opener(self.datafile)
+
+        readahead = max(65536, length)
+        df.seek(offset)
+        d = df.read(readahead)
+        self._addchunk(offset, d)
+        if readahead > length:
+            return d[:length]
+        return d
+
+    def _getchunk(self, offset, length, df=None):
+        o, d = self._chunkcache
+        l = len(d)
+
+        # is it in the cache?
+        cachestart = offset - o
+        cacheend = cachestart + length
+        if cachestart >= 0 and cacheend <= l:
+            if cachestart == 0 and cacheend == l:
+                return d # avoid a copy
+            return d[cachestart:cacheend]
+
+        return self._loadchunk(offset, length, df)
+
+    def _prime(self, startrev, endrev, df):
+        start = self.start(startrev)
+        end = self.end(endrev)
+        if self._inline:
+            start += (startrev + 1) * self._io.size
+            end += (endrev + 1) * self._io.size
+        self._loadchunk(start, end - start, df)
+
     def chunk(self, rev, df=None):
-        def loadcache(df):
-            if not df:
-                if self._inline:
-                    df = self.opener(self.indexfile)
-                else:
-                    df = self.opener(self.datafile)
-            df.seek(start)
-            self._chunkcache = (start, df.read(cache_length))
-
         start, length = self.start(rev), self.length(rev)
         if self._inline:
             start += (rev + 1) * self._io.size
-        end = start + length
-
-        offset = 0
-        if not self._chunkcache:
-            cache_length = max(65536, length)
-            loadcache(df)
-        else:
-            cache_start = self._chunkcache[0]
-            cache_length = len(self._chunkcache[1])
-            cache_end = cache_start + cache_length
-            if start >= cache_start and end <= cache_end:
-                # it is cached
-                offset = start - cache_start
-            else:
-                cache_length = max(65536, length)
-                loadcache(df)
-
-        # avoid copying large chunks
-        c = self._chunkcache[1]
-        if cache_length != length:
-            c = c[offset:offset + length]
-
-        return decompress(c)
+        return decompress(self._getchunk(start, length, df))
 
     def revdiff(self, rev1, rev2):
         """return or calculate a delta between two revisions"""
@@ -978,10 +997,12 @@
             self._loadindex(base, rev + 1)
             if not self._inline and rev > base + 1:
                 df = self.opener(self.datafile)
+                self._prime(base, rev, df)
         else:
             self._loadindex(base, rev + 1)
             if not self._inline and rev > base:
                 df = self.opener(self.datafile)
+                self._prime(base, rev, df)
             text = self.chunk(base, df=df)
 
         bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
@@ -995,14 +1016,9 @@
         return text
 
     def checkinlinesize(self, tr, fp=None):
-        if not self._inline:
+        if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
             return
-        if not fp:
-            fp = self.opener(self.indexfile, 'r')
-            fp.seek(0, 2)
-        size = fp.tell()
-        if size < 131072:
-            return
+
         trinfo = tr.find(self.indexfile)
         if trinfo == None:
             raise RevlogError(_("%s not found in the transaction")
@@ -1012,19 +1028,22 @@
         dataoff = self.start(trindex)
 
         tr.add(self.datafile, dataoff)
+
+        if fp:
+            fp.flush()
+            fp.close()
+
         df = self.opener(self.datafile, 'w')
         try:
             calc = self._io.size
             for r in self:
                 start = self.start(r) + (r + 1) * calc
                 length = self.length(r)
-                fp.seek(start)
-                d = fp.read(length)
+                d = self._getchunk(start, length)
                 df.write(d)
         finally:
             df.close()
 
-        fp.close()
         fp = self.opener(self.indexfile, 'w', atomictemp=True)
         self.version &= ~(REVLOGNGINLINEDATA)
         self._inline = False
@@ -1037,7 +1056,7 @@
         fp.rename()
 
         tr.replace(self.indexfile, trindex * calc)
-        self._chunkcache = None
+        self._chunkcache = (0, '')
 
     def addrevision(self, text, transaction, link, p1, p2, d=None):
         """add a revision to the log
@@ -1322,7 +1341,7 @@
 
         # then reset internal state in memory to forget those revisions
         self._cache = None
-        self._chunkcache = None
+        self._chunkcache = (0, '')
         for x in xrange(rev, len(self)):
             del self.nodemap[self.node(x)]
 