Mercurial > hg
changeset 13253:61c9bc3da402
revlog: remove lazy index
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Tue, 04 Jan 2011 14:12:52 -0600 |
parents | 9f6afc288702 |
children | 5ef5eb1f3515 |
files | contrib/perf.py mercurial/localrepo.py mercurial/pure/parsers.py mercurial/revlog.py mercurial/statichttprepo.py tests/test-parseindex2.py |
diffstat | 6 files changed, 7 insertions(+), 259 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/perf.py Tue Jan 11 14:58:17 2011 -0600 +++ b/contrib/perf.py Tue Jan 04 14:12:52 2011 -0600 @@ -83,8 +83,7 @@ import mercurial.changelog def d(): t = repo.changelog.tip() - repo.changelog = mercurial.changelog.changelog(repo.sopener) - repo.changelog._loadindexmap() + repo.invalidate() timer(d) def perfstartup(ui, repo):
--- a/mercurial/localrepo.py Tue Jan 11 14:58:17 2011 -0600 +++ b/mercurial/localrepo.py Tue Jan 04 14:12:52 2011 -0600 @@ -1412,9 +1412,6 @@ # Nor do we know which filenodes are missing. msng_filenode_set = {} - junk = mnfst.index[len(mnfst) - 1] # Get around a bug in lazyindex - junk = None - # A changeset always belongs to itself, so the changenode lookup # function for a changenode is identity. def identity(x):
--- a/mercurial/pure/parsers.py Tue Jan 11 14:58:17 2011 -0600 +++ b/mercurial/pure/parsers.py Tue Jan 04 14:12:52 2011 -0600 @@ -38,7 +38,7 @@ cache = None nodemap = {nullid: nullrev} n = off = 0 - # if we're not using lazymap, always read the whole index + l = len(data) - s append = index.append if inline:
--- a/mercurial/revlog.py Tue Jan 11 14:58:17 2011 -0600 +++ b/mercurial/revlog.py Tue Jan 04 14:12:52 2011 -0600 @@ -38,11 +38,9 @@ REVIDX_PUNCHED_FLAG = 2 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA -# amount of data read unconditionally, should be >= 4 -# when not inline: threshold for using lazy index -_prereadsize = 1048576 # max size of revlog with inline data _maxinline = 131072 +_chunksize = 1048576 RevlogError = error.RevlogError LookupError = error.LookupError @@ -121,209 +119,6 @@ return bin[1:] raise RevlogError(_("unknown compression type %r") % t) -class lazyparser(object): - """ - this class avoids the need to parse the entirety of large indices - """ - - # lazyparser is not safe to use on windows if win32 extensions not - # available. it keeps file handle open, which make it not possible - # to break hardlinks on local cloned repos. - - def __init__(self, dataf): - try: - size = util.fstat(dataf).st_size - except AttributeError: - size = 0 - self.dataf = dataf - self.s = struct.calcsize(indexformatng) - self.datasize = size - self.l = size // self.s - self.index = [None] * self.l - self.map = {nullid: nullrev} - self.allmap = 0 - self.all = 0 - self.mapfind_count = 0 - - def loadmap(self): - """ - during a commit, we need to make sure the rev being added is - not a duplicate. This requires loading the entire index, - which is fairly slow. loadmap can load up just the node map, - which takes much less time. - """ - if self.allmap: - return - end = self.datasize - self.allmap = 1 - cur = 0 - count = 0 - blocksize = self.s * 256 - self.dataf.seek(0) - while cur < end: - data = self.dataf.read(blocksize) - off = 0 - for x in xrange(256): - n = data[off + ngshaoffset:off + ngshaoffset + 20] - self.map[n] = count - count += 1 - if count >= self.l: - break - off += self.s - cur += blocksize - - def loadblock(self, blockstart, blocksize, data=None): - if self.all: - return - if data is None: - self.dataf.seek(blockstart) - if blockstart + blocksize > self.datasize: - # the revlog may have grown since we've started running, - # but we don't have space in self.index for more entries. - # limit blocksize so that we don't get too much data. - blocksize = max(self.datasize - blockstart, 0) - data = self.dataf.read(blocksize) - lend = len(data) // self.s - i = blockstart // self.s - off = 0 - # lazyindex supports __delitem__ - if lend > len(self.index) - i: - lend = len(self.index) - i - for x in xrange(lend): - if self.index[i + x] is None: - b = data[off : off + self.s] - self.index[i + x] = b - n = b[ngshaoffset:ngshaoffset + 20] - self.map[n] = i + x - off += self.s - - def findnode(self, node): - """search backwards through the index file for a specific node""" - if self.allmap: - return None - - # hg log will cause many many searches for the manifest - # nodes. After we get called a few times, just load the whole - # thing. - if self.mapfind_count > 8: - self.loadmap() - if node in self.map: - return node - return None - self.mapfind_count += 1 - last = self.l - 1 - while self.index[last] is not None: - if last == 0: - self.all = 1 - self.allmap = 1 - return None - last -= 1 - end = (last + 1) * self.s - blocksize = self.s * 256 - while end >= 0: - start = max(end - blocksize, 0) - self.dataf.seek(start) - data = self.dataf.read(end - start) - findend = end - start - while True: - # we're searching backwards, so we have to make sure - # we don't find a changeset where this node is a parent - off = data.find(node, 0, findend) - findend = off - if off >= 0: - i = off / self.s - off = i * self.s - n = data[off + ngshaoffset:off + ngshaoffset + 20] - if n == node: - self.map[n] = i + start / self.s - return node - else: - break - end -= blocksize - return None - - def loadindex(self, i=None, end=None): - if self.all: - return - all = False - if i is None: - blockstart = 0 - blocksize = (65536 / self.s) * self.s - end = self.datasize - all = True - else: - if end: - blockstart = i * self.s - end = end * self.s - blocksize = end - blockstart - else: - blockstart = (i & ~1023) * self.s - blocksize = self.s * 1024 - end = blockstart + blocksize - while blockstart < end: - self.loadblock(blockstart, blocksize) - blockstart += blocksize - if all: - self.all = True - -class lazyindex(object): - """a lazy version of the index array""" - def __init__(self, parser): - self.p = parser - def __len__(self): - return len(self.p.index) - def load(self, pos): - if pos < 0: - pos += len(self.p.index) - self.p.loadindex(pos) - return self.p.index[pos] - def __getitem__(self, pos): - return _unpack(indexformatng, self.p.index[pos] or self.load(pos)) - def __setitem__(self, pos, item): - self.p.index[pos] = _pack(indexformatng, *item) - def __delitem__(self, pos): - del self.p.index[pos] - def insert(self, pos, e): - self.p.index.insert(pos, _pack(indexformatng, *e)) - def append(self, e): - self.p.index.append(_pack(indexformatng, *e)) - -class lazymap(object): - """a lazy version of the node map""" - def __init__(self, parser): - self.p = parser - def load(self, key): - n = self.p.findnode(key) - if n is None: - raise KeyError(key) - def __contains__(self, key): - if key in self.p.map: - return True - self.p.loadmap() - return key in self.p.map - def __iter__(self): - yield nullid - for i, ret in enumerate(self.p.index): - if not ret: - self.p.loadindex(i) - ret = self.p.index[i] - if isinstance(ret, str): - ret = _unpack(indexformatng, ret) - yield ret[7] - def __getitem__(self, key): - try: - return self.p.map[key] - except KeyError: - try: - self.load(key) - return self.p.map[key] - except KeyError: - raise KeyError("node " + hex(key)) - def __setitem__(self, key, val): - self.p.map[key] = val - def __delitem__(self, key): - del self.p.map[key] - indexformatv0 = ">4l20s20s20s" v0shaoffset = 56 @@ -336,8 +131,6 @@ index = [] nodemap = {nullid: nullrev} n = off = 0 - if len(data) == _prereadsize: - data += fp.read() # read the rest l = len(data) while off + s <= l: cur = data[off:off + s] @@ -378,20 +171,6 @@ self.size = struct.calcsize(indexformatng) def parseindex(self, fp, data, inline): - if len(data) == _prereadsize: - if util.openhardlinks() and not inline: - # big index, let's parse it on demand - parser = lazyparser(fp) - index = lazyindex(parser) - nodemap = lazymap(parser) - e = list(index[0]) - type = gettype(e[0]) - e[0] = offset_type(0, type) - index[0] = e - return index, nodemap, None - else: - data += fp.read() - # call the C implementation to parse the index data index, nodemap, cache = parsers.parse_index(data, inline) return index, nodemap, cache @@ -458,10 +237,7 @@ i = '' try: f = self.opener(self.indexfile) - if "nonlazy" in getattr(self.opener, 'options', {}): - i = f.read() - else: - i = f.read(_prereadsize) + i = f.read() if len(i) > 0: v = struct.unpack(versionformat, i[:4])[0] except IOError, inst: @@ -496,28 +272,9 @@ self._chunkclear() # add the magic null revision at -1 (if it hasn't been done already) - if (self.index == [] or isinstance(self.index, lazyindex) or - self.index[-1][7] != nullid) : + if self.index == [] or self.index[-1][7] != nullid: self.index.append((0, 0, 0, -1, -1, -1, -1, nullid)) - def _loadindex(self, start, end): - """load a block of indexes all at once from the lazy parser""" - if isinstance(self.index, lazyindex): - self.index.p.loadindex(start, end) - - def _loadindexmap(self): - """loads both the map and the index from the lazy parser""" - if isinstance(self.index, lazyindex): - p = self.index.p - p.loadindex() - self.nodemap = p.map - - def _loadmap(self): - """loads the map from the lazy parser""" - if isinstance(self.nodemap, lazymap): - self.nodemap.p.loadmap() - self.nodemap = self.nodemap.p.map - def tip(self): return self.node(len(self.index) - 2) def __len__(self): @@ -978,7 +735,7 @@ def _addchunk(self, offset, data): o, d = self._chunkcache # try to add to existing cache - if o + len(d) == offset and len(d) + len(data) < _prereadsize: + if o + len(d) == offset and len(d) + len(data) < _chunksize: self._chunkcache = o, d + data else: self._chunkcache = offset, data @@ -1060,7 +817,6 @@ (self.flags(rev) & ~REVIDX_KNOWN_FLAGS)) # build delta chain - self._loadindex(base, rev + 1) chain = [] index = self.index # for performance iterrev = rev @@ -1413,9 +1169,6 @@ if len(self) == 0: return - if isinstance(self.index, lazyindex): - self._loadindexmap() - for rev in self: if self.index[rev][4] >= minlink: break
--- a/mercurial/statichttprepo.py Tue Jan 11 14:58:17 2011 -0600 +++ b/mercurial/statichttprepo.py Tue Jan 04 14:12:52 2011 -0600 @@ -77,7 +77,6 @@ return httprangereader(f, urlopener) return o - opener.options = {'nonlazy': 1} return opener class statichttprepository(localrepo.localrepository):
--- a/tests/test-parseindex2.py Tue Jan 11 14:58:17 2011 -0600 +++ b/tests/test-parseindex2.py Tue Jan 04 14:12:52 2011 -0600 @@ -21,7 +21,7 @@ index = [] nodemap = {nullid: nullrev} n = off = 0 - # if we're not using lazymap, always read the whole index + l = len(data) - s append = index.append if inline: