comparison mercurial/revlog.py @ 13253:61c9bc3da402

revlog: remove lazy index
author Matt Mackall <mpm@selenic.com>
date Tue, 04 Jan 2011 14:12:52 -0600
parents 12ed25f39d0b
children 5ef5eb1f3515
comparison
equal deleted inserted replaced
13252:9f6afc288702 13253:61c9bc3da402
36 # revlog index flags 36 # revlog index flags
37 REVIDX_PARENTDELTA = 1 37 REVIDX_PARENTDELTA = 1
38 REVIDX_PUNCHED_FLAG = 2 38 REVIDX_PUNCHED_FLAG = 2
39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA 39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40 40
41 # amount of data read unconditionally, should be >= 4
42 # when not inline: threshold for using lazy index
43 _prereadsize = 1048576
44 # max size of revlog with inline data 41 # max size of revlog with inline data
45 _maxinline = 131072 42 _maxinline = 131072
43 _chunksize = 1048576
46 44
47 RevlogError = error.RevlogError 45 RevlogError = error.RevlogError
48 LookupError = error.LookupError 46 LookupError = error.LookupError
49 47
50 def getoffset(q): 48 def getoffset(q):
119 return _decompress(bin) 117 return _decompress(bin)
120 if t == 'u': 118 if t == 'u':
121 return bin[1:] 119 return bin[1:]
122 raise RevlogError(_("unknown compression type %r") % t) 120 raise RevlogError(_("unknown compression type %r") % t)
123 121
124 class lazyparser(object):
125 """
126 this class avoids the need to parse the entirety of large indices
127 """
128
129 # lazyparser is not safe to use on windows if win32 extensions not
130 # available. it keeps file handle open, which make it not possible
131 # to break hardlinks on local cloned repos.
132
133 def __init__(self, dataf):
134 try:
135 size = util.fstat(dataf).st_size
136 except AttributeError:
137 size = 0
138 self.dataf = dataf
139 self.s = struct.calcsize(indexformatng)
140 self.datasize = size
141 self.l = size // self.s
142 self.index = [None] * self.l
143 self.map = {nullid: nullrev}
144 self.allmap = 0
145 self.all = 0
146 self.mapfind_count = 0
147
148 def loadmap(self):
149 """
150 during a commit, we need to make sure the rev being added is
151 not a duplicate. This requires loading the entire index,
152 which is fairly slow. loadmap can load up just the node map,
153 which takes much less time.
154 """
155 if self.allmap:
156 return
157 end = self.datasize
158 self.allmap = 1
159 cur = 0
160 count = 0
161 blocksize = self.s * 256
162 self.dataf.seek(0)
163 while cur < end:
164 data = self.dataf.read(blocksize)
165 off = 0
166 for x in xrange(256):
167 n = data[off + ngshaoffset:off + ngshaoffset + 20]
168 self.map[n] = count
169 count += 1
170 if count >= self.l:
171 break
172 off += self.s
173 cur += blocksize
174
175 def loadblock(self, blockstart, blocksize, data=None):
176 if self.all:
177 return
178 if data is None:
179 self.dataf.seek(blockstart)
180 if blockstart + blocksize > self.datasize:
181 # the revlog may have grown since we've started running,
182 # but we don't have space in self.index for more entries.
183 # limit blocksize so that we don't get too much data.
184 blocksize = max(self.datasize - blockstart, 0)
185 data = self.dataf.read(blocksize)
186 lend = len(data) // self.s
187 i = blockstart // self.s
188 off = 0
189 # lazyindex supports __delitem__
190 if lend > len(self.index) - i:
191 lend = len(self.index) - i
192 for x in xrange(lend):
193 if self.index[i + x] is None:
194 b = data[off : off + self.s]
195 self.index[i + x] = b
196 n = b[ngshaoffset:ngshaoffset + 20]
197 self.map[n] = i + x
198 off += self.s
199
200 def findnode(self, node):
201 """search backwards through the index file for a specific node"""
202 if self.allmap:
203 return None
204
205 # hg log will cause many many searches for the manifest
206 # nodes. After we get called a few times, just load the whole
207 # thing.
208 if self.mapfind_count > 8:
209 self.loadmap()
210 if node in self.map:
211 return node
212 return None
213 self.mapfind_count += 1
214 last = self.l - 1
215 while self.index[last] is not None:
216 if last == 0:
217 self.all = 1
218 self.allmap = 1
219 return None
220 last -= 1
221 end = (last + 1) * self.s
222 blocksize = self.s * 256
223 while end >= 0:
224 start = max(end - blocksize, 0)
225 self.dataf.seek(start)
226 data = self.dataf.read(end - start)
227 findend = end - start
228 while True:
229 # we're searching backwards, so we have to make sure
230 # we don't find a changeset where this node is a parent
231 off = data.find(node, 0, findend)
232 findend = off
233 if off >= 0:
234 i = off / self.s
235 off = i * self.s
236 n = data[off + ngshaoffset:off + ngshaoffset + 20]
237 if n == node:
238 self.map[n] = i + start / self.s
239 return node
240 else:
241 break
242 end -= blocksize
243 return None
244
245 def loadindex(self, i=None, end=None):
246 if self.all:
247 return
248 all = False
249 if i is None:
250 blockstart = 0
251 blocksize = (65536 / self.s) * self.s
252 end = self.datasize
253 all = True
254 else:
255 if end:
256 blockstart = i * self.s
257 end = end * self.s
258 blocksize = end - blockstart
259 else:
260 blockstart = (i & ~1023) * self.s
261 blocksize = self.s * 1024
262 end = blockstart + blocksize
263 while blockstart < end:
264 self.loadblock(blockstart, blocksize)
265 blockstart += blocksize
266 if all:
267 self.all = True
268
269 class lazyindex(object):
270 """a lazy version of the index array"""
271 def __init__(self, parser):
272 self.p = parser
273 def __len__(self):
274 return len(self.p.index)
275 def load(self, pos):
276 if pos < 0:
277 pos += len(self.p.index)
278 self.p.loadindex(pos)
279 return self.p.index[pos]
280 def __getitem__(self, pos):
281 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
282 def __setitem__(self, pos, item):
283 self.p.index[pos] = _pack(indexformatng, *item)
284 def __delitem__(self, pos):
285 del self.p.index[pos]
286 def insert(self, pos, e):
287 self.p.index.insert(pos, _pack(indexformatng, *e))
288 def append(self, e):
289 self.p.index.append(_pack(indexformatng, *e))
290
291 class lazymap(object):
292 """a lazy version of the node map"""
293 def __init__(self, parser):
294 self.p = parser
295 def load(self, key):
296 n = self.p.findnode(key)
297 if n is None:
298 raise KeyError(key)
299 def __contains__(self, key):
300 if key in self.p.map:
301 return True
302 self.p.loadmap()
303 return key in self.p.map
304 def __iter__(self):
305 yield nullid
306 for i, ret in enumerate(self.p.index):
307 if not ret:
308 self.p.loadindex(i)
309 ret = self.p.index[i]
310 if isinstance(ret, str):
311 ret = _unpack(indexformatng, ret)
312 yield ret[7]
313 def __getitem__(self, key):
314 try:
315 return self.p.map[key]
316 except KeyError:
317 try:
318 self.load(key)
319 return self.p.map[key]
320 except KeyError:
321 raise KeyError("node " + hex(key))
322 def __setitem__(self, key, val):
323 self.p.map[key] = val
324 def __delitem__(self, key):
325 del self.p.map[key]
326
327 indexformatv0 = ">4l20s20s20s" 122 indexformatv0 = ">4l20s20s20s"
328 v0shaoffset = 56 123 v0shaoffset = 56
329 124
330 class revlogoldio(object): 125 class revlogoldio(object):
331 def __init__(self): 126 def __init__(self):
334 def parseindex(self, fp, data, inline): 129 def parseindex(self, fp, data, inline):
335 s = self.size 130 s = self.size
336 index = [] 131 index = []
337 nodemap = {nullid: nullrev} 132 nodemap = {nullid: nullrev}
338 n = off = 0 133 n = off = 0
339 if len(data) == _prereadsize:
340 data += fp.read() # read the rest
341 l = len(data) 134 l = len(data)
342 while off + s <= l: 135 while off + s <= l:
343 cur = data[off:off + s] 136 cur = data[off:off + s]
344 off += s 137 off += s
345 e = _unpack(indexformatv0, cur) 138 e = _unpack(indexformatv0, cur)
376 class revlogio(object): 169 class revlogio(object):
377 def __init__(self): 170 def __init__(self):
378 self.size = struct.calcsize(indexformatng) 171 self.size = struct.calcsize(indexformatng)
379 172
380 def parseindex(self, fp, data, inline): 173 def parseindex(self, fp, data, inline):
381 if len(data) == _prereadsize:
382 if util.openhardlinks() and not inline:
383 # big index, let's parse it on demand
384 parser = lazyparser(fp)
385 index = lazyindex(parser)
386 nodemap = lazymap(parser)
387 e = list(index[0])
388 type = gettype(e[0])
389 e[0] = offset_type(0, type)
390 index[0] = e
391 return index, nodemap, None
392 else:
393 data += fp.read()
394
395 # call the C implementation to parse the index data 174 # call the C implementation to parse the index data
396 index, nodemap, cache = parsers.parse_index(data, inline) 175 index, nodemap, cache = parsers.parse_index(data, inline)
397 return index, nodemap, cache 176 return index, nodemap, cache
398 177
399 def packentry(self, entry, node, version, rev): 178 def packentry(self, entry, node, version, rev):
456 v |= REVLOGSHALLOW 235 v |= REVLOGSHALLOW
457 236
458 i = '' 237 i = ''
459 try: 238 try:
460 f = self.opener(self.indexfile) 239 f = self.opener(self.indexfile)
461 if "nonlazy" in getattr(self.opener, 'options', {}): 240 i = f.read()
462 i = f.read()
463 else:
464 i = f.read(_prereadsize)
465 if len(i) > 0: 241 if len(i) > 0:
466 v = struct.unpack(versionformat, i[:4])[0] 242 v = struct.unpack(versionformat, i[:4])[0]
467 except IOError, inst: 243 except IOError, inst:
468 if inst.errno != errno.ENOENT: 244 if inst.errno != errno.ENOENT:
469 raise 245 raise
494 self.index, self.nodemap, self._chunkcache = d 270 self.index, self.nodemap, self._chunkcache = d
495 if not self._chunkcache: 271 if not self._chunkcache:
496 self._chunkclear() 272 self._chunkclear()
497 273
498 # add the magic null revision at -1 (if it hasn't been done already) 274 # add the magic null revision at -1 (if it hasn't been done already)
499 if (self.index == [] or isinstance(self.index, lazyindex) or 275 if self.index == [] or self.index[-1][7] != nullid:
500 self.index[-1][7] != nullid) :
501 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid)) 276 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
502
503 def _loadindex(self, start, end):
504 """load a block of indexes all at once from the lazy parser"""
505 if isinstance(self.index, lazyindex):
506 self.index.p.loadindex(start, end)
507
508 def _loadindexmap(self):
509 """loads both the map and the index from the lazy parser"""
510 if isinstance(self.index, lazyindex):
511 p = self.index.p
512 p.loadindex()
513 self.nodemap = p.map
514
515 def _loadmap(self):
516 """loads the map from the lazy parser"""
517 if isinstance(self.nodemap, lazymap):
518 self.nodemap.p.loadmap()
519 self.nodemap = self.nodemap.p.map
520 277
521 def tip(self): 278 def tip(self):
522 return self.node(len(self.index) - 2) 279 return self.node(len(self.index) - 2)
523 def __len__(self): 280 def __len__(self):
524 return len(self.index) - 1 281 return len(self.index) - 1
976 return hash(text, p1, p2) != node 733 return hash(text, p1, p2) != node
977 734
978 def _addchunk(self, offset, data): 735 def _addchunk(self, offset, data):
979 o, d = self._chunkcache 736 o, d = self._chunkcache
980 # try to add to existing cache 737 # try to add to existing cache
981 if o + len(d) == offset and len(d) + len(data) < _prereadsize: 738 if o + len(d) == offset and len(d) + len(data) < _chunksize:
982 self._chunkcache = o, d + data 739 self._chunkcache = o, d + data
983 else: 740 else:
984 self._chunkcache = offset, data 741 self._chunkcache = offset, data
985 742
986 def _loadchunk(self, offset, length): 743 def _loadchunk(self, offset, length):
1058 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS: 815 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1059 raise RevlogError(_('incompatible revision flag %x') % 816 raise RevlogError(_('incompatible revision flag %x') %
1060 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS)) 817 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1061 818
1062 # build delta chain 819 # build delta chain
1063 self._loadindex(base, rev + 1)
1064 chain = [] 820 chain = []
1065 index = self.index # for performance 821 index = self.index # for performance
1066 iterrev = rev 822 iterrev = rev
1067 e = index[iterrev] 823 e = index[iterrev]
1068 while iterrev != base and iterrev != cachedrev: 824 while iterrev != base and iterrev != cachedrev:
1411 removed and that it'll readd them after this truncation. 1167 removed and that it'll readd them after this truncation.
1412 """ 1168 """
1413 if len(self) == 0: 1169 if len(self) == 0:
1414 return 1170 return
1415 1171
1416 if isinstance(self.index, lazyindex):
1417 self._loadindexmap()
1418
1419 for rev in self: 1172 for rev in self:
1420 if self.index[rev][4] >= minlink: 1173 if self.index[rev][4] >= minlink:
1421 break 1174 break
1422 else: 1175 else:
1423 return 1176 return