73 # 4 bytes link rev |
74 # 4 bytes link rev |
74 # 4 bytes parent 1 rev |
75 # 4 bytes parent 1 rev |
75 # 4 bytes parent 2 rev |
76 # 4 bytes parent 2 rev |
76 # 32 bytes: nodeid |
77 # 32 bytes: nodeid |
77 indexformatng = ">Qiiiiii20s12x" |
78 indexformatng = ">Qiiiiii20s12x" |
|
79 ngshaoffset = 32 |
78 versionformat = ">i" |
80 versionformat = ">i" |
79 |
81 |
80 class lazyparser(object): |
82 class lazyparser(object): |
81 """ |
83 """ |
82 this class avoids the need to parse the entirety of large indices |
84 this class avoids the need to parse the entirety of large indices |
83 |
|
84 By default we parse and load 1000 entries at a time. |
|
85 |
|
86 If no position is specified, we load the whole index, and replace |
|
87 the lazy objects in revlog with the underlying objects for |
|
88 efficiency in cases where we look at most of the nodes. |
|
89 """ |
85 """ |
90 def __init__(self, data, revlog, indexformat): |
86 def __init__(self, dataf, size, indexformat, shaoffset): |
91 self.data = data |
87 self.dataf = dataf |
|
88 self.format = indexformat |
92 self.s = struct.calcsize(indexformat) |
89 self.s = struct.calcsize(indexformat) |
93 self.indexformat = indexformat |
90 self.indexformat = indexformat |
94 self.l = len(data)/self.s |
91 self.datasize = size |
|
92 self.l = size/self.s |
95 self.index = [None] * self.l |
93 self.index = [None] * self.l |
96 self.map = {nullid: -1} |
94 self.map = {nullid: -1} |
|
95 self.allmap = 0 |
97 self.all = 0 |
96 self.all = 0 |
98 self.revlog = revlog |
97 self.mapfind_count = 0 |
99 |
98 self.shaoffset = shaoffset |
100 def load(self, pos=None): |
99 |
|
100 def loadmap(self): |
|
101 """ |
|
102 during a commit, we need to make sure the rev being added is |
|
103 not a duplicate. This requires loading the entire index, |
|
104 which is fairly slow. loadmap can load up just the node map, |
|
105 which takes much less time. |
|
106 """ |
|
107 if self.allmap: return |
|
108 start = 0 |
|
109 end = self.datasize |
|
110 self.allmap = 1 |
|
111 cur = 0 |
|
112 count = 0 |
|
113 blocksize = self.s * 256 |
|
114 self.dataf.seek(0) |
|
115 while cur < end: |
|
116 data = self.dataf.read(blocksize) |
|
117 off = 0 |
|
118 for x in xrange(256): |
|
119 n = data[off + self.shaoffset:off + self.shaoffset + 20] |
|
120 self.map[n] = count |
|
121 count += 1 |
|
122 if count >= self.l: |
|
123 break |
|
124 off += self.s |
|
125 cur += blocksize |
|
126 |
|
127 def loadblock(self, blockstart, blocksize, data=None): |
101 if self.all: return |
128 if self.all: return |
102 if pos is not None: |
129 if data is None: |
103 block = pos / 1000 |
130 self.dataf.seek(blockstart) |
104 i = block * 1000 |
131 data = self.dataf.read(blocksize) |
105 end = min(self.l, i + 1000) |
132 lend = len(data) / self.s |
106 else: |
133 i = blockstart / self.s |
107 self.all = 1 |
134 off = 0 |
108 i = 0 |
135 for x in xrange(lend): |
109 end = self.l |
136 if self.index[i + x] == None: |
110 self.revlog.index = self.index |
137 b = data[off : off + self.s] |
111 self.revlog.nodemap = self.map |
138 e = struct.unpack(self.format, b) |
112 |
139 self.index[i + x] = e |
113 while i < end: |
140 self.map[e[-1]] = i + x |
114 if not self.index[i]: |
141 off += self.s |
115 d = self.data[i * self.s: (i + 1) * self.s] |
142 |
116 e = struct.unpack(self.indexformat, d) |
143 def findnode(self, node): |
117 self.index[i] = e |
144 """search backwards through the index file for a specific node""" |
118 self.map[e[-1]] = i |
145 if self.allmap: return None |
119 i += 1 |
146 |
|
147 # hg log will cause many many searches for the manifest |
|
148 # nodes. After we get called a few times, just load the whole |
|
149 # thing. |
|
150 if self.mapfind_count > 8: |
|
151 self.loadmap() |
|
152 if node in self.map: |
|
153 return node |
|
154 return None |
|
155 self.mapfind_count += 1 |
|
156 last = self.l - 1 |
|
157 while self.index[last] != None: |
|
158 if last == 0: |
|
159 self.all = 1 |
|
160 self.allmap = 1 |
|
161 return None |
|
162 last -= 1 |
|
163 end = (last + 1) * self.s |
|
164 blocksize = self.s * 256 |
|
165 while end >= 0: |
|
166 start = max(end - blocksize, 0) |
|
167 self.dataf.seek(start) |
|
168 data = self.dataf.read(end - start) |
|
169 findend = end - start |
|
170 while True: |
|
171 # we're searching backwards, so weh have to make sure |
|
172 # we don't find a changeset where this node is a parent |
|
173 off = data.rfind(node, 0, findend) |
|
174 findend = off |
|
175 if off >= 0: |
|
176 i = off / self.s |
|
177 off = i * self.s |
|
178 n = data[off + self.shaoffset:off + self.shaoffset + 20] |
|
179 if n == node: |
|
180 self.map[n] = i + start / self.s |
|
181 return node |
|
182 else: |
|
183 break |
|
184 end -= blocksize |
|
185 return None |
|
186 |
|
187 def loadindex(self, i=None, end=None): |
|
188 if self.all: return |
|
189 all = False |
|
190 if i == None: |
|
191 blockstart = 0 |
|
192 blocksize = (512 / self.s) * self.s |
|
193 end = self.datasize |
|
194 all = True |
|
195 else: |
|
196 if end: |
|
197 blockstart = i * self.s |
|
198 end = end * self.s |
|
199 blocksize = end - blockstart |
|
200 else: |
|
201 blockstart = (i & ~(32)) * self.s |
|
202 blocksize = self.s * 64 |
|
203 end = blockstart + blocksize |
|
204 while blockstart < end: |
|
205 self.loadblock(blockstart, blocksize) |
|
206 blockstart += blocksize |
|
207 if all: self.all = True |
120 |
208 |
121 class lazyindex(object): |
209 class lazyindex(object): |
122 """a lazy version of the index array""" |
210 """a lazy version of the index array""" |
123 def __init__(self, parser): |
211 def __init__(self, parser): |
124 self.p = parser |
212 self.p = parser |
125 def __len__(self): |
213 def __len__(self): |
126 return len(self.p.index) |
214 return len(self.p.index) |
127 def load(self, pos): |
215 def load(self, pos): |
128 if pos < 0: |
216 if pos < 0: |
129 pos += len(self.p.index) |
217 pos += len(self.p.index) |
130 self.p.load(pos) |
218 self.p.loadindex(pos) |
131 return self.p.index[pos] |
219 return self.p.index[pos] |
132 def __getitem__(self, pos): |
220 def __getitem__(self, pos): |
133 return self.p.index[pos] or self.load(pos) |
221 return self.p.index[pos] or self.load(pos) |
134 def __setitem__(self, pos, item): |
222 def __setitem__(self, pos, item): |
135 self.p.index[pos] = item |
223 self.p.index[pos] = item |
141 class lazymap(object): |
229 class lazymap(object): |
142 """a lazy version of the node map""" |
230 """a lazy version of the node map""" |
143 def __init__(self, parser): |
231 def __init__(self, parser): |
144 self.p = parser |
232 self.p = parser |
145 def load(self, key): |
233 def load(self, key): |
146 if self.p.all: return |
234 n = self.p.findnode(key) |
147 n = self.p.data.find(key) |
235 if n == None: |
148 if n < 0: |
|
149 raise KeyError(key) |
236 raise KeyError(key) |
150 pos = n / self.p.s |
|
151 self.p.load(pos) |
|
152 def __contains__(self, key): |
237 def __contains__(self, key): |
153 self.p.load() |
238 if key in self.p.map: |
|
239 return True |
|
240 self.p.loadmap() |
154 return key in self.p.map |
241 return key in self.p.map |
155 def __iter__(self): |
242 def __iter__(self): |
156 yield nullid |
243 yield nullid |
157 for i in xrange(self.p.l): |
244 for i in xrange(self.p.l): |
158 try: |
245 try: |
159 yield self.p.index[i][-1] |
246 yield self.p.index[i][-1] |
160 except: |
247 except: |
161 self.p.load(i) |
248 self.p.loadindex(i) |
162 yield self.p.index[i][-1] |
249 yield self.p.index[i][-1] |
163 def __getitem__(self, key): |
250 def __getitem__(self, key): |
164 try: |
251 try: |
165 return self.p.map[key] |
252 return self.p.map[key] |
166 except KeyError: |
253 except KeyError: |
256 raise RevlogError(_("index %s invalid format %d" % |
344 raise RevlogError(_("index %s invalid format %d" % |
257 (self.indexfile, fmt))) |
345 (self.indexfile, fmt))) |
258 self.version = v |
346 self.version = v |
259 if v == 0: |
347 if v == 0: |
260 self.indexformat = indexformatv0 |
348 self.indexformat = indexformatv0 |
|
349 shaoffset = v0shaoffset |
261 else: |
350 else: |
262 self.indexformat = indexformatng |
351 self.indexformat = indexformatng |
|
352 shaoffset = ngshaoffset |
263 |
353 |
264 if i: |
354 if i: |
265 if not self.inlinedata() and st and st.st_size > 10000: |
355 if not self.inlinedata() and st and st.st_size > 10000: |
266 # big index, let's parse it on demand |
356 # big index, let's parse it on demand |
267 parser = lazyparser(i, self, self.indexformat) |
357 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset) |
268 self.index = lazyindex(parser) |
358 self.index = lazyindex(parser) |
269 self.nodemap = lazymap(parser) |
359 self.nodemap = lazymap(parser) |
270 else: |
360 else: |
|
361 i = f.read() |
271 self.parseindex(i) |
362 self.parseindex(i) |
272 if self.inlinedata(): |
363 if self.inlinedata(): |
273 # we've already got the entire data file read in, save it |
364 # we've already got the entire data file read in, save it |
274 # in the chunk data |
365 # in the chunk data |
275 self.chunkcache = (0, i) |
366 self.chunkcache = (0, i) |
310 return int(q & 0xFFFF) |
401 return int(q & 0xFFFF) |
311 |
402 |
312 def offset_type(self, offset, type): |
403 def offset_type(self, offset, type): |
313 return long(long(offset) << 16 | type) |
404 return long(long(offset) << 16 | type) |
314 |
405 |
|
406 def loadindex(self, start, end): |
|
407 """load a block of indexes all at once from the lazy parser""" |
|
408 if isinstance(self.index, lazyindex): |
|
409 self.index.p.loadindex(start, end) |
|
410 |
315 def loadindexmap(self): |
411 def loadindexmap(self): |
316 """loads both the map and the index from the lazy parser""" |
412 """loads both the map and the index from the lazy parser""" |
317 if isinstance(self.index, lazyindex): |
413 if isinstance(self.index, lazyindex): |
318 p = self.index.p |
414 p = self.index.p |
319 p.load() |
415 p.loadindex() |
|
416 self.nodemap = p.map |
|
417 |
|
418 def loadmap(self): |
|
419 """loads the map from the lazy parser""" |
|
420 if isinstance(self.nodemap, lazymap): |
|
421 self.nodemap.p.loadmap() |
|
422 self.nodemap = self.nodemap.p.map |
320 |
423 |
321 def inlinedata(self): return self.version & REVLOGNGINLINEDATA |
424 def inlinedata(self): return self.version & REVLOGNGINLINEDATA |
322 def tip(self): return self.node(len(self.index) - 1) |
425 def tip(self): return self.node(len(self.index) - 1) |
323 def count(self): return len(self.index) |
426 def count(self): return len(self.index) |
324 def node(self, rev): |
427 def node(self, rev): |