Mercurial > hg
comparison mercurial/tags.py @ 9151:f528d1a93491
tags: implement persistent tag caching (issue548).
- rename findglobaltags() to findglobaltags1() (so the "no cache"
implementation is still there if we need it)
- add findglobaltags2() and make findglobaltags() an alias for it
(disabling tag caching is a one-line patch)
- factor out tagcache class with methods readcache() and writecache();
the expensive part of tag finding (iterate over heads and find
.hgtags filenode) is now in tagcache.readcache()
author | Greg Ward <greg-hg@gerg.ca> |
---|---|
date | Thu, 16 Jul 2009 10:39:42 -0400 |
parents | abb7d4d43a5f |
children | 4017291c4c48 |
comparison
equal
deleted
inserted
replaced
9150:09a1ee498756 | 9151:f528d1a93491 |
---|---|
4 # Copyright 2009 Greg Ward <greg@gerg.ca> | 4 # Copyright 2009 Greg Ward <greg@gerg.ca> |
5 # | 5 # |
6 # This software may be used and distributed according to the terms of the | 6 # This software may be used and distributed according to the terms of the |
7 # GNU General Public License version 2, incorporated herein by reference. | 7 # GNU General Public License version 2, incorporated herein by reference. |
8 | 8 |
9 # Currently this module only deals with reading tags. Soon it will grow | 9 # Currently this module only deals with reading and caching tags. |
10 # support for caching tag info. Eventually, it could take care of | 10 # Eventually, it could take care of updating (adding/removing/moving) |
11 # updating (adding/removing/moving) tags too. | 11 # tags too. |
12 | 12 |
13 from node import bin, hex | 13 import os |
14 from node import nullid, bin, hex, short | |
14 from i18n import _ | 15 from i18n import _ |
15 import encoding | 16 import encoding |
16 import error | 17 import error |
17 | 18 |
18 def findglobaltags(ui, repo, alltags, tagtypes): | 19 def _debugalways(ui, *msg): |
20 ui.write(*msg) | |
21 | |
22 def _debugconditional(ui, *msg): | |
23 ui.debug(*msg) | |
24 | |
25 def _debugnever(ui, *msg): | |
26 pass | |
27 | |
28 _debug = _debugalways | |
29 _debug = _debugnever | |
30 | |
31 def findglobaltags1(ui, repo, alltags, tagtypes): | |
19 '''Find global tags in repo by reading .hgtags from every head that | 32 '''Find global tags in repo by reading .hgtags from every head that |
20 has a distinct version of it. Updates the dicts alltags, tagtypes | 33 has a distinct version of it. Updates the dicts alltags, tagtypes |
21 in place: alltags maps tag name to (node, hist) pair (see _readtags() | 34 in place: alltags maps tag name to (node, hist) pair (see _readtags() |
22 below), and tagtypes maps tag name to tag type ('global' in this | 35 below), and tagtypes maps tag name to tag type ('global' in this |
23 case).''' | 36 case).''' |
41 # read the tags file from each head, ending with the tip | 54 # read the tags file from each head, ending with the tip |
42 for fctx in reversed(ctxs): | 55 for fctx in reversed(ctxs): |
43 filetags = _readtags( | 56 filetags = _readtags( |
44 ui, repo, fctx.data().splitlines(), fctx) | 57 ui, repo, fctx.data().splitlines(), fctx) |
45 _updatetags(filetags, "global", alltags, tagtypes) | 58 _updatetags(filetags, "global", alltags, tagtypes) |
59 | |
60 def findglobaltags2(ui, repo, alltags, tagtypes): | |
61 '''Same as findglobaltags1(), but with caching.''' | |
62 (heads, tagfnode, shouldwrite) = _readtagcache(ui, repo) | |
63 | |
64 _debug(ui, "reading tags from %d head(s): %s\n" | |
65 % (len(heads), map(short, reversed(heads)))) | |
66 seen = set() # set of fnode | |
67 fctx = None | |
68 for head in reversed(heads): # oldest to newest | |
69 assert head in repo.changelog.nodemap, \ | |
70 "tag cache returned bogus head %s" % short(head) | |
71 | |
72 fnode = tagfnode.get(head) | |
73 if fnode and fnode not in seen: | |
74 seen.add(fnode) | |
75 if not fctx: | |
76 fctx = repo.filectx('.hgtags', fileid=fnode) | |
77 else: | |
78 fctx = fctx.filectx(fnode) | |
79 | |
80 filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx) | |
81 _updatetags(filetags, 'global', alltags, tagtypes) | |
82 | |
83 # and update the cache (if necessary) | |
84 if shouldwrite: | |
85 _writetagcache(ui, repo, heads, tagfnode) | |
86 | |
87 # Set this to findglobaltags1 to disable tag caching. | |
88 findglobaltags = findglobaltags2 | |
46 | 89 |
47 def readlocaltags(ui, repo, alltags, tagtypes): | 90 def readlocaltags(ui, repo, alltags, tagtypes): |
48 '''Read local tags in repo. Update alltags and tagtypes.''' | 91 '''Read local tags in repo. Update alltags and tagtypes.''' |
49 try: | 92 try: |
50 data = encoding.fromlocal(repo.opener("localtags").read()) | 93 data = encoding.fromlocal(repo.opener("localtags").read()) |
118 anode = bnode | 161 anode = bnode |
119 ahist.extend([n for n in bhist if n not in ahist]) | 162 ahist.extend([n for n in bhist if n not in ahist]) |
120 alltags[name] = anode, ahist | 163 alltags[name] = anode, ahist |
121 tagtypes[name] = tagtype | 164 tagtypes[name] = tagtype |
122 | 165 |
166 | |
167 # The tag cache only stores info about heads, not the tag contents | |
168 # from each head. I.e. it doesn't try to squeeze out the maximum | |
169 # performance, but is simpler and has a better chance of actually |
170 # working correctly. And this gives the biggest performance win: it | |
171 # avoids looking up .hgtags in the manifest for every head, and it | |
172 # can avoid calling heads() at all if there have been no changes to | |
173 # the repo. | |
174 | |
175 def _readtagcache(ui, repo): | |
176 '''Read the tag cache and return a tuple (heads, fnodes, | |
177 shouldwrite). heads is the list of all heads currently in the | |
178 repository (ordered from tip to oldest) and fnodes is a mapping from | |
179 head to .hgtags filenode. Caller is responsible for reading tag | |
180 info from each head.''' | |
181 | |
182 try: | |
183 cachefile = repo.opener('tags.cache', 'r') | |
184 _debug(ui, 'reading tag cache from %s\n' % cachefile.name) | |
185 except IOError: | |
186 cachefile = None | |
187 | |
188 # The cache file consists of lines like | |
189 # <headrev> <headnode> [<tagnode>] | |
190 # where <headrev> and <headnode> redundantly identify a repository | |
191 # head from the time the cache was written, and <tagnode> is the | |
192 # filenode of .hgtags on that head. Heads with no .hgtags file will | |
193 # have no <tagnode>. The cache is ordered from tip to oldest (which | |
194 # is part of why <headrev> is there: a quick visual check is all | |
195 # that's required to ensure correct order). | |
196 # | |
197 # This information is enough to let us avoid the most expensive part | |
198 # of finding global tags, which is looking up <tagnode> in the | |
199 # manifest for each head. | |
200 cacherevs = [] # list of headrev | |
201 cacheheads = [] # list of headnode | |
202 cachefnode = {} # map headnode to filenode | |
203 if cachefile: | |
204 for line in cachefile: | |
205 line = line.rstrip().split() | |
206 cacherevs.append(int(line[0])) | |
207 headnode = bin(line[1]) | |
208 cacheheads.append(headnode) | |
209 if len(line) == 3: | |
210 fnode = bin(line[2]) | |
211 cachefnode[headnode] = fnode | |
212 | |
213 cachefile.close() | |
214 | |
215 tipnode = repo.changelog.tip() | |
216 tiprev = len(repo.changelog) - 1 | |
217 | |
218 # Case 1 (common): tip is the same, so nothing has changed. | |
219 # (Unchanged tip trivially means no changesets have been added. | |
220 # But, thanks to localrepository.destroyed(), it also means none | |
221 # have been destroyed by strip or rollback.) | |
222 if cacheheads and cacheheads[0] == tipnode and cacherevs[0] == tiprev: | |
223 _debug(ui, "tag cache: tip unchanged\n") | |
224 return (cacheheads, cachefnode, False) | |
225 | |
226 repoheads = repo.heads() | |
227 | |
228 # Case 2 (uncommon): empty repo; get out quickly and don't bother | |
229 # writing an empty cache. | |
230 if repoheads == [nullid]: | |
231 return ([], {}, False) | |
232 | |
233 # Case 3 (uncommon): cache file missing or empty. | |
234 if not cacheheads: | |
235 _debug(ui, 'tag cache: cache file missing or empty\n') | |
236 | |
237 # Case 4 (uncommon): tip rev decreased. This should only happen | |
238 # when we're called from localrepository.destroyed(). Refresh the | |
239 # cache so future invocations will not see disappeared heads in the | |
240 # cache. | |
241 elif cacheheads and tiprev < cacherevs[0]: | |
242 _debug(ui, | |
243 'tag cache: tip rev decremented (from %d to %d), ' | |
244 'so we must be destroying nodes\n' | |
245 % (cacherevs[0], tiprev)) | |
246 | |
247 # Case 5 (common): tip has changed, so we've added/replaced heads. | |
248 else: | |
249 _debug(ui, | |
250 'tag cache: tip has changed (%d:%s); must find new heads\n' | |
251 % (tiprev, short(tipnode))) | |
252 | |
253 # Luckily, the code to handle cases 3, 4, 5 is the same. So the | |
254 # above if/elif/else can disappear once we're confident this thing | |
255 # actually works and we don't need the debug output. | |
256 | |
257 # N.B. in case 4 (nodes destroyed), "new head" really means "newly | |
258 # exposed". | |
259 newheads = [head | |
260 for head in repoheads | |
261 if head not in set(cacheheads)] | |
262 _debug(ui, 'tag cache: found %d head(s) not in cache: %s\n' | |
263 % (len(newheads), map(short, newheads))) | |
264 | |
265 # Now we have to lookup the .hgtags filenode for every new head. | |
266 # This is the most expensive part of finding tags, so performance | |
267 # depends primarily on the size of newheads. Worst case: no cache | |
268 # file, so newheads == repoheads. | |
269 for head in newheads: | |
270 cctx = repo[head] | |
271 try: | |
272 fnode = cctx.filenode('.hgtags') | |
273 cachefnode[head] = fnode | |
274 except error.LookupError: | |
275 # no .hgtags file on this head | |
276 pass | |
277 | |
278 # Caller has to iterate over all heads, but can use the filenodes in | |
279 # cachefnode to get to each .hgtags revision quickly. | |
280 return (repoheads, cachefnode, True) | |
281 | |
282 def _writetagcache(ui, repo, heads, tagfnode): | |
283 | |
284 cachefile = repo.opener('tags.cache', 'w', atomictemp=True) | |
285 _debug(ui, 'writing cache file %s\n' % cachefile.name) | |
286 | |
287 realheads = repo.heads() # for sanity checks below | |
288 for head in heads: | |
289 # temporary sanity checks; these can probably be removed | |
290 # once this code has been in crew for a few weeks | |
291 assert head in repo.changelog.nodemap, \ | |
292 'trying to write non-existent node %s to tag cache' % short(head) | |
293 assert head in realheads, \ | |
294 'trying to write non-head %s to tag cache' % short(head) | |
295 assert head != nullid, \ | |
296 'trying to write nullid to tag cache' | |
297 | |
298 # This can't fail because of the first assert above. When/if we | |
299 # remove that assert, we might want to catch LookupError here | |
300 # and downgrade it to a warning. | |
301 rev = repo.changelog.rev(head) | |
302 | |
303 fnode = tagfnode.get(head) | |
304 if fnode: | |
305 cachefile.write('%d %s %s\n' % (rev, hex(head), hex(fnode))) | |
306 else: | |
307 cachefile.write('%d %s\n' % (rev, hex(head))) | |
308 | |
309 cachefile.rename() | |
310 cachefile.close() |