comparison mercurial/tags.py @ 9151:f528d1a93491

tags: implement persistent tag caching (issue548). - rename findglobaltags() to findglobaltags1() (so the "no cache" implementation is still there if we need it) - add findglobaltags2() and make findglobaltags() an alias for it (disabling tag caching is a one-line patch) - factor out tagcache class with methods readcache() and writecache(); the expensive part of tag finding (iterate over heads and find .hgtags filenode) is now in tagcache.readcache()
author Greg Ward <greg-hg@gerg.ca>
date Thu, 16 Jul 2009 10:39:42 -0400
parents abb7d4d43a5f
children 4017291c4c48
comparison
equal deleted inserted replaced
9150:09a1ee498756 9151:f528d1a93491
4 # Copyright 2009 Greg Ward <greg@gerg.ca> 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 # 5 #
6 # This software may be used and distributed according to the terms of the 6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2, incorporated herein by reference. 7 # GNU General Public License version 2, incorporated herein by reference.
8 8
9 # Currently this module only deals with reading tags. Soon it will grow 9 # Currently this module only deals with reading and caching tags.
10 # support for caching tag info. Eventually, it could take care of 10 # Eventually, it could take care of updating (adding/removing/moving)
11 # updating (adding/removing/moving) tags too. 11 # tags too.
12 12
13 from node import bin, hex 13 import os
14 from node import nullid, bin, hex, short
14 from i18n import _ 15 from i18n import _
15 import encoding 16 import encoding
16 import error 17 import error
17 18
18 def findglobaltags(ui, repo, alltags, tagtypes): 19 def _debugalways(ui, *msg):
20 ui.write(*msg)
21
def _debugconditional(ui, *msg):
    '''Debug sink that hands msg straight to ui.debug().'''
    ui.debug(*msg)

def _debugnever(ui, *msg):
    '''Debug sink that accepts msg and throws it away.'''

# Developer toggle: the last binding wins, so tag-cache debug output is
# discarded.  Delete the second assignment to send it through
# _debugalways instead.
_debug = _debugalways
_debug = _debugnever
30
31 def findglobaltags1(ui, repo, alltags, tagtypes):
19 '''Find global tags in repo by reading .hgtags from every head that 32 '''Find global tags in repo by reading .hgtags from every head that
20 has a distinct version of it. Updates the dicts alltags, tagtypes 33 has a distinct version of it. Updates the dicts alltags, tagtypes
21 in place: alltags maps tag name to (node, hist) pair (see _readtags() 34 in place: alltags maps tag name to (node, hist) pair (see _readtags()
22 below), and tagtypes maps tag name to tag type ('global' in this 35 below), and tagtypes maps tag name to tag type ('global' in this
23 case).''' 36 case).'''
41 # read the tags file from each head, ending with the tip 54 # read the tags file from each head, ending with the tip
42 for fctx in reversed(ctxs): 55 for fctx in reversed(ctxs):
43 filetags = _readtags( 56 filetags = _readtags(
44 ui, repo, fctx.data().splitlines(), fctx) 57 ui, repo, fctx.data().splitlines(), fctx)
45 _updatetags(filetags, "global", alltags, tagtypes) 58 _updatetags(filetags, "global", alltags, tagtypes)
59
def findglobaltags2(ui, repo, alltags, tagtypes):
    '''Cached variant of findglobaltags1().

    Asks _readtagcache() for the repository heads and the .hgtags
    filenode of each head, reads every distinct .hgtags revision once
    (oldest head first, so later heads are merged last) and folds the
    result into alltags and tagtypes in place.  If _readtagcache()
    reports that the cache is out of date, it is rewritten afterwards.'''
    heads, tagfnode, shouldwrite = _readtagcache(ui, repo)

    # heads arrives ordered tip-first; we want oldest to newest
    oldestfirst = list(reversed(heads))
    _debug(ui, "reading tags from %d head(s): %s\n"
           % (len(heads), map(short, oldestfirst)))

    processed = set()                   # .hgtags filenodes already read
    tagsctx = None                      # most recent .hgtags filectx
    for headnode in oldestfirst:
        assert headnode in repo.changelog.nodemap, \
               "tag cache returned bogus head %s" % short(headnode)

        filenode = tagfnode.get(headnode)
        if not filenode or filenode in processed:
            # no .hgtags on this head, or this revision was already read
            continue
        processed.add(filenode)

        # the first filectx comes from the repo; later ones are derived
        # from the previous filectx
        if tagsctx:
            tagsctx = tagsctx.filectx(filenode)
        else:
            tagsctx = repo.filectx('.hgtags', fileid=filenode)

        taglines = tagsctx.data().splitlines()
        filetags = _readtags(ui, repo, taglines, tagsctx)
        _updatetags(filetags, 'global', alltags, tagtypes)

    # refresh the on-disk cache only when _readtagcache() asked for it
    if shouldwrite:
        _writetagcache(ui, repo, heads, tagfnode)

# Set this to findglobaltags1 to disable tag caching.
findglobaltags = findglobaltags2
46 89
47 def readlocaltags(ui, repo, alltags, tagtypes): 90 def readlocaltags(ui, repo, alltags, tagtypes):
48 '''Read local tags in repo. Update alltags and tagtypes.''' 91 '''Read local tags in repo. Update alltags and tagtypes.'''
49 try: 92 try:
50 data = encoding.fromlocal(repo.opener("localtags").read()) 93 data = encoding.fromlocal(repo.opener("localtags").read())
118 anode = bnode 161 anode = bnode
119 ahist.extend([n for n in bhist if n not in ahist]) 162 ahist.extend([n for n in bhist if n not in ahist])
120 alltags[name] = anode, ahist 163 alltags[name] = anode, ahist
121 tagtypes[name] = tagtype 164 tagtypes[name] = tagtype
122 165
166
167 # The tag cache only stores info about heads, not the tag contents
168 # from each head. I.e. it doesn't try to squeeze out the maximum
169 # performance, but is simpler and has a better chance of actually
170 # working correctly. And this gives the biggest performance win: it
171 # avoids looking up .hgtags in the manifest for every head, and it
172 # can avoid calling heads() at all if there have been no changes to
173 # the repo.
174
def _readtagcache(ui, repo):
    '''Read the tag cache and return a tuple (heads, fnodes,
    shouldwrite).  heads is the list of all heads currently in the
    repository (ordered from tip to oldest) and fnodes is a mapping from
    head to .hgtags filenode.  shouldwrite is True when the information
    returned differs from what is stored in the cache file, i.e. when
    the caller should rewrite the cache.  Caller is responsible for
    reading tag info from each head.

    A missing cache file is treated as an empty cache.  A cache file
    that cannot be parsed is reported with a warning and also treated
    as empty, so that it gets rebuilt instead of aborting the
    command.'''

    try:
        cachefile = repo.opener('tags.cache', 'r')
        _debug(ui, 'reading tag cache from %s\n' % cachefile.name)
    except IOError:
        cachefile = None

    # The cache file consists of lines like
    #   <headrev> <headnode> [<tagnode>]
    # where <headrev> and <headnode> redundantly identify a repository
    # head from the time the cache was written, and <tagnode> is the
    # filenode of .hgtags on that head.  Heads with no .hgtags file will
    # have no <tagnode>.  The cache is ordered from tip to oldest (which
    # is part of why <headrev> is there: a quick visual check is all
    # that's required to ensure correct order).
    #
    # This information is enough to let us avoid the most expensive part
    # of finding global tags, which is looking up <tagnode> in the
    # manifest for each head.
    cacherevs = []                      # list of headrev
    cacheheads = []                     # list of headnode
    cachefnode = {}                     # map headnode to filenode
    if cachefile:
        # nested try blocks: always close the file, even if a line
        # cannot be parsed
        try:
            try:
                for line in cachefile:
                    fields = line.split()
                    cacherevs.append(int(fields[0]))
                    headnode = bin(fields[1])
                    cacheheads.append(headnode)
                    if len(fields) == 3:
                        cachefnode[headnode] = bin(fields[2])
            except (IndexError, ValueError, TypeError):
                # Blank or truncated line, non-numeric rev, or a node
                # that is not valid hex.  The cache is only an
                # optimisation, so forget whatever was parsed and fall
                # through to the "cache missing or empty" path below.
                ui.warn(_('tag cache is corrupt, rebuilding it\n'))
                cacherevs = []
                cacheheads = []
                cachefnode = {}
        finally:
            cachefile.close()

    tipnode = repo.changelog.tip()
    tiprev = len(repo.changelog) - 1

    # Case 1 (common): tip is the same, so nothing has changed.
    # (Unchanged tip trivially means no changesets have been added.
    # But, thanks to localrepository.destroyed(), it also means none
    # have been destroyed by strip or rollback.)
    if cacheheads and cacheheads[0] == tipnode and cacherevs[0] == tiprev:
        _debug(ui, "tag cache: tip unchanged\n")
        return (cacheheads, cachefnode, False)

    repoheads = repo.heads()

    # Case 2 (uncommon): empty repo; get out quickly and don't bother
    # writing an empty cache.
    if repoheads == [nullid]:
        return ([], {}, False)

    # Case 3 (uncommon): cache file missing, empty or discarded as
    # corrupt.
    if not cacheheads:
        _debug(ui, 'tag cache: cache file missing or empty\n')

    # Case 4 (uncommon): tip rev decreased.  This should only happen
    # when we're called from localrepository.destroyed().  Refresh the
    # cache so future invocations will not see disappeared heads in the
    # cache.
    elif tiprev < cacherevs[0]:
        _debug(ui,
               'tag cache: tip rev decremented (from %d to %d), '
               'so we must be destroying nodes\n'
               % (cacherevs[0], tiprev))

    # Case 5 (common): tip has changed, so we've added/replaced heads.
    else:
        _debug(ui,
               'tag cache: tip has changed (%d:%s); must find new heads\n'
               % (tiprev, short(tipnode)))

    # Luckily, the code to handle cases 3, 4, 5 is the same.  So the
    # above if/elif/else can disappear once we're confident this thing
    # actually works and we don't need the debug output.

    # N.B. in case 4 (nodes destroyed), "new head" really means "newly
    # exposed".  Build the lookup set once rather than once per head.
    knownheads = set(cacheheads)
    newheads = [head for head in repoheads if head not in knownheads]
    _debug(ui, 'tag cache: found %d head(s) not in cache: %s\n'
           % (len(newheads), map(short, newheads)))

    # Now we have to lookup the .hgtags filenode for every new head.
    # This is the most expensive part of finding tags, so performance
    # depends primarily on the size of newheads.  Worst case: no cache
    # file, so newheads == repoheads.
    for head in newheads:
        cctx = repo[head]
        try:
            fnode = cctx.filenode('.hgtags')
        except error.LookupError:
            # no .hgtags file on this head
            continue
        cachefnode[head] = fnode

    # Caller has to iterate over all heads, but can use the filenodes in
    # cachefnode to get to each .hgtags revision quickly.
    return (repoheads, cachefnode, True)
281
def _writetagcache(ui, repo, heads, tagfnode):
    '''Write the tag cache file: one line per entry of heads, in the
    order given, of the form

      <headrev> <headnode> [<tagnode>]

    where <tagnode> is the .hgtags filenode recorded for that head in
    tagfnode and is left out for heads that have none.

    The cache is purely an optimisation, so if the cache file cannot be
    opened for writing (for example in a repository the user may read
    but not modify) this returns without writing anything instead of
    failing the command that merely wanted to read tags.'''

    try:
        # NOTE(review): atomictemp presumably makes the opener write to
        # a temporary file that rename() below moves into place --
        # confirm against the opener implementation.
        cachefile = repo.opener('tags.cache', 'w', atomictemp=True)
    except (OSError, IOError):
        return
    _debug(ui, 'writing cache file %s\n' % cachefile.name)

    # for sanity checks below; a set keeps each membership test cheap
    realheads = set(repo.heads())
    for head in heads:
        # temporary sanity checks; these can probably be removed
        # once this code has been in crew for a few weeks
        assert head in repo.changelog.nodemap, \
               'trying to write non-existent node %s to tag cache' % short(head)
        assert head in realheads, \
               'trying to write non-head %s to tag cache' % short(head)
        assert head != nullid, \
               'trying to write nullid to tag cache'

        # This can't fail because of the first assert above.  When/if we
        # remove that assert, we might want to catch LookupError here
        # and downgrade it to a warning.
        rev = repo.changelog.rev(head)

        fnode = tagfnode.get(head)
        if fnode:
            cachefile.write('%d %s %s\n' % (rev, hex(head), hex(fnode)))
        else:
            cachefile.write('%d %s\n' % (rev, hex(head)))

    cachefile.rename()
    cachefile.close()
310 cachefile.close()