comparison mercurial/tags.py @ 24445:c71edbafe603

tags: improve documentation The documentation for tags.py was making comprehension difficult. This patch rewrites most of the comments in the file to make it easier for mere mortals to understand what's going on.
author Gregory Szorc <gregory.szorc@gmail.com>
date Tue, 24 Mar 2015 20:28:39 -0700
parents 7b09dbbbd502
children f5de208a635c
comparison
equal deleted inserted replaced
24444:27e3ba73fbb1 24445:c71edbafe603
16 import encoding 16 import encoding
17 import error 17 import error
18 import errno 18 import errno
19 import time 19 import time
20 20
21 # The tags cache stores information about heads and the history of tags.
22 #
23 # The cache file consists of two parts. The first part maps head nodes
24 # to .hgtags filenodes. The second part is a history of tags. The two
25 # parts are separated by an empty line.
26 #
27 # The first part consists of lines of the form:
28 #
29 # <headrev> <headnode> [<hgtagsnode>]
30 #
31 # <headrev> is an integer revision and <headnode> is a 40 character hex
32 # node for that changeset. These redundantly identify a repository
33 # head from the time the cache was written.
34 #
35 # <hgtagsnode> is the filenode of .hgtags on that head. Heads with no .hgtags
36 # file will have no <hgtagsnode> (just 2 values per line).
37 #
38 # The filenode cache is ordered from tip to oldest (which is part of why
39 # <headrev> is there: a quick check of the tip from when the cache was
40 # written against the current tip is all that is needed to check whether
41 # the cache is up to date).
42 #
43 # The purpose of the filenode cache is to avoid the most expensive part
44 # of finding global tags, which is looking up the .hgtags filenode in the
45 # manifest for each head. This can take over a minute on repositories
46 # that have large manifests and many heads.
47 #
48 # The second part of the tags cache consists of lines of the form:
49 #
50 # <node> <tag>
51 #
52 # (This format is identical to that of .hgtags files.)
53 #
54 # <tag> is the tag name and <node> is the 40 character hex changeset
55 # the tag is associated with.
56 #
57 # Tags are written sorted by tag name.
58 #
59 # Tags associated with multiple changesets have an entry for each changeset.
60 # The most recent changeset (in terms of revlog ordering for the head
61 # setting it) for each tag is last.
62
21 def findglobaltags(ui, repo, alltags, tagtypes): 63 def findglobaltags(ui, repo, alltags, tagtypes):
22 '''Find global tags in repo by reading .hgtags from every head that 64 '''Find global tags in a repo.
23 has a distinct version of it, using a cache to avoid excess work. 65
24 Updates the dicts alltags, tagtypes in place: alltags maps tag name 66 "alltags" maps tag name to (node, hist) 2-tuples.
25 to (node, hist) pair (see _readtags() below), and tagtypes maps tag 67
26 name to tag type ("global" in this case).''' 68 "tagtypes" maps tag name to tag type. Global tags always have the
69 "global" tag type.
70
71 The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
72 should be passed in.
73
74 Calling this function reads and updates the tags cache as a side effect.
75 '''
27 # This is so we can be lazy and assume alltags contains only global 76 # This is so we can be lazy and assume alltags contains only global
28 # tags when we pass it to _writetagcache(). 77 # tags when we pass it to _writetagcache().
29 assert len(alltags) == len(tagtypes) == 0, \ 78 assert len(alltags) == len(tagtypes) == 0, \
30 "findglobaltags() should be called first" 79 "findglobaltags() should be called first"
31 80
36 # cases where a global tag should outrank a local tag but won't, 85 # cases where a global tag should outrank a local tag but won't,
37 # because cachetags does not contain rank info? 86 # because cachetags does not contain rank info?
38 _updatetags(cachetags, 'global', alltags, tagtypes) 87 _updatetags(cachetags, 'global', alltags, tagtypes)
39 return 88 return
40 89
41 seen = set() # set of fnode 90 seen = set() # set of fnode
42 fctx = None 91 fctx = None
43 for head in reversed(heads): # oldest to newest 92 for head in reversed(heads): # oldest to newest
44 assert head in repo.changelog.nodemap, \ 93 assert head in repo.changelog.nodemap, \
45 "tag cache returned bogus head %s" % short(head) 94 "tag cache returned bogus head %s" % short(head)
46 95
47 fnode = tagfnode.get(head) 96 fnode = tagfnode.get(head)
48 if fnode and fnode not in seen: 97 if fnode and fnode not in seen:
58 # and update the cache (if necessary) 107 # and update the cache (if necessary)
59 if shouldwrite: 108 if shouldwrite:
60 _writetagcache(ui, repo, heads, tagfnode, alltags) 109 _writetagcache(ui, repo, heads, tagfnode, alltags)
61 110
62 def readlocaltags(ui, repo, alltags, tagtypes): 111 def readlocaltags(ui, repo, alltags, tagtypes):
63 '''Read local tags in repo. Update alltags and tagtypes.''' 112 '''Read local tags in repo. Update alltags and tagtypes.'''
64 try: 113 try:
65 data = repo.vfs.read("localtags") 114 data = repo.vfs.read("localtags")
66 except IOError, inst: 115 except IOError, inst:
67 if inst.errno != errno.ENOENT: 116 if inst.errno != errno.ENOENT:
68 raise 117 raise
84 133
85 _updatetags(filetags, "local", alltags, tagtypes) 134 _updatetags(filetags, "local", alltags, tagtypes)
86 135
87 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False): 136 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
88 '''Read tag definitions from a file (or any source of lines). 137 '''Read tag definitions from a file (or any source of lines).
138
89 This function returns two sortdicts with similar information: 139 This function returns two sortdicts with similar information:
140
90 - the first dict, bintaghist, contains the tag information as expected by 141 - the first dict, bintaghist, contains the tag information as expected by
91 the _readtags function, i.e. a mapping from tag name to (node, hist): 142 the _readtags function, i.e. a mapping from tag name to (node, hist):
92 - node is the node id from the last line read for that name, 143 - node is the node id from the last line read for that name,
93 - hist is the list of node ids previously associated with it (in file 144 - hist is the list of node ids previously associated with it (in file
94 order). All node ids are binary, not hex. 145 order). All node ids are binary, not hex.
146
95 - the second dict, hextaglines, is a mapping from tag name to a list of 147 - the second dict, hextaglines, is a mapping from tag name to a list of
96 [hexnode, line number] pairs, ordered from the oldest to the newest node. 148 [hexnode, line number] pairs, ordered from the oldest to the newest node.
149
97 When calcnodelines is False the hextaglines dict is not calculated (an 150 When calcnodelines is False the hextaglines dict is not calculated (an
98 empty dict is returned). This is done to improve this function's 151 empty dict is returned). This is done to improve this function's
99 performance in cases where the line numbers are not needed. 152 performance in cases where the line numbers are not needed.
100 ''' 153 '''
101 154
137 bintaghist[name].append(nodebin) 190 bintaghist[name].append(nodebin)
138 return bintaghist, hextaglines 191 return bintaghist, hextaglines
139 192
140 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False): 193 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
141 '''Read tag definitions from a file (or any source of lines). 194 '''Read tag definitions from a file (or any source of lines).
142 Return a mapping from tag name to (node, hist): node is the node id 195
143 from the last line read for that name, and hist is the list of node 196 Returns a mapping from tag name to (node, hist).
144 ids previously associated with it (in file order). All node ids are 197
145 binary, not hex.''' 198 "node" is the node id from the last line read for that name. "hist"
199 is the list of node ids previously associated with it (in file order).
200 All node ids are binary, not hex.
201 '''
146 filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode, 202 filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
147 calcnodelines=calcnodelines) 203 calcnodelines=calcnodelines)
148 for tag, taghist in filetags.items(): 204 for tag, taghist in filetags.items():
149 filetags[tag] = (taghist[-1], taghist[:-1]) 205 filetags[tag] = (taghist[-1], taghist[:-1])
150 return filetags 206 return filetags
172 else: 228 else:
173 tagtypes[name] = tagtype 229 tagtypes[name] = tagtype
174 ahist.extend([n for n in bhist if n not in ahist]) 230 ahist.extend([n for n in bhist if n not in ahist])
175 alltags[name] = anode, ahist 231 alltags[name] = anode, ahist
176 232
177
178 # The tag cache only stores info about heads, not the tag contents
179 # from each head. I.e. it doesn't try to squeeze out the maximum
180 # performance, but is simpler and has a better chance of actually
181 # working correctly. And this gives the biggest performance win: it
182 # avoids looking up .hgtags in the manifest for every head, and it
183 # can avoid calling heads() at all if there have been no changes to
184 # the repo.
185
186 def _readtagcache(ui, repo): 233 def _readtagcache(ui, repo):
187 '''Read the tag cache and return a tuple (heads, fnodes, cachetags, 234 '''Read the tag cache.
188 shouldwrite). If the cache is completely up-to-date, cachetags is a 235
189 dict of the form returned by _readtags(); otherwise, it is None and 236 Returns a tuple (heads, fnodes, cachetags, shouldwrite).
190 heads and fnodes are set. In that case, heads is the list of all 237
191 heads currently in the repository (ordered from tip to oldest) and 238 If the cache is completely up-to-date, "cachetags" is a dict of the
192 fnodes is a mapping from head to .hgtags filenode. If those two are 239 form returned by _readtags() and "heads" and "fnodes" are None and
193 set, caller is responsible for reading tag info from each head.''' 240 "shouldwrite" is False.
241
242 If the cache is not up to date, "cachetags" is None. "heads" is a list
243 of all heads currently in the repository, ordered from tip to oldest.
244 "fnodes" is a mapping from head to .hgtags filenode. "shouldwrite" is
245 True.
246
247 If the cache is not up to date, the caller is responsible for reading tag
248 info from each returned head. (See findglobaltags().)
249 '''
194 250
195 try: 251 try:
196 cachefile = repo.vfs('cache/tags', 'r') 252 cachefile = repo.vfs('cache/tags', 'r')
197 # force reading the file for static-http 253 # force reading the file for static-http
198 cachelines = iter(cachefile) 254 cachelines = iter(cachefile)
199 except IOError: 255 except IOError:
200 cachefile = None 256 cachefile = None
201 257
202 # The cache file consists of lines like 258 cacherevs = [] # list of headrev
203 # <headrev> <headnode> [<tagnode>] 259 cacheheads = [] # list of headnode
204 # where <headrev> and <headnode> redundantly identify a repository 260 cachefnode = {} # map headnode to filenode
205 # head from the time the cache was written, and <tagnode> is the
206 # filenode of .hgtags on that head. Heads with no .hgtags file will
207 # have no <tagnode>. The cache is ordered from tip to oldest (which
208 # is part of why <headrev> is there: a quick visual check is all
209 # that's required to ensure correct order).
210 #
211 # This information is enough to let us avoid the most expensive part
212 # of finding global tags, which is looking up <tagnode> in the
213 # manifest for each head.
214 cacherevs = [] # list of headrev
215 cacheheads = [] # list of headnode
216 cachefnode = {} # map headnode to filenode
217 if cachefile: 261 if cachefile:
218 try: 262 try:
219 for line in cachelines: 263 for line in cachelines:
220 if line == "\n": 264 if line == "\n":
221 break 265 break
299 # Caller has to iterate over all heads, but can use the filenodes in 343 # Caller has to iterate over all heads, but can use the filenodes in
300 # cachefnode to get to each .hgtags revision quickly. 344 # cachefnode to get to each .hgtags revision quickly.
301 return (repoheads, cachefnode, None, True) 345 return (repoheads, cachefnode, None, True)
302 346
303 def _writetagcache(ui, repo, heads, tagfnode, cachetags): 347 def _writetagcache(ui, repo, heads, tagfnode, cachetags):
304
305 try: 348 try:
306 cachefile = repo.vfs('cache/tags', 'w', atomictemp=True) 349 cachefile = repo.vfs('cache/tags', 'w', atomictemp=True)
307 except (OSError, IOError): 350 except (OSError, IOError):
308 return 351 return
309 352