tags: support 'instant' tag retrieval (issue548)
author Greg Ward <greg-hg@gerg.ca>
Thu, 16 Jul 2009 10:41:19 -0400
changeset 9152 4017291c4c48
parent 9151 f528d1a93491
child 9153 6adc899c98d0
tags: support 'instant' tag retrieval (issue548)

- modify _readtagcache() and _writetagcache() to read/write tag->node mapping for global tags
- if (and only if) tip unchanged, use that cached mapping to avoid reading any revisions of .hgtags
- change so tag names are UTF-8 in memory in tags.py, and converted to local encoding as late as possible (in localrepository._findtags())
mercurial/localrepo.py
mercurial/tags.py
tests/test-mq.out
tests/test-tags
tests/test-tags.out
--- a/mercurial/localrepo.py	Thu Jul 16 10:39:42 2009 -0400
+++ b/mercurial/localrepo.py	Thu Jul 16 10:41:19 2009 -0400
@@ -265,11 +265,17 @@
         tags_.findglobaltags(self.ui, self, alltags, tagtypes)
         tags_.readlocaltags(self.ui, self, alltags, tagtypes)
 
+        # Build the return dicts.  Have to re-encode tag names because
+        # the tags module always uses UTF-8 (in order not to lose info
+        # writing to the cache), but the rest of Mercurial wants them in
+        # local encoding.
         tags = {}
         for (name, (node, hist)) in alltags.iteritems():
             if node != nullid:
-                tags[name] = node
+                tags[encoding.tolocal(name)] = node
         tags['tip'] = self.changelog.tip()
+        tagtypes = dict([(encoding.tolocal(name), value)
+                         for (name, value) in tagtypes.iteritems()])
         return (tags, tagtypes)
 
     def tagtype(self, tagname):
--- a/mercurial/tags.py	Thu Jul 16 10:39:42 2009 -0400
+++ b/mercurial/tags.py	Thu Jul 16 10:41:19 2009 -0400
@@ -59,7 +59,19 @@
 
 def findglobaltags2(ui, repo, alltags, tagtypes):
     '''Same as findglobaltags1(), but with caching.'''
-    (heads, tagfnode, shouldwrite) = _readtagcache(ui, repo)
+    # This is so we can be lazy and assume alltags contains only global
+    # tags when we pass it to _writetagcache().
+    assert len(alltags) == len(tagtypes) == 0, \
+           "findglobaltags() should be called first"
+
+    (heads, tagfnode, cachetags, shouldwrite) = _readtagcache(ui, repo)
+    if cachetags is not None:
+        assert not shouldwrite
+        # XXX is this really 100% correct?  are there oddball special
+        # cases where a global tag should outrank a local tag but won't,
+        # because cachetags does not contain rank info?
+        _updatetags(cachetags, 'global', alltags, tagtypes)
+        return
 
     _debug(ui, "reading tags from %d head(s): %s\n"
            % (len(heads), map(short, reversed(heads))))
@@ -82,7 +94,7 @@
 
     # and update the cache (if necessary)
     if shouldwrite:
-        _writetagcache(ui, repo, heads, tagfnode)
+        _writetagcache(ui, repo, heads, tagfnode, alltags)
 
 # Set this to findglobaltags1 to disable tag caching.
 findglobaltags = findglobaltags2
@@ -90,16 +102,17 @@
 def readlocaltags(ui, repo, alltags, tagtypes):
     '''Read local tags in repo.  Update alltags and tagtypes.'''
     try:
-        data = encoding.fromlocal(repo.opener("localtags").read())
-        # localtags are stored in the local character set
-        # while the internal tag table is stored in UTF-8
+        # localtags is in the local encoding; re-encode to UTF-8 on
+        # input for consistency with the rest of this module.
+        data = repo.opener("localtags").read()
         filetags = _readtags(
-            ui, repo, data.splitlines(), "localtags")
+            ui, repo, data.splitlines(), "localtags",
+            recode=encoding.fromlocal)
         _updatetags(filetags, "local", alltags, tagtypes)
     except IOError:
         pass
 
-def _readtags(ui, repo, lines, fn):
+def _readtags(ui, repo, lines, fn, recode=None):
     '''Read tag definitions from a file (or any source of lines).
     Return a mapping from tag name to (node, hist): node is the node id
     from the last line read for that name, and hist is the list of node
@@ -121,7 +134,9 @@
         except ValueError:
             warn(_("cannot parse entry"))
             continue
-        name = encoding.tolocal(name.strip()) # stored in UTF-8
+        name = name.strip()
+        if recode:
+            name = recode(name)
         try:
             nodebin = bin(nodehex)
         except TypeError:
@@ -173,11 +188,13 @@
 # the repo.
 
 def _readtagcache(ui, repo):
-    '''Read the tag cache and return a tuple (heads, fnodes,
-    shouldwrite).  heads is the list of all heads currently in the
-    repository (ordered from tip to oldest) and fnodes is a mapping from
-    head to .hgtags filenode.  Caller is responsible for reading tag
-    info from each head.'''
+    '''Read the tag cache and return a tuple (heads, fnodes, cachetags,
+    shouldwrite).  If the cache is completely up-to-date, cachetags is a
+    dict of the form returned by _readtags(); otherwise, it is None and
+    heads and fnodes are set.  In that case, heads is the list of all
+    heads currently in the repository (ordered from tip to oldest) and
+    fnodes is a mapping from head to .hgtags filenode.  If those two are
+    set, caller is responsible for reading tag info from each head.'''
 
     try:
         cachefile = repo.opener('tags.cache', 'r')
@@ -202,6 +219,8 @@
     cachefnode = {}                     # map headnode to filenode
     if cachefile:
         for line in cachefile:
+            if line == "\n":
+                break
             line = line.rstrip().split()
             cacherevs.append(int(line[0]))
             headnode = bin(line[1])
@@ -210,8 +229,6 @@
                 fnode = bin(line[2])
                 cachefnode[headnode] = fnode
 
-        cachefile.close()
-
     tipnode = repo.changelog.tip()
     tiprev = len(repo.changelog) - 1
 
@@ -221,14 +238,18 @@
     # have been destroyed by strip or rollback.)
     if cacheheads and cacheheads[0] == tipnode and cacherevs[0] == tiprev:
         _debug(ui, "tag cache: tip unchanged\n")
-        return (cacheheads, cachefnode, False)
+        tags = _readtags(ui, repo, cachefile, cachefile.name)
+        cachefile.close()
+        return (None, None, tags, False)
+    if cachefile:
+        cachefile.close()               # ignore rest of file
         
     repoheads = repo.heads()
 
     # Case 2 (uncommon): empty repo; get out quickly and don't bother
     # writing an empty cache.
     if repoheads == [nullid]:
-        return ([], {}, False)
+        return ([], {}, {}, False)
 
     # Case 3 (uncommon): cache file missing or empty.
     if not cacheheads:
@@ -277,9 +298,9 @@
 
     # Caller has to iterate over all heads, but can use the filenodes in
     # cachefnode to get to each .hgtags revision quickly.
-    return (repoheads, cachefnode, True)
+    return (repoheads, cachefnode, None, True)
 
-def _writetagcache(ui, repo, heads, tagfnode):
+def _writetagcache(ui, repo, heads, tagfnode, cachetags):
 
     cachefile = repo.opener('tags.cache', 'w', atomictemp=True)
     _debug(ui, 'writing cache file %s\n' % cachefile.name)
@@ -306,5 +327,13 @@
         else:
             cachefile.write('%d %s\n' % (rev, hex(head)))
 
+    # Tag names in the cache are in UTF-8 -- which is the whole reason
+    # we keep them in UTF-8 throughout this module.  If we converted
+    # them to local encoding on input, we would lose info writing them to
+    # the cache.
+    cachefile.write('\n')
+    for (name, (node, hist)) in cachetags.iteritems():
+        cachefile.write("%s %s\n" % (hex(node), name))
+
     cachefile.rename()
     cachefile.close()
--- a/tests/test-mq.out	Thu Jul 16 10:39:42 2009 -0400
+++ b/tests/test-mq.out	Thu Jul 16 10:41:19 2009 -0400
@@ -113,10 +113,12 @@
 % qpush with dump of tag cache
 .hg/tags.cache (pre qpush):
 1
+
 applying test.patch
 now at: test.patch
 .hg/tags.cache (post qpush):
 2
+
 % pop/push outside repo
 popping test.patch
 patch queue now empty
--- a/tests/test-tags	Thu Jul 16 10:39:42 2009 -0400
+++ b/tests/test-tags	Thu Jul 16 10:41:19 2009 -0400
@@ -4,6 +4,10 @@
     [ -f .hg/tags.cache ] && echo "tag cache exists" || echo "no tag cache"
 }
 
+# XXX need to test that the tag cache works when we strip an old head
+# and add a new one rooted off non-tip: i.e. node and rev of tip are the
+# same, but stuff has changed behind tip.
+
 echo "% setup"
 mkdir t
 cd t
--- a/tests/test-tags.out	Thu Jul 16 10:39:42 2009 -0400
+++ b/tests/test-tags.out	Thu Jul 16 10:41:19 2009 -0400
@@ -46,9 +46,6 @@
 tip                                8:c4be69a18c11
 first                              0:acb14030fe0a
 changeset:   8:c4be69a18c11
-.hgtags@75d9f02dfe28, line 2: cannot parse entry
-.hgtags@75d9f02dfe28, line 4: node 'foo' is not well formed
-.hgtags@c4be69a18c11, line 2: node 'x' is not well formed
 tag:         tip
 parent:      3:ac5e980c4dc0
 user:        test
@@ -80,6 +77,8 @@
 4 0c192d7d5e6b78a714de54a2e9627952a877e25a 0c04f2a8af31de17fab7422878ee5a2dadbc943d
 3 6fa450212aeb2a21ed616a54aea39a4a27894cd7 7d3b718c964ef37b89e550ebdafd5789e76ce1b0
 2 7a94127795a33c10a370c93f731fd9fea0b79af6 0c04f2a8af31de17fab7422878ee5a2dadbc943d
+
+78391a272241d70354aa14c874552cad6b51bb42 bar
 % test tag removal
 changeset:   5:5f6e8655b1c7
 tag:         tip