tags: introduce _readtaghist function
authorAngel Ezquerra <angel.ezquerra@gmail.com>
Sat, 28 Jun 2014 01:42:39 +0200
changeset 21892 89cdebc31cda
parent 21891 db8a27d92818
child 21893 e967c3b08705
tags: introduce _readtaghist function The existing _readtags function has been modified a little and renamed _readtaghist. A new _readtaghist function has been added, which is a wrappger around _readtaghist. Its output is the same as the old _readtaghist. The purpose of this change is to make it possible to automatically merge tag files. In order to do so we will need to get the line numbers for each of the tag-node pairs on the first merge parent. This is not used yet, but will be used on a follow up patch that will introduce an automatic tag merge algorithm. I performed some tests to compare the effect of this change. I used timeit to run the test-tags.t test a 9 times with and without this patch. The results were: - without this patch: 3 loops, best of 3: 8.55 sec per loop - with this patch: 3 loops, best of 3: 8.49 sec per loop The the test was on average was slightly faster with this patch (although the difference was probably not statistically significant).
mercurial/tags.py
--- a/mercurial/tags.py	Fri Jun 20 00:42:35 2014 +0900
+++ b/mercurial/tags.py	Sat Jun 28 01:42:39 2014 +0200
@@ -84,20 +84,29 @@
 
     _updatetags(filetags, "local", alltags, tagtypes)
 
-def _readtags(ui, repo, lines, fn, recode=None):
+def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
     '''Read tag definitions from a file (or any source of lines).
-    Return a mapping from tag name to (node, hist): node is the node id
-    from the last line read for that name, and hist is the list of node
-    ids previously associated with it (in file order).  All node ids are
-    binary, not hex.'''
+    This function returns two sortdicts with similar information:
+    - the first dict, bingtaglist, contains the tag information as expected by
+      the _readtags function, i.e. a mapping from tag name to (node, hist):
+        - node is the node id from the last line read for that name,
+        - hist is the list of node ids previously associated with it (in file
+          order).  All node ids are binary, not hex.
+    - the second dict, hextaglines, is a mapping from tag name to a list of
+      [hexnode, line number] pairs, ordered from the oldest to the newest node.
+    When calcnodelines is False the hextaglines dict is not calculated (an
+    empty dict is returned). This is done to improve this function's
+    performance in cases where the line numbers are not needed.
+    '''
 
-    filetags = util.sortdict()  # map tag name to (node, hist)
+    bintaghist = util.sortdict()
+    hextaglines = util.sortdict()
     count = 0
 
     def warn(msg):
         ui.warn(_("%s, line %s: %s\n") % (fn, count, msg))
 
-    for line in lines:
+    for nline, line in enumerate(lines):
         count += 1
         if not line:
             continue
@@ -116,11 +125,28 @@
             continue
 
         # update filetags
-        hist = []
-        if name in filetags:
-            n, hist = filetags[name]
-            hist.append(n)
-        filetags[name] = (nodebin, hist)
+        if calcnodelines:
+            # map tag name to a list of line numbers
+            if name not in hextaglines:
+                hextaglines[name] = []
+            hextaglines[name].append([nodehex, nline])
+            continue
+        # map tag name to (node, hist)
+        if name not in bintaghist:
+            bintaghist[name] = []
+        bintaghist[name].append(nodebin)
+    return bintaghist, hextaglines
+
+def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
+    '''Read tag definitions from a file (or any source of lines).
+    Return a mapping from tag name to (node, hist): node is the node id
+    from the last line read for that name, and hist is the list of node
+    ids previously associated with it (in file order).  All node ids are
+    binary, not hex.'''
+    filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
+                                       calcnodelines=calcnodelines)
+    for tag, taghist in filetags.items():
+        filetags[tag] = (taghist[-1], taghist[:-1])
     return filetags
 
 def _updatetags(filetags, tagtype, alltags, tagtypes):