annotate: do not construct attr.s object per line while computing history
author Yuya Nishihara <yuya@tcha.org>
Mon, 12 Mar 2018 20:45:10 +0900
changeset 37064 434e520adb8c
parent 37063 39304dd63589
child 37065 b235bde38a83
annotate: do not construct attr.s object per line while computing history

Unfortunately, good abstraction has a cost. It's way slower to construct an
annotateline() object than creating a plain tuple or a list. This patch
changes the internal data structure from row-based to columnar, so the
decorate() function can be instant (i.e. no Python in hot loop.)

For code readability, the outermost tuple is switched to an attr.s object
instead.

  (original, row-based attr.s)
  $ hg annot mercurial/commands.py --time > /dev/null
  time: real 11.470 secs (user 11.400+0.000 sys 0.070+0.000)
  $ hg annot mercurial/commands.py --time --line-number > /dev/null
  time: real 39.590 secs (user 39.500+0.000 sys 0.080+0.000)

  (this patch, columnar)
  $ hg annot mercurial/commands.py --time > /dev/null
  time: real 11.780 secs (user 11.710+0.000 sys 0.070+0.000)
  $ hg annot mercurial/commands.py --time --line-number > /dev/null
  time: real 12.240 secs (user 12.170+0.000 sys 0.090+0.000)

  (cf. 4.3.3, row-based tuple)
  $ hg annot mercurial/commands.py --time --line-number > /dev/null
  time: real 19.540 secs (user 19.460+0.000 sys 0.080+0.000)
mercurial/dagop.py
mercurial/pycompat.py
tests/test-annotate.py
--- a/mercurial/dagop.py	Thu Mar 15 18:05:49 2018 -0700
+++ b/mercurial/dagop.py	Mon Mar 12 20:45:10 2018 +0900
@@ -369,6 +369,15 @@
     # Whether this annotation was the result of a skip-annotate.
     skip = attr.ib(default=False)
 
+@attr.s(slots=True, frozen=True)
+class _annotatedfile(object):
+    # list indexed by lineno - 1
+    fctxs = attr.ib()
+    linenos = attr.ib()
+    skips = attr.ib()
+    # full file content
+    text = attr.ib()
+
 def _countlines(text):
     if text.endswith("\n"):
         return text.count("\n")
@@ -385,7 +394,7 @@
 
     See test-annotate.py for unit tests.
     '''
-    pblocks = [(parent, mdiff.allblocks(parent[1], child[1], opts=diffopts))
+    pblocks = [(parent, mdiff.allblocks(parent.text, child.text, opts=diffopts))
                for parent in parents]
 
     if skipchild:
@@ -398,7 +407,9 @@
             # Changed blocks ('!') or blocks made only of blank lines ('~')
             # belong to the child.
             if t == '=':
-                child[0][b1:b2] = parent[0][a1:a2]
+                child.fctxs[b1:b2] = parent.fctxs[a1:a2]
+                child.linenos[b1:b2] = parent.linenos[a1:a2]
+                child.skips[b1:b2] = parent.skips[a1:a2]
 
     if skipchild:
         # Now try and match up anything that couldn't be matched,
@@ -419,9 +430,11 @@
             for (a1, a2, b1, b2), _t in blocks:
                 if a2 - a1 >= b2 - b1:
                     for bk in xrange(b1, b2):
-                        if child[0][bk].fctx == childfctx:
+                        if child.fctxs[bk] == childfctx:
                             ak = min(a1 + (bk - b1), a2 - 1)
-                            child[0][bk] = attr.evolve(parent[0][ak], skip=True)
+                            child.fctxs[bk] = parent.fctxs[ak]
+                            child.linenos[bk] = parent.linenos[ak]
+                            child.skips[bk] = True
                 else:
                     remaining[idx][1].append((a1, a2, b1, b2))
 
@@ -430,9 +443,11 @@
         for parent, blocks in remaining:
             for a1, a2, b1, b2 in blocks:
                 for bk in xrange(b1, b2):
-                    if child[0][bk].fctx == childfctx:
+                    if child.fctxs[bk] == childfctx:
                         ak = min(a1 + (bk - b1), a2 - 1)
-                        child[0][bk] = attr.evolve(parent[0][ak], skip=True)
+                        child.fctxs[bk] = parent.fctxs[ak]
+                        child.linenos[bk] = parent.linenos[ak]
+                        child.skips[bk] = True
     return child
 
 def annotate(base, parents, linenumber=False, skiprevs=None, diffopts=None):
@@ -443,11 +458,13 @@
 
     if linenumber:
         def decorate(text, fctx):
-            return ([annotateline(fctx=fctx, lineno=i)
-                     for i in xrange(1, _countlines(text) + 1)], text)
+            n = _countlines(text)
+            linenos = pycompat.rangelist(1, n + 1)
+            return _annotatedfile([fctx] * n, linenos, [False] * n, text)
     else:
         def decorate(text, fctx):
-            return ([annotateline(fctx=fctx)] * _countlines(text), text)
+            n = _countlines(text)
+            return _annotatedfile([fctx] * n, [False] * n, [False] * n, text)
 
     # This algorithm would prefer to be recursive, but Python is a
     # bit recursion-hostile. Instead we do an iterative
@@ -501,8 +518,10 @@
             hist[f] = curr
             del pcache[f]
 
-    lineattrs, text = hist[base]
-    return pycompat.ziplist(lineattrs, mdiff.splitnewlines(text))
+    a = hist[base]
+    return [(annotateline(fctx, lineno, skip), line)
+            for fctx, lineno, skip, line
+            in zip(a.fctxs, a.linenos, a.skips, mdiff.splitnewlines(a.text))]
 
 def toposort(revs, parentsfunc, firstbranch=()):
     """Yield revisions from heads to roots one (topo) branch at a time.
--- a/mercurial/pycompat.py	Thu Mar 15 18:05:49 2018 -0700
+++ b/mercurial/pycompat.py	Mon Mar 12 20:45:10 2018 +0900
@@ -71,6 +71,9 @@
     def maplist(*args):
         return list(map(*args))
 
+    def rangelist(*args):
+        return list(range(*args))
+
     def ziplist(*args):
         return list(zip(*args))
 
@@ -348,6 +351,7 @@
     bytesio = cStringIO.StringIO
     stringio = bytesio
     maplist = map
+    rangelist = range
     ziplist = zip
     rawinput = raw_input
     getargspec = inspect.getargspec
--- a/tests/test-annotate.py	Thu Mar 15 18:05:49 2018 -0700
+++ b/tests/test-annotate.py	Mon Mar 12 20:45:10 2018 +0900
@@ -5,12 +5,18 @@
 
 from mercurial import (
     mdiff,
+    pycompat,
 )
 from mercurial.dagop import (
     annotateline,
+    _annotatedfile,
     _annotatepair,
 )
 
+def tr(a):
+    return [annotateline(fctx, lineno, skip)
+            for fctx, lineno, skip in zip(a.fctxs, a.linenos, a.skips)]
+
 class AnnotateTests(unittest.TestCase):
     """Unit tests for annotate code."""
 
@@ -26,16 +32,16 @@
         diffopts = mdiff.diffopts()
 
         def decorate(text, fctx):
-            return ([annotateline(fctx=fctx, lineno=i)
-                     for i in range(1, text.count(b'\n') + 1)],
-                    text)
+            n = text.count(b'\n')
+            linenos = pycompat.rangelist(1, n + 1)
+            return _annotatedfile([fctx] * n, linenos, [False] * n, text)
 
         # Basic usage
 
         oldann = decorate(olddata, oldfctx)
         p1ann = decorate(p1data, p1fctx)
         p1ann = _annotatepair([oldann], p1fctx, p1ann, False, diffopts)
-        self.assertEqual(p1ann[0], [
+        self.assertEqual(tr(p1ann), [
             annotateline(b'old', 1),
             annotateline(b'old', 2),
             annotateline(b'p1', 3),
@@ -43,7 +49,7 @@
 
         p2ann = decorate(p2data, p2fctx)
         p2ann = _annotatepair([oldann], p2fctx, p2ann, False, diffopts)
-        self.assertEqual(p2ann[0], [
+        self.assertEqual(tr(p2ann), [
             annotateline(b'old', 1),
             annotateline(b'p2', 2),
             annotateline(b'p2', 3),
@@ -54,7 +60,7 @@
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p1ann, p2ann], childfctx, childann, False,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'c', 2),
             annotateline(b'p2', 2),
@@ -65,7 +71,7 @@
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p2ann, p1ann], childfctx, childann, False,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'c', 2),
             annotateline(b'p1', 3),
@@ -78,7 +84,7 @@
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p1ann, p2ann], childfctx, childann, True,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'old', 2, True),
             # note that this line was carried over from earlier so it is *not*
@@ -91,7 +97,7 @@
         childann = decorate(childdata, childfctx)
         childann = _annotatepair([p2ann, p1ann], childfctx, childann, True,
                                  diffopts)
-        self.assertEqual(childann[0], [
+        self.assertEqual(tr(childann), [
             annotateline(b'old', 1),
             annotateline(b'old', 2, True),
             annotateline(b'p1', 3),