changeset 27945:4186d359046a stable

log: speed up single file log with hidden revs (issue4747) On repos with lots of heads, the filelog() code could spend several minutes decompressing manifests. This change instead tries to efficiently scan the changelog for candidates and decompress as few manifests as possible. This is a regression introduced in 3.3 by the linkrev adjustment code. Prior to that, filelog was nearly instant. For the repo in the bug report, this improves time of a simple log command from ~3 minutes to ~.5 seconds, a 360x speedup. For the main Mercurial repo, a log of commands.py slows down from 1.14s to 1.45s, a 27% slowdown. This is still faster than the file() revset, which takes 2.1 seconds.
author Matt Mackall <mpm@selenic.com>
date Fri, 22 Jan 2016 12:08:20 -0600
parents 4511e8dac4c7
children ca8d2b73155d
files mercurial/revset.py
diffstat 1 files changed, 30 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/revset.py	Sat Jan 23 23:32:49 2016 -0500
+++ b/mercurial/revset.py	Fri Jan 22 12:08:20 2016 -0600
@@ -1036,88 +1036,39 @@
         files = (f for f in repo[None] if m(f))
 
     for f in files:
-        backrevref = {}  # final value for: filerev -> changerev
-        lowestchild = {} # lowest known filerev child of a filerev
-        delayed = []     # filerev with filtered linkrev, for post-processing
-        lowesthead = None # cache for manifest content of all head revisions
         fl = repo.file(f)
+        known = {}
+        scanpos = 0
         for fr in list(fl):
-            rev = fl.linkrev(fr)
-            if rev not in cl:
-                # changerev pointed in linkrev is filtered
-                # record it for post processing.
-                delayed.append((fr, rev))
+            fn = fl.node(fr)
+            if fn in known:
+                s.add(known[fn])
                 continue
-            for p in fl.parentrevs(fr):
-                if 0 <= p and p not in lowestchild:
-                    lowestchild[p] = fr
-            backrevref[fr] = rev
-            s.add(rev)
-
-        # Post-processing of all filerevs we skipped because they were
-        # filtered. If such filerevs have known and unfiltered children, this
-        # means they have an unfiltered appearance out there. We'll use linkrev
-        # adjustment to find one of these appearances. The lowest known child
-        # will be used as a starting point because it is the best upper-bound we
-        # have.
-        #
-        # This approach will fail when an unfiltered but linkrev-shadowed
-        # appearance exists in a head changeset without unfiltered filerev
-        # children anywhere.
-        while delayed:
-            # must be a descending iteration. To slowly fill lowest child
-            # information that is of potential use by the next item.
-            fr, rev = delayed.pop()
-            lkr = rev
-
-            child = lowestchild.get(fr)
-
-            if child is None:
-                # search for existence of this file revision in a head revision.
-                # There are three possibilities:
-                # - the revision exists in a head and we can find an
-                #   introduction from there,
-                # - the revision does not exist in a head because it has been
-                #   changed since its introduction: we would have found a child
-                #   and be in the other 'else' clause,
-                # - all versions of the revision are hidden.
-                if lowesthead is None:
-                    lowesthead = {}
-                    for h in repo.heads():
-                        fnode = repo[h].manifest().get(f)
-                        if fnode is not None:
-                            lowesthead[fl.rev(fnode)] = h
-                headrev = lowesthead.get(fr)
-                if headrev is None:
-                    # content is nowhere unfiltered
-                    continue
-                rev = repo[headrev][f].introrev()
-            else:
-                # the lowest known child is a good upper bound
-                childcrev = backrevref[child]
-                # XXX this does not guarantee returning the lowest
-                # introduction of this revision, but this gives a
-                # result which is a good start and will fit in most
-                # cases. We probably need to fix the multiple
-                # introductions case properly (report each
-                # introduction, even for identical file revisions)
-                # once and for all at some point anyway.
-                for p in repo[childcrev][f].parents():
-                    if p.filerev() == fr:
-                        rev = p.rev()
-                        break
-                if rev == lkr:  # no shadowed entry found
-                    # XXX This should never happen unless some manifest points
-                    # to biggish file revisions (like a revision that uses a
-                    # parent that never appears in the manifest ancestors)
-                    continue
-
-            # Fill the data for the next iteration.
-            for p in fl.parentrevs(fr):
-                if 0 <= p and p not in lowestchild:
-                    lowestchild[p] = fr
-            backrevref[fr] = rev
-            s.add(rev)
+
+            lr = fl.linkrev(fr)
+            if lr in cl:
+                s.add(lr)
+            elif scanpos is not None:
+                # lowest matching changeset is filtered, scan further
+                # ahead in changelog
+                start = max(lr, scanpos) + 1
+                scanpos = None
+                for r in cl.revs(start):
+                    # minimize parsing of non-matching entries
+                    if f in cl.revision(r) and f in cl.readfiles(r):
+                        try:
+                            # try to use manifest delta fastpath
+                            n = repo[r].filenode(f)
+                            if n not in known:
+                                if n == fn:
+                                    s.add(r)
+                                    scanpos = r
+                                    break
+                                else:
+                                    known[n] = r
+                        except error.ManifestLookupError:
+                            # deletion in changelog
+                            continue
 
     return subset & s