changeset 45117:b1e51ef4e536

phases: sparsify phase lists When the internal and archived phase was added, allphase became a large, sparsely populated list. This dramatically increased the number of lookup operations for public relations in `phasecache.phase`. As a first step, define allphases and related lists explicitly to contain only the actual phases. Make phasenames a dictionary and create corresponding dictionaries for mapping phase names back to numbers. Adjust various list to be sparse as well with the exception of phaseroots and phasesets members of phasecache. Keep those as a separate step as it involves changes to the C module. Differential Revision: https://phab.mercurial-scm.org/D8697
author Joerg Sonnenberger <joerg@bec.de>
date Wed, 08 Jul 2020 00:15:15 +0200
parents 361a7444bc41
children d0ef8c1dddd4
files mercurial/bundle2.py mercurial/exchange.py mercurial/exchangev2.py mercurial/phases.py
diffstat 4 files changed, 46 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/bundle2.py	Tue Jul 07 22:00:02 2020 +0200
+++ b/mercurial/bundle2.py	Wed Jul 08 00:15:15 2020 +0200
@@ -2208,7 +2208,7 @@
         b'remote repository changed while pushing - please try again '
         b'(%s is %s expected %s)'
     )
-    for expectedphase, nodes in enumerate(phasetonodes):
+    for expectedphase, nodes in pycompat.iteritems(phasetonodes):
         for n in nodes:
             actualphase = phasecache.phase(unfi, cl.rev(n))
             if actualphase != expectedphase:
--- a/mercurial/exchange.py	Tue Jul 07 22:00:02 2020 +0200
+++ b/mercurial/exchange.py	Wed Jul 08 00:15:15 2020 +0200
@@ -1024,12 +1024,12 @@
     hasphaseheads = b'heads' in b2caps.get(b'phases', ())
     if pushop.remotephases is not None and hasphaseheads:
         # check that the remote phase has not changed
-        checks = [[] for p in phases.allphases]
+        checks = {p: [] for p in phases.allphases}
         checks[phases.public].extend(pushop.remotephases.publicheads)
         checks[phases.draft].extend(pushop.remotephases.draftroots)
-        if any(checks):
-            for nodes in checks:
-                nodes.sort()
+        if any(pycompat.itervalues(checks)):
+            for phase in checks:
+                checks[phase].sort()
             checkdata = phases.binaryencode(checks)
             bundler.newpart(b'check:phases', data=checkdata)
 
@@ -1104,7 +1104,7 @@
     """push phase information through a bundle2 - binary part"""
     pushop.stepsdone.add(b'phases')
     if pushop.outdatedphases:
-        updates = [[] for p in phases.allphases]
+        updates = {p: [] for p in phases.allphases}
         updates[0].extend(h.node() for h in pushop.outdatedphases)
         phasedata = phases.binaryencode(updates)
         bundler.newpart(b'phase-heads', data=phasedata)
@@ -2658,9 +2658,9 @@
                     headsbyphase[phases.public].add(node(r))
 
         # transform data in a format used by the encoding function
-        phasemapping = []
-        for phase in phases.allphases:
-            phasemapping.append(sorted(headsbyphase[phase]))
+        phasemapping = {
+            phase: sorted(headsbyphase[phase]) for phase in phases.allphases
+        }
 
         # generate the actual part
         phasedata = phases.binaryencode(phasemapping)
--- a/mercurial/exchangev2.py	Tue Jul 07 22:00:02 2020 +0200
+++ b/mercurial/exchangev2.py	Wed Jul 08 00:15:15 2020 +0200
@@ -82,15 +82,12 @@
         phases.registernew(repo, tr, phases.draft, csetres[b'added'])
 
     # And adjust the phase of all changesets accordingly.
-    for phase in phases.phasenames:
+    for phasenumber, phase in phases.phasenames.items():
         if phase == b'secret' or not csetres[b'nodesbyphase'][phase]:
             continue
 
         phases.advanceboundary(
-            repo,
-            tr,
-            phases.phasenames.index(phase),
-            csetres[b'nodesbyphase'][phase],
+            repo, tr, phasenumber, csetres[b'nodesbyphase'][phase],
         )
 
     # Write bookmark updates.
@@ -361,7 +358,7 @@
         # so we can set the linkrev accordingly when manifests are added.
         manifestnodes[cl.rev(node)] = revision.manifest
 
-    nodesbyphase = {phase: set() for phase in phases.phasenames}
+    nodesbyphase = {phase: set() for phase in phases.phasenames.values()}
     remotebookmarks = {}
 
     # addgroup() expects a 7-tuple describing revisions. This normalizes
--- a/mercurial/phases.py	Tue Jul 07 22:00:02 2020 +0200
+++ b/mercurial/phases.py	Wed Jul 08 00:15:15 2020 +0200
@@ -128,25 +128,28 @@
 
 _fphasesentry = struct.Struct(b'>i20s')
 
-INTERNAL_FLAG = 64  # Phases for mercurial internal usage only
-HIDEABLE_FLAG = 32  # Phases that are hideable
-
 # record phase index
 public, draft, secret = range(3)
-internal = INTERNAL_FLAG | HIDEABLE_FLAG
-archived = HIDEABLE_FLAG
-allphases = list(range(internal + 1))
-trackedphases = allphases[1:]
+archived = 32  # non-continuous for compatibility
+internal = 96  # non-continuous for compatibility
+allphases = (public, draft, secret, archived, internal)
+trackedphases = (draft, secret, archived, internal)
 # record phase names
 cmdphasenames = [b'public', b'draft', b'secret']  # known to `hg phase` command
-phasenames = [None] * len(allphases)
-phasenames[: len(cmdphasenames)] = cmdphasenames
+phasenames = dict(enumerate(cmdphasenames))
 phasenames[archived] = b'archived'
 phasenames[internal] = b'internal'
+# map phase name to phase number
+phasenumber = {name: phase for phase, name in phasenames.items()}
+# like phasenumber, but also include maps for the numeric and binary
+# phase number to the phase number
+phasenumber2 = phasenumber.copy()
+phasenumber2.update({phase: phase for phase in phasenames})
+phasenumber2.update({b'%i' % phase: phase for phase in phasenames})
 # record phase property
-mutablephases = tuple(allphases[1:])
-remotehiddenphases = tuple(allphases[2:])
-localhiddenphases = tuple(p for p in allphases if p & HIDEABLE_FLAG)
+mutablephases = (draft, secret, archived, internal)
+remotehiddenphases = (secret, archived, internal)
+localhiddenphases = (internal, archived)
 
 
 def supportinternal(repo):
@@ -167,7 +170,7 @@
     """
     repo = repo.unfiltered()
     dirty = False
-    roots = [set() for i in allphases]
+    roots = [set() for i in range(max(allphases) + 1)]
     try:
         f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots')
         try:
@@ -189,11 +192,10 @@
 def binaryencode(phasemapping):
     """encode a 'phase -> nodes' mapping into a binary stream
 
-    Since phases are integer the mapping is actually a python list:
-    [[PUBLIC_HEADS], [DRAFTS_HEADS], [SECRET_HEADS]]
+    The revision lists are encoded as (phase, root) pairs.
     """
     binarydata = []
-    for phase, nodes in enumerate(phasemapping):
+    for phase, nodes in pycompat.iteritems(phasemapping):
         for head in nodes:
             binarydata.append(_fphasesentry.pack(phase, head))
     return b''.join(binarydata)
@@ -202,8 +204,9 @@
 def binarydecode(stream):
     """decode a binary stream into a 'phase -> nodes' mapping
 
-    Since phases are integer the mapping is actually a python list."""
-    headsbyphase = [[] for i in allphases]
+    The (phase, root) pairs are turned back into a dictionary with
+    the phase as index and the aggregated roots of that phase as value."""
+    headsbyphase = {i: [] for i in allphases}
     entrysize = _fphasesentry.size
     while True:
         entry = stream.read(entrysize)
@@ -427,12 +430,16 @@
             nativeroots.append(
                 pycompat.maplist(repo.changelog.rev, self.phaseroots[phase])
             )
-        return repo.changelog.computephases(nativeroots)
+        revslen, phasesets = repo.changelog.computephases(nativeroots)
+        phasesets2 = [set() for phase in range(max(allphases) + 1)]
+        for phase, phaseset in zip(allphases, phasesets):
+            phasesets2[phase] = phaseset
+        return revslen, phasesets2
 
     def _computephaserevspure(self, repo):
         repo = repo.unfiltered()
         cl = repo.changelog
-        self._phasesets = [set() for phase in allphases]
+        self._phasesets = [set() for phase in range(max(allphases) + 1)]
         lowerroots = set()
         for phase in reversed(trackedphases):
             roots = pycompat.maplist(cl.rev, self.phaseroots[phase])
@@ -533,7 +540,7 @@
 
         changes = set()  # set of revisions to be changed
         delroots = []  # set of root deleted by this path
-        for phase in pycompat.xrange(targetphase + 1, len(allphases)):
+        for phase in (phase for phase in allphases if phase > targetphase):
             # filter nodes that are not in a compatible phase already
             nodes = [
                 n for n in nodes if self.phase(repo, repo[n].rev()) >= phase
@@ -766,7 +773,7 @@
     """
     cl = repo.changelog
 
-    headsbyphase = [[] for i in allphases]
+    headsbyphase = {i: [] for i in allphases}
     # No need to keep track of secret phase; any heads in the subset that
     # are not mentioned are implicitly secret.
     for phase in allphases[:secret]:
@@ -897,13 +904,11 @@
     """
     v = ui.config(b'phases', b'new-commit')
     try:
-        return phasenames.index(v)
-    except ValueError:
-        try:
-            return int(v)
-        except ValueError:
-            msg = _(b"phases.new-commit: not a valid phase name ('%s')")
-            raise error.ConfigError(msg % v)
+        return phasenumber2[v]
+    except KeyError:
+        raise error.ConfigError(
+            _(b"phases.new-commit: not a valid phase name ('%s')") % v
+        )
 
 
 def hassecret(repo):