# HG changeset patch # User Joerg Sonnenberger # Date 1594160115 -7200 # Node ID b1e51ef4e536e355ea8c2663d5e02d4f2259fa1b # Parent 361a7444bc41ad6e24c8d1ba9e48f2b419055e85 phases: sparsify phase lists When the internal and archived phase was added, allphase became a large, sparsely populated list. This dramatically increased the number of lookup operations for public relations in `phasecache.phase`. As a first step, define allphases and related lists explicitly to contain only the actual phases. Make phasenames a dictionary and create corresponding dictionaries for mapping phase names back to numbers. Adjust various list to be sparse as well with the exception of phaseroots and phasesets members of phasecache. Keep those as a separate step as it involves changes to the C module. Differential Revision: https://phab.mercurial-scm.org/D8697 diff -r 361a7444bc41 -r b1e51ef4e536 mercurial/bundle2.py --- a/mercurial/bundle2.py Tue Jul 07 22:00:02 2020 +0200 +++ b/mercurial/bundle2.py Wed Jul 08 00:15:15 2020 +0200 @@ -2208,7 +2208,7 @@ b'remote repository changed while pushing - please try again ' b'(%s is %s expected %s)' ) - for expectedphase, nodes in enumerate(phasetonodes): + for expectedphase, nodes in pycompat.iteritems(phasetonodes): for n in nodes: actualphase = phasecache.phase(unfi, cl.rev(n)) if actualphase != expectedphase: diff -r 361a7444bc41 -r b1e51ef4e536 mercurial/exchange.py --- a/mercurial/exchange.py Tue Jul 07 22:00:02 2020 +0200 +++ b/mercurial/exchange.py Wed Jul 08 00:15:15 2020 +0200 @@ -1024,12 +1024,12 @@ hasphaseheads = b'heads' in b2caps.get(b'phases', ()) if pushop.remotephases is not None and hasphaseheads: # check that the remote phase has not changed - checks = [[] for p in phases.allphases] + checks = {p: [] for p in phases.allphases} checks[phases.public].extend(pushop.remotephases.publicheads) checks[phases.draft].extend(pushop.remotephases.draftroots) - if any(checks): - for nodes in checks: - nodes.sort() + if any(pycompat.itervalues(checks)): + for phase in checks: + checks[phase].sort() checkdata = phases.binaryencode(checks) bundler.newpart(b'check:phases', data=checkdata) @@ -1104,7 +1104,7 @@ """push phase information through a bundle2 - binary part""" pushop.stepsdone.add(b'phases') if pushop.outdatedphases: - updates = [[] for p in phases.allphases] + updates = {p: [] for p in phases.allphases} updates[0].extend(h.node() for h in pushop.outdatedphases) phasedata = phases.binaryencode(updates) bundler.newpart(b'phase-heads', data=phasedata) @@ -2658,9 +2658,9 @@ headsbyphase[phases.public].add(node(r)) # transform data in a format used by the encoding function - phasemapping = [] - for phase in phases.allphases: - phasemapping.append(sorted(headsbyphase[phase])) + phasemapping = { + phase: sorted(headsbyphase[phase]) for phase in phases.allphases + } # generate the actual part phasedata = phases.binaryencode(phasemapping) diff -r 361a7444bc41 -r b1e51ef4e536 mercurial/exchangev2.py --- a/mercurial/exchangev2.py Tue Jul 07 22:00:02 2020 +0200 +++ b/mercurial/exchangev2.py Wed Jul 08 00:15:15 2020 +0200 @@ -82,15 +82,12 @@ phases.registernew(repo, tr, phases.draft, csetres[b'added']) # And adjust the phase of all changesets accordingly. - for phase in phases.phasenames: + for phasenumber, phase in phases.phasenames.items(): if phase == b'secret' or not csetres[b'nodesbyphase'][phase]: continue phases.advanceboundary( - repo, - tr, - phases.phasenames.index(phase), - csetres[b'nodesbyphase'][phase], + repo, tr, phasenumber, csetres[b'nodesbyphase'][phase], ) # Write bookmark updates. @@ -361,7 +358,7 @@ # so we can set the linkrev accordingly when manifests are added. manifestnodes[cl.rev(node)] = revision.manifest - nodesbyphase = {phase: set() for phase in phases.phasenames} + nodesbyphase = {phase: set() for phase in phases.phasenames.values()} remotebookmarks = {} # addgroup() expects a 7-tuple describing revisions. This normalizes diff -r 361a7444bc41 -r b1e51ef4e536 mercurial/phases.py --- a/mercurial/phases.py Tue Jul 07 22:00:02 2020 +0200 +++ b/mercurial/phases.py Wed Jul 08 00:15:15 2020 +0200 @@ -128,25 +128,28 @@ _fphasesentry = struct.Struct(b'>i20s') -INTERNAL_FLAG = 64 # Phases for mercurial internal usage only -HIDEABLE_FLAG = 32 # Phases that are hideable - # record phase index public, draft, secret = range(3) -internal = INTERNAL_FLAG | HIDEABLE_FLAG -archived = HIDEABLE_FLAG -allphases = list(range(internal + 1)) -trackedphases = allphases[1:] +archived = 32 # non-continuous for compatibility +internal = 96 # non-continuous for compatibility +allphases = (public, draft, secret, archived, internal) +trackedphases = (draft, secret, archived, internal) # record phase names cmdphasenames = [b'public', b'draft', b'secret'] # known to `hg phase` command -phasenames = [None] * len(allphases) -phasenames[: len(cmdphasenames)] = cmdphasenames +phasenames = dict(enumerate(cmdphasenames)) phasenames[archived] = b'archived' phasenames[internal] = b'internal' +# map phase name to phase number +phasenumber = {name: phase for phase, name in phasenames.items()} +# like phasenumber, but also include maps for the numeric and binary +# phase number to the phase number +phasenumber2 = phasenumber.copy() +phasenumber2.update({phase: phase for phase in phasenames}) +phasenumber2.update({b'%i' % phase: phase for phase in phasenames}) # record phase property -mutablephases = tuple(allphases[1:]) -remotehiddenphases = tuple(allphases[2:]) -localhiddenphases = tuple(p for p in allphases if p & HIDEABLE_FLAG) +mutablephases = (draft, secret, archived, internal) +remotehiddenphases = (secret, archived, internal) +localhiddenphases = (internal, archived) def supportinternal(repo): @@ -167,7 +170,7 @@ """ repo = repo.unfiltered() dirty = False - roots = [set() for i in allphases] + roots = [set() for i in range(max(allphases) + 1)] try: f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots') try: @@ -189,11 +192,10 @@ def binaryencode(phasemapping): """encode a 'phase -> nodes' mapping into a binary stream - Since phases are integer the mapping is actually a python list: - [[PUBLIC_HEADS], [DRAFTS_HEADS], [SECRET_HEADS]] + The revision lists are encoded as (phase, root) pairs. """ binarydata = [] - for phase, nodes in enumerate(phasemapping): + for phase, nodes in pycompat.iteritems(phasemapping): for head in nodes: binarydata.append(_fphasesentry.pack(phase, head)) return b''.join(binarydata) @@ -202,8 +204,9 @@ def binarydecode(stream): """decode a binary stream into a 'phase -> nodes' mapping - Since phases are integer the mapping is actually a python list.""" - headsbyphase = [[] for i in allphases] + The (phase, root) pairs are turned back into a dictionary with + the phase as index and the aggregated roots of that phase as value.""" + headsbyphase = {i: [] for i in allphases} entrysize = _fphasesentry.size while True: entry = stream.read(entrysize) @@ -427,12 +430,16 @@ nativeroots.append( pycompat.maplist(repo.changelog.rev, self.phaseroots[phase]) ) - return repo.changelog.computephases(nativeroots) + revslen, phasesets = repo.changelog.computephases(nativeroots) + phasesets2 = [set() for phase in range(max(allphases) + 1)] + for phase, phaseset in zip(allphases, phasesets): + phasesets2[phase] = phaseset + return revslen, phasesets2 def _computephaserevspure(self, repo): repo = repo.unfiltered() cl = repo.changelog - self._phasesets = [set() for phase in allphases] + self._phasesets = [set() for phase in range(max(allphases) + 1)] lowerroots = set() for phase in reversed(trackedphases): roots = pycompat.maplist(cl.rev, self.phaseroots[phase]) @@ -533,7 +540,7 @@ changes = set() # set of revisions to be changed delroots = [] # set of root deleted by this path - for phase in pycompat.xrange(targetphase + 1, len(allphases)): + for phase in (phase for phase in allphases if phase > targetphase): # filter nodes that are not in a compatible phase already nodes = [ n for n in nodes if self.phase(repo, repo[n].rev()) >= phase @@ -766,7 +773,7 @@ """ cl = repo.changelog - headsbyphase = [[] for i in allphases] + headsbyphase = {i: [] for i in allphases} # No need to keep track of secret phase; any heads in the subset that # are not mentioned are implicitly secret. for phase in allphases[:secret]: @@ -897,13 +904,11 @@ """ v = ui.config(b'phases', b'new-commit') try: - return phasenames.index(v) - except ValueError: - try: - return int(v) - except ValueError: - msg = _(b"phases.new-commit: not a valid phase name ('%s')") - raise error.ConfigError(msg % v) + return phasenumber2[v] + except KeyError: + raise error.ConfigError( + _(b"phases.new-commit: not a valid phase name ('%s')") % v + ) def hassecret(repo):