Mercurial > hg
changeset 50181:02fe65f74be5
convert: use a priority queue for sorting commits, to make sorting faster
To achieve this, we turn commit sorters into classes so they can
encapsulate state.
This reduces the sorting time from ~30s to ~10s on a 500k-commit
prefix of a repo I tried to convert. (and probably reduces the time
to sort the whole repo from many tens of minutes to minutes, but I
didn't try that again)
The date caching gets removed because priority queue already
caches the key.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Thu, 23 Feb 2023 23:25:28 +0100 |
parents | 829aa604d71a |
children | e9d92faa08f8 |
files | hgext/convert/convcmd.py |
diffstat | 1 files changed, 69 insertions(+), 50 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/convert/convcmd.py Tue Feb 21 13:26:07 2023 -0500 +++ b/hgext/convert/convcmd.py Thu Feb 23 23:25:28 2023 +0100 @@ -6,6 +6,7 @@ # GNU General Public License version 2 or any later version. import collections +import heapq import os import shutil @@ -198,6 +199,59 @@ self.progress.complete() +# Sorters are used by the `toposort` function to maintain a set of revisions +# which can be converted immediately and pick one +class branchsorter: + """If the previously converted revision has a child in the + eligible revisions list, pick it. Return the list head + otherwise. Branch sort attempts to minimize branch + switching, which is harmful for Mercurial backend + compression. + """ + + def __init__(self, parents): + self.nodes = [] + self.parents = parents + self.prev = None + + def picknext(self): + next = self.nodes[0] + for n in self.nodes: + if self.prev in self.parents[n]: + next = n + break + self.prev = next + self.nodes.remove(next) + return next + + def insert(self, node): + self.nodes.insert(0, node) + + def __len__(self): + return self.nodes.__len__() + + +class keysorter: + """Key-based sort, ties broken by insertion order""" + + def __init__(self, keyfn): + self.heap = [] + self.keyfn = keyfn + self.counter = 0 + + def picknext(self): + return heapq.heappop(self.heap)[2] + + def insert(self, node): + counter = self.counter + self.counter = counter + 1 + key = self.keyfn(node) + heapq.heappush(self.heap, (key, counter, node)) + + def __len__(self): + return self.heap.__len__() + + class converter: def __init__(self, ui, source, dest, revmapfile, opts): @@ -364,37 +418,10 @@ return children, roots - # Sort functions are supposed to take a list of revisions which - # can be converted immediately and pick one - - def makebranchsorter(): - """If the previously converted revision has a child in the - eligible revisions list, pick it. Return the list head - otherwise. Branch sort attempts to minimize branch - switching, which is harmful for Mercurial backend - compression. - """ - prev = [None] - - def picknext(nodes): - next = nodes[0] - for n in nodes: - if prev[0] in parents[n]: - next = n - break - prev[0] = next - return next - - return picknext - def makesourcesorter(): """Source specific sort.""" keyfn = lambda n: self.commitcache[n].sortkey - - def picknext(nodes): - return sorted(nodes, key=keyfn)[0] - - return picknext + return keysorter(keyfn) def makeclosesorter(): """Close order sort.""" @@ -402,44 +429,36 @@ b'close' not in self.commitcache[n].extra, self.commitcache[n].sortkey, ) - - def picknext(nodes): - return sorted(nodes, key=keyfn)[0] - - return picknext + return keysorter(keyfn) def makedatesorter(): """Sort revisions by date.""" - dates = {} def getdate(n): - if n not in dates: - dates[n] = dateutil.parsedate(self.commitcache[n].date) - return dates[n] + return dateutil.parsedate(self.commitcache[n].date) - def picknext(nodes): - return min([(getdate(n), n) for n in nodes])[1] - - return picknext + return keysorter(getdate) if sortmode == b'branchsort': - picknext = makebranchsorter() + sorter = branchsorter(parents) elif sortmode == b'datesort': - picknext = makedatesorter() + sorter = makedatesorter() elif sortmode == b'sourcesort': - picknext = makesourcesorter() + sorter = makesourcesorter() elif sortmode == b'closesort': - picknext = makeclosesorter() + sorter = makeclosesorter() else: raise error.Abort(_(b'unknown sort mode: %s') % sortmode) - children, actives = mapchildren(parents) + children, roots = mapchildren(parents) + + for node in roots: + sorter.insert(node) s = [] pendings = {} - while actives: - n = picknext(actives) - actives.remove(n) + while sorter: + n = sorter.picknext() s.append(n) # Update dependents list @@ -455,7 +474,7 @@ ) if not pendings[c]: # Parents are converted, node is eligible - actives.insert(0, c) + sorter.insert(c) pendings[c] = None if len(s) != len(parents):