changeset 41879:e5ece0f46b40
discovery: moved sampling functions inside discovery object
In this patch, we transform the sampling functions into
methods of the `partialdiscovery` class in the Python case.
This will provide multiple benefits. For example, we can keep some
cache from one sampling to another. In addition, this will help the
Oxidation work, as all graph-traversal-related logic will be
contained in a single object.
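
Concretely (excerpted and simplified from the diff below), the signatures and the call site in `findcommonheads` change like this:

```python
# Before: module-level function, everything passed explicitly.
def _takefullsample(repo, headrevs, revs, size):
    ...

sample = _takefullsample(local, ownheads, disco.undecided, targetsize)

# After: method on partialdiscovery; the repo and the undecided set
# come from the instance, so per-discovery state can be reused.
def takefullsample(self, headrevs, size):
    revs = self.undecided
    repo = self._repo
    ...

sample = disco.takefullsample(ownheads, targetsize)
```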
| author | Georges Racinet <georges.racinet@octobus.net> |
|---|---|
| date | Wed, 27 Feb 2019 23:55:19 +0100 |
| parents | 82884bbf8d2b |
| children | 55919b96c02a |
| files | mercurial/setdiscovery.py |
| diffstat | 1 files changed, 67 insertions(+), 66 deletions(-) |
--- a/mercurial/setdiscovery.py	Wed Feb 27 23:45:06 2019 +0100
+++ b/mercurial/setdiscovery.py	Wed Feb 27 23:55:19 2019 +0100
@@ -92,69 +92,6 @@
                 dist.setdefault(p, d + 1)
                 visit.append(p)
 
-def _takequicksample(repo, headrevs, revs, size):
-    """takes a quick sample of size <size>
-
-    It is meant for initial sampling and focuses on querying heads and close
-    ancestors of heads.
-
-    :dag: a dag object
-    :headrevs: set of head revisions in local DAG to consider
-    :revs: set of revs to discover
-    :size: the maximum size of the sample"""
-    if len(revs) <= size:
-        return list(revs)
-    sample = set(repo.revs('heads(%ld)', revs))
-
-    if len(sample) >= size:
-        return _limitsample(sample, size)
-
-    _updatesample(None, headrevs, sample, repo.changelog.parentrevs,
-                  quicksamplesize=size)
-    return sample
-
-def _takefullsample(repo, headrevs, revs, size):
-    if len(revs) <= size:
-        return list(revs)
-    sample = set(repo.revs('heads(%ld)', revs))
-
-    # update from heads
-    revsheads = set(repo.revs('heads(%ld)', revs))
-    _updatesample(revs, revsheads, sample, repo.changelog.parentrevs)
-
-    # update from roots
-    revsroots = set(repo.revs('roots(%ld)', revs))
-
-    # _updatesample() essentially does interaction over revisions to look up
-    # their children. This lookup is expensive and doing it in a loop is
-    # quadratic. We precompute the children for all relevant revisions and
-    # make the lookup in _updatesample() a simple dict lookup.
-    #
-    # Because this function can be called multiple times during discovery, we
-    # may still perform redundant work and there is room to optimize this by
-    # keeping a persistent cache of children across invocations.
-    children = {}
-
-    parentrevs = repo.changelog.parentrevs
-    for rev in repo.changelog.revs(start=min(revsroots)):
-        # Always ensure revision has an entry so we don't need to worry about
-        # missing keys.
-        children.setdefault(rev, [])
-
-        for prev in parentrevs(rev):
-            if prev == nullrev:
-                continue
-
-            children.setdefault(prev, []).append(rev)
-
-    _updatesample(revs, revsroots, sample, children.__getitem__)
-    assert sample
-    sample = _limitsample(sample, size)
-    if len(sample) < size:
-        more = size - len(sample)
-        sample.update(random.sample(list(revs - sample), more))
-    return sample
-
 def _limitsample(sample, desiredlen):
     """return a random subset of sample of at most desiredlen item"""
     if len(sample) > desiredlen:
@@ -228,6 +165,70 @@
         # common.bases and all its ancestors
         return self._common.basesheads()
 
+    def takequicksample(self, headrevs, size):
+        """takes a quick sample of size <size>
+
+        It is meant for initial sampling and focuses on querying heads and close
+        ancestors of heads.
+
+        :headrevs: set of head revisions in local DAG to consider
+        :size: the maximum size of the sample"""
+        revs = self.undecided
+        if len(revs) <= size:
+            return list(revs)
+        sample = set(self._repo.revs('heads(%ld)', revs))
+
+        if len(sample) >= size:
+            return _limitsample(sample, size)
+
+        _updatesample(None, headrevs, sample, self._repo.changelog.parentrevs,
+                      quicksamplesize=size)
+        return sample
+
+    def takefullsample(self, headrevs, size):
+        revs = self.undecided
+        if len(revs) <= size:
+            return list(revs)
+        repo = self._repo
+        sample = set(repo.revs('heads(%ld)', revs))
+
+        # update from heads
+        revsheads = set(repo.revs('heads(%ld)', revs))
+        _updatesample(revs, revsheads, sample, repo.changelog.parentrevs)
+
+        # update from roots
+        revsroots = set(repo.revs('roots(%ld)', revs))
+
+        # _updatesample() essentially does interaction over revisions to look
+        # up their children. This lookup is expensive and doing it in a loop is
+        # quadratic. We precompute the children for all relevant revisions and
+        # make the lookup in _updatesample() a simple dict lookup.
+        #
+        # Because this function can be called multiple times during discovery,
+        # we may still perform redundant work and there is room to optimize
+        # this by keeping a persistent cache of children across invocations.
+        children = {}
+
+        parentrevs = repo.changelog.parentrevs
+        for rev in repo.changelog.revs(start=min(revsroots)):
+            # Always ensure revision has an entry so we don't need to worry
+            # about missing keys.
+            children.setdefault(rev, [])
+
+            for prev in parentrevs(rev):
+                if prev == nullrev:
+                    continue
+
+                children.setdefault(prev, []).append(rev)
+
+        _updatesample(revs, revsroots, sample, children.__getitem__)
+        assert sample
+        sample = _limitsample(sample, size)
+        if len(sample) < size:
+            more = size - len(sample)
+            sample.update(random.sample(list(revs - sample), more))
+        return sample
+
 def findcommonheads(ui, local, remote,
                     initialsamplesize=100,
                     fullsamplesize=200,
@@ -309,14 +310,14 @@
             ui.note(_("sampling from both directions\n"))
         else:
             ui.debug("taking initial sample\n")
-            samplefunc = _takefullsample
+            samplefunc = disco.takefullsample
             targetsize = fullsamplesize
         else:
             # use even cheaper initial sample
             ui.debug("taking quick initial sample\n")
-            samplefunc = _takequicksample
+            samplefunc = disco.takequicksample
             targetsize = initialsamplesize
-        sample = samplefunc(local, ownheads, disco.undecided, targetsize)
+        sample = samplefunc(ownheads, targetsize)
 
         roundtrips += 1
         progress.update(roundtrips)
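
The comment inside `takefullsample` notes that the children map is rebuilt on every call and that a persistent cache across invocations would help. Below is a minimal sketch of how such a cache could hang off the `partialdiscovery` instance now that sampling is a method; the `_childrengetter` helper and `_childrenmap` attribute are illustrative names and are not part of this changeset.

```python
from mercurial.node import nullrev


class partialdiscovery(object):
    # ...existing attributes from this changeset: self._repo, self.undecided...

    def _childrengetter(self, revsroots):
        # Hypothetical helper (not in this changeset): build the children
        # map once and reuse it on later sampling rounds. The undecided set
        # only shrinks during discovery, so a map built from the first
        # (lowest) roots also covers the roots seen in later rounds.
        if getattr(self, '_childrenmap', None) is not None:
            return self._childrenmap.__getitem__
        children = {}
        parentrevs = self._repo.changelog.parentrevs
        for rev in self._repo.changelog.revs(start=min(revsroots)):
            # ensure every revision has an entry so lookups never miss
            children.setdefault(rev, [])
            for prev in parentrevs(rev):
                if prev == nullrev:
                    continue
                children.setdefault(prev, []).append(rev)
        self._childrenmap = children
        return children.__getitem__
```

With such a helper, `takefullsample` could pass `self._childrengetter(revsroots)` to `_updatesample()` instead of rebuilding the dict inline; whether to do so is left to follow-up changes, this patch only gives the sampling code a long-lived home.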