Mercurial > hg
changeset 39336:1c6ff52fe9cf
revlogdeltas: split candidate groups selection from the filtering logic
The group iteration has two main components:
* walking candidates, the logic that we are about to extend to build
intermediate snapshots,
* Making sure we test diffs against interesting bases. No duplicated tests,
skipping empty revisions, etc.
We split `_candidategroups` to separate the two components. This achieves two
goals:
* It will be simpler to update the walking logic for intermediate snapshots,
* We can gather the filtering logic from `finddeltainfo` into
`_candidategroups` to centralize it.
author | Boris Feld <boris.feld@octobus.net> |
---|---|
date | Wed, 29 Aug 2018 09:55:11 -0700 |
parents | 1441eb38849f |
children | 37957e07138c |
files | mercurial/revlogutils/deltas.py |
diffstat | 1 files changed, 47 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/revlogutils/deltas.py Sat Aug 18 07:32:05 2018 +0200 +++ b/mercurial/revlogutils/deltas.py Wed Aug 29 09:55:11 2018 -0700 @@ -569,9 +569,29 @@ return True def _candidategroups(revlog, p1, p2, cachedelta): + """Provides group of revision to be tested as delta base + + This top level function focus on emitting groups with unique and worthwhile + content. See _raw_candidate_groups for details about the group order. """ - Provides revisions that present an interest to be diffed against, - grouped by level of easiness. + # should we try to build a delta? + if not (len(revlog) and revlog._storedeltachains): + return + + tested = set([nullrev]) + for group in _rawgroups(revlog, p1, p2, cachedelta): + group = tuple(r for r in group if r not in tested) + tested.update(group) + if group: + yield group + +def _rawgroups(revlog, p1, p2, cachedelta): + """Provides group of revision to be tested as delta base + + This lower level function focus on emitting delta theorically interresting + without looking it any practical details. + + The group order aims at providing fast or small candidates first. """ gdelta = revlog._generaldelta curr = len(revlog) @@ -590,29 +610,33 @@ yield (cachedelta[0],) tested.add(cachedelta[0]) - if gdelta: - # exclude already lazy tested base if any - parents = [p for p in (p1, p2) - if p != nullrev and p not in tested] + # This condition is true most of the time when processing + # changegroup data into a generaldelta repo. The only time it + # isn't true is if this is the first revision in a delta chain + # or if ``format.generaldelta=true`` disabled ``lazydeltabase``. + if cachedelta and gdelta and revlog._lazydeltabase: + # Assume what we received from the server is a good choice + # build delta will reuse the cache + yield (cachedelta[0],) + + if gdelta: + # exclude already lazy tested base if any + parents = [p for p in (p1, p2) if p != nullrev] - if not revlog._deltabothparents and len(parents) == 2: - parents.sort() - # To minimize the chance of having to build a fulltext, - # pick first whichever parent is closest to us (max rev) - yield (parents[1],) - # then the other one (min rev) if the first did not fit - yield (parents[0],) - tested.update(parents) - elif len(parents) > 0: - # Test all parents (1 or 2), and keep the best candidate - yield parents - tested.update(parents) + if not revlog._deltabothparents and len(parents) == 2: + parents.sort() + # To minimize the chance of having to build a fulltext, + # pick first whichever parent is closest to us (max rev) + yield (parents[1],) + # then the other one (min rev) if the first did not fit + yield (parents[0],) + elif len(parents) > 0: + # Test all parents (1 or 2), and keep the best candidate + yield parents - if prev not in tested: - # other approach failed try against prev to hopefully save us a - # fulltext. - yield (prev,) - tested.add(prev) + # other approach failed try against prev to hopefully save us a + # fulltext. + yield (prev,) class deltacomputer(object): def __init__(self, revlog):