Mercurial > evolve
view hgext3rd/evolve/obsdiscovery.py @ 2083:778afb036245
discovery: introduce an official 'obshash' associated to a range
The hash is a combination of the hashes of its subranges. If the range is a leaf,
we have the obsmarker relevant to the node in that leaf range.
author | Pierre-Yves David <pierre-yves.david@ens-lyon.org> |
---|---|
date | Fri, 10 Mar 2017 10:36:46 -0800 |
parents | 3f787182509f |
children | b1f029dcf573 |
line wrap: on
line source
# Code dedicated to the discovery of obsolescence marker "over the wire" # # Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from __future__ import absolute_import try: import StringIO as io StringIO = io.StringIO except ImportError: import io StringIO = io.StringIO import collections import hashlib import math from mercurial import ( cmdutil, commands, dagutil, error, exchange, extensions, localrepo, node, obsolete, scmutil, setdiscovery, util, wireproto, ) from mercurial.hgweb import hgweb_mod from mercurial.i18n import _ from . import ( exthelper, utility, ) eh = exthelper.exthelper() obsexcmsg = utility.obsexcmsg ########################################## ### trigger discovery during exchange ### ########################################## @eh.wrapfunction(exchange, '_pushdiscoveryobsmarkers') def _pushdiscoveryobsmarkers(orig, pushop): if (obsolete.isenabled(pushop.repo, obsolete.exchangeopt) and pushop.repo.obsstore and 'obsolete' in pushop.remote.listkeys('namespaces')): repo = pushop.repo obsexcmsg(repo.ui, "computing relevant nodes\n") revs = list(repo.revs('::%ln', pushop.futureheads)) unfi = repo.unfiltered() cl = unfi.changelog if not pushop.remote.capable('_evoext_obshash_0'): # do not trust core yet # return orig(pushop) nodes = [cl.node(r) for r in revs] if nodes: obsexcmsg(repo.ui, "computing markers relevant to %i nodes\n" % len(nodes)) pushop.outobsmarkers = repo.obsstore.relevantmarkers(nodes) else: obsexcmsg(repo.ui, "markers already in sync\n") pushop.outobsmarkers = [] pushop.outobsmarkers = repo.obsstore.relevantmarkers(nodes) return common = [] obsexcmsg(repo.ui, "looking for common markers in %i nodes\n" % len(revs)) commonrevs = list(unfi.revs('::%ln', pushop.outgoing.commonheads)) common = findcommonobsmarkers(pushop.ui, unfi, pushop.remote, commonrevs) revs = list(unfi.revs('%ld - 
(::%ln)', revs, common)) nodes = [cl.node(r) for r in revs] if nodes: obsexcmsg(repo.ui, "computing markers relevant to %i nodes\n" % len(nodes)) pushop.outobsmarkers = repo.obsstore.relevantmarkers(nodes) else: obsexcmsg(repo.ui, "markers already in sync\n") pushop.outobsmarkers = [] @eh.extsetup def _installobsmarkersdiscovery(ui): olddisco = exchange.pushdiscoverymapping['obsmarker'] def newdisco(pushop): _pushdiscoveryobsmarkers(olddisco, pushop) exchange.pushdiscoverymapping['obsmarker'] = newdisco def buildpullobsmarkersboundaries(pullop): """small function returning the argument for pull markers call may to contains 'heads' and 'common'. skip the key for None. It is a separed function to play around with strategy for that.""" repo = pullop.repo remote = pullop.remote unfi = repo.unfiltered() revs = unfi.revs('::(%ln - null)', pullop.common) common = [node.nullid] if remote.capable('_evoext_obshash_0'): obsexcmsg(repo.ui, "looking for common markers in %i nodes\n" % len(revs)) common = findcommonobsmarkers(repo.ui, repo, remote, revs) return {'heads': pullop.pulledsubset, 'common': common} ################################## ### Code performing discovery ### ################################## def findcommonobsmarkers(ui, local, remote, probeset, initialsamplesize=100, fullsamplesize=200): # from discovery roundtrips = 0 cl = local.changelog dag = dagutil.revlogdag(cl) missing = set() common = set() undecided = set(probeset) totalnb = len(undecided) ui.progress(_("comparing with other"), 0, total=totalnb) _takefullsample = setdiscovery._takefullsample if remote.capable('_evoext_obshash_1'): getremotehash = remote.evoext_obshash1 localhash = _obsrelsethashtreefm1(local) else: getremotehash = remote.evoext_obshash localhash = _obsrelsethashtreefm0(local) while undecided: ui.note(_("sampling from both directions\n")) if len(undecided) < fullsamplesize: sample = set(undecided) else: sample = _takefullsample(dag, undecided, size=fullsamplesize) roundtrips += 1 
ui.progress(_("comparing with other"), totalnb - len(undecided), total=totalnb) ui.debug("query %i; still undecided: %i, sample size is: %i\n" % (roundtrips, len(undecided), len(sample))) # indices between sample and externalized version must match sample = list(sample) remotehash = getremotehash(dag.externalizeall(sample)) yesno = [localhash[ix][1] == remotehash[si] for si, ix in enumerate(sample)] commoninsample = set(n for i, n in enumerate(sample) if yesno[i]) common.update(dag.ancestorset(commoninsample, common)) missinginsample = [n for i, n in enumerate(sample) if not yesno[i]] missing.update(dag.descendantset(missinginsample, missing)) undecided.difference_update(missing) undecided.difference_update(common) ui.progress(_("comparing with other"), None) result = dag.headsetofconnecteds(common) ui.debug("%d total queries\n" % roundtrips) if not result: return set([node.nullid]) return dag.externalizeall(result) ################################## ### Stable topological sorting ### ################################## @eh.command( 'debugstablesort', [ ('', 'rev', [], 'heads to start from'), ] + commands.formatteropts, _('')) def debugstablesort(ui, repo, **opts): """display the ::REVS set topologically sorted in a stable way """ revs = scmutil.revrange(repo, opts['rev']) displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True) for r in _stablesort(repo, revs): ctx = repo[r] displayer.show(ctx) displayer.flush(ctx) displayer.close() def _stablesort(repo, revs): """return '::revs' topologically sorted in "stable" order This is a depth first traversal starting from 'nullrev', using node as a tie breaker. """ # Various notes: # # * Bitbucket is used dates as tie breaker, that might be a good idea. 
# # * It seemds we can traverse in the same order from (one) head to bottom, # if we the following record data for each merge: # # - highest (stablesort-wise) common ancestors, # - order of parents (tablesort-wise) cl = repo.changelog parents = cl.parentrevs nullrev = node.nullrev n = cl.node # step 1: We need a parents -> children mapping for 2 reasons. # # * we build the order from nullrev to tip # # * we need to detect branching children = collections.defaultdict(list) for r in cl.ancestors(revs, inclusive=True): p1, p2 = parents(r) children[p1].append(r) if p2 != nullrev: children[p2].append(r) # step two: walk back up # * pick lowest node in case of branching # * stack disregarded part of the branching # * process merge when both parents are yielded # track what changeset has been seen = [0] * (max(revs) + 1) # starts from repository roots # reuse the list form the mapping as we won't need it again anyway stack = children[nullrev] if not stack: return [] if 1 < len(stack): stack.sort(key=n, reverse=True) # list of rev, maybe we should yield, but since we built a children mapping we are 'O(N)' already result = [] current = stack.pop() while current is not None or stack: if current is None: # previous iteration reached a merge or an unready merge, current = stack.pop() if seen[current]: current = None continue p1, p2 = parents(current) if p2 != nullrev and not (seen[p1] and seen[p2]): # we can't iterate on this merge yet because other child is not # yielded yet (and we are topo sorting) we can discard it for now # because it will be reached from the other child. 
current = None continue seen[current] = True result.append(current) # could be yield, cf earlier comment cs = children[current] if not cs: current = None elif 1 == len(cs): current = cs[0] else: cs.sort(key=n, reverse=True) current = cs.pop() # proceed on smallest stack.extend(cs) # stack the rest for later return result ############################## ### Range Hash computation ### ############################## @eh.command( 'debugstablerange', [ ('', 'rev', [], 'heads to start from'), ], _('')) def debugstablerange(ui, repo, **opts): """display the ::REVS set topologically sorted in a stable way """ n = repo.changelog.node s = node.short revs = scmutil.revrange(repo, opts['rev']) toproceed = [_range(repo, r, 0, ) for r in revs] ranges = set(toproceed) while toproceed: entry = toproceed.pop() for r in entry.subranges(): if r not in ranges: ranges.add(r) toproceed.append(r) ranges = list(ranges) ranges.sort(key=lambda r: (-len(r), n(r.head))) ui.status('rev node index size depth obshash\n') for r in ranges: d = (r.head, s(n(r.head)), r.index, len(r), r.depth, node.short(r.obshash)) ui.status('%3d %s %5d %4d %5d %s\n' % d) _depthcache = {} def _depth(repo, rev): cl = repo.changelog n = cl.node(rev) depth = _depthcache.get(n, None) if depth is None: depth = _depthcache[n] = len(list(cl.ancestors([rev], inclusive=True))) return depth def _hlp2(i): """return highest power of two lower than 'i'""" return 2 ** int(math.log(i - 1, 2)) class _range(object): def __init__(self, repo, head, index, revs=None): self._repo = repo self.head = head self.index = index if revs is not None: self._revs = revs assert index < self.depth, (head, index, self.depth, revs) def __hash__(self): return self._id def __eq__(self, other): if type(self) != type(other): raise NotImplementedError() return self.stablekey == other.stablekey @util.propertycache def _id(self): return hash(self.stablekey) @util.propertycache def stablekey(self): return (self.node, self.index) @util.propertycache def 
node(self): return self._repo.changelog.node(self.head) def __len__(self): return len(self._revs) @util.propertycache def depth(self): return _depth(self._repo, self.head) @util.propertycache def _revs(self): return _stablesort(self._repo, [self.head])[self.index:] def _slicesat(self, globalindex): localindex = globalindex - self.index cl = self._repo.changelog bottom = self._revs[:localindex] top = self._revs[localindex:] bheads = self._repo.revs('heads(%ld)', bottom) result = [] if len(bheads) == 1: newhead = bottom[-1] newstart = _depth(self._repo, newhead) - len(bottom) result.append(_range(self._repo, newhead, newstart, bottom)) else: cl = self._repo.changelog for h in bheads: subset = cl.ancestors([h], inclusive=True) hrevs = [r for r in bottom if r in subset] start = _depth(self._repo, h) - len(hrevs) entry = _range(self._repo, h, start, [r for r in bottom if r in subset]) result.append(entry) result.append(_range(self._repo, self.head, globalindex, top)) return result def subranges(self): if len(self) == 1: return [] step = _hlp2(self.depth) standard_start = 0 while standard_start < self.index and 0 < step: if standard_start + step < self.depth: standard_start += step step //= 2 if self.index == standard_start: slicesize = _hlp2(len(self)) return self._slicesat(self.index + slicesize) else: assert standard_start < self.depth return self._slicesat(standard_start) @util.propertycache def obshash(self): cache = self._repo.obsstore.rangeobshashcache cl = self._repo.changelog n = cl.node obshash = cache.get(self._id) if obshash is not None: return obshash sha = hashlib.sha1() count = 0 if len(self) == 1: tmarkers = self._repo.obsstore.relevantmarkers([n(self.head)]) bmarkers = [] for m in tmarkers: mbin = obsolete._fm1encodeonemarker(m) bmarkers.append(mbin) bmarkers.sort() for m in bmarkers: count += 1 sha.update(m) else: for subrange in self.subranges(): obshash = subrange.obshash if obshash != node.nullid: count += 1 sha.update(obshash) # note: if there is 
only one subrange with actual data, we'll just # reuse the same hash. if not count: obshash = node.nullid elif count != 1 or obshash is None: obshash = cache[self._id] = sha.digest() return obshash @eh.wrapfunction(obsolete.obsstore, '_addmarkers') def _addmarkers(orig, obsstore, *args, **kwargs): obsstore.rangeobshashcache.clear() return orig(obsstore, *args, **kwargs) @eh.addattr(obsolete.obsstore, 'rangeobshashcache') @util.propertycache def rangeobshashcache(obsstore): return {} ############################# ### Tree Hash computation ### ############################# # Dash computed from a given changesets using all markers relevant to it and # the obshash of its parents. This is similar to what happend for changeset # node where the parent is used in the computation @eh.command( 'debugobsrelsethashtree', [('', 'v0', None, 'hash on marker format "0"'), ('', 'v1', None, 'hash on marker format "1" (default)')], _('')) def debugobsrelsethashtree(ui, repo, v0=False, v1=False): """display Obsolete markers, Relevant Set, Hash Tree changeset-node obsrelsethashtree-node It computed form the "orsht" of its parent and markers relevant to the changeset itself.""" if v0 and v1: raise error.Abort('cannot only specify one format') elif v0: treefunc = _obsrelsethashtreefm0 else: treefunc = _obsrelsethashtreefm1 for chg, obs in treefunc(repo): ui.status('%s %s\n' % (node.hex(chg), node.hex(obs))) def _obsrelsethashtreefm0(repo): return _obsrelsethashtree(repo, obsolete._fm0encodeonemarker) def _obsrelsethashtreefm1(repo): return _obsrelsethashtree(repo, obsolete._fm1encodeonemarker) def _obsrelsethashtree(repo, encodeonemarker): cache = [] unfi = repo.unfiltered() markercache = {} repo.ui.progress(_("preparing locally"), 0, total=len(unfi)) for i in unfi: ctx = unfi[i] entry = 0 sha = hashlib.sha1() # add data from p1 for p in ctx.parents(): p = p.rev() if p < 0: p = node.nullid else: p = cache[p][1] if p != node.nullid: entry += 1 sha.update(p) tmarkers = 
repo.obsstore.relevantmarkers([ctx.node()]) if tmarkers: bmarkers = [] for m in tmarkers: if m not in markercache: markercache[m] = encodeonemarker(m) bmarkers.append(markercache[m]) bmarkers.sort() for m in bmarkers: entry += 1 sha.update(m) if entry: cache.append((ctx.node(), sha.digest())) else: cache.append((ctx.node(), node.nullid)) repo.ui.progress(_("preparing locally"), i, total=len(unfi)) repo.ui.progress(_("preparing locally"), None) return cache def _obshash(repo, nodes, version=0): if version == 0: hashs = _obsrelsethashtreefm0(repo) elif version == 1: hashs = _obsrelsethashtreefm1(repo) else: assert False nm = repo.changelog.nodemap revs = [nm.get(n) for n in nodes] return [r is None and node.nullid or hashs[r][1] for r in revs] @eh.addattr(localrepo.localpeer, 'evoext_obshash') def local_obshash(peer, nodes): return _obshash(peer._repo, nodes) @eh.addattr(localrepo.localpeer, 'evoext_obshash1') def local_obshash1(peer, nodes): return _obshash(peer._repo, nodes, version=1) @eh.addattr(wireproto.wirepeer, 'evoext_obshash') def peer_obshash(self, nodes): d = self._call("evoext_obshash", nodes=wireproto.encodelist(nodes)) try: return wireproto.decodelist(d) except ValueError: self._abort(error.ResponseError(_("unexpected response:"), d)) @eh.addattr(wireproto.wirepeer, 'evoext_obshash1') def peer_obshash1(self, nodes): d = self._call("evoext_obshash1", nodes=wireproto.encodelist(nodes)) try: return wireproto.decodelist(d) except ValueError: self._abort(error.ResponseError(_("unexpected response:"), d)) def srv_obshash(repo, proto, nodes): return wireproto.encodelist(_obshash(repo, wireproto.decodelist(nodes))) def srv_obshash1(repo, proto, nodes): return wireproto.encodelist(_obshash(repo, wireproto.decodelist(nodes), version=1)) def _obshash_capabilities(orig, repo, proto): """wrapper to advertise new capability""" caps = orig(repo, proto) if obsolete.isenabled(repo, obsolete.exchangeopt): caps = caps.split() caps.append('_evoext_obshash_0') 
caps.append('_evoext_obshash_1') caps.sort() caps = ' '.join(caps) return caps @eh.extsetup def obshash_extsetup(ui): hgweb_mod.perms['evoext_obshash'] = 'pull' hgweb_mod.perms['evoext_obshash1'] = 'pull' wireproto.commands['evoext_obshash'] = (srv_obshash, 'nodes') wireproto.commands['evoext_obshash1'] = (srv_obshash1, 'nodes') extensions.wrapfunction(wireproto, 'capabilities', _obshash_capabilities) # wrap command content oldcap, args = wireproto.commands['capabilities'] def newcap(repo, proto): return _obshash_capabilities(oldcap, repo, proto) wireproto.commands['capabilities'] = (newcap, args)