view mercurial/pvec.py @ 29610:754f63671229 stable
rebase: turn rebase revs into set before filtering obsolete
When the inhibit extension from mutable-history is enabled, it attempts to
iterate over the rebaseset to prevent the nodes being rebased from being
marked obsolete. This happens while rebase's _filterobsoleterevs function is
itself iterating over the rebaseset to figure out which revs are obsolete.
Having both iterate over the same revset generatorset raises a
'generator already executing' exception. This is
probably a flaw in the revset implementation, since iterating over the same
set twice should be supported.
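A minimal, self-contained sketch of that failure mode (toy code, not the actual revset or inhibit implementation): a generator whose body calls back into code that does a membership test on the same, still-running generator fails the same way.

```python
# Toy reproduction of the re-entrancy, not Mercurial code: the callback plays
# the role of inhibit's hook, which consults the same lazy revset that is
# being consumed at that very moment.
def lazyrevs(callback):
    for r in range(5):
        callback(r)   # runs while this generator's frame is still executing
        yield r

revs = lazyrevs(lambda r: r in revs)   # the callback re-enters the live generator

try:
    obsolete = set(r for r in revs)
except ValueError as exc:
    print(exc)   # "generator already executing"
```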
This regression was introduced in 5d16ebe7b14, since it changed
_filterobsoleterevs to be called before the rebaseset was turned into a
set(). For now let’s just make the rebaseset an actual set again before
calling that function. This was caught by the inhibit tests.
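The fix is correspondingly small. In the spirit of the toy sketch above (not the verbatim rebase.py diff): once the revs live in a plain set(), a second consumer doing membership tests while the first one iterates never touches a live generator.

```python
# Toy version of the fix: materialize a real set() up front, so every later
# pass (rebase's obsolete filter, inhibit's membership tests) works on plain
# data instead of a shared generator.
def lazyrevs():
    for r in range(5):
        yield r

rebaseset = set(lazyrevs())                # like turning rebaseset into set()

def inhibit_check(r):                      # stand-in for the inhibit hook
    return r in rebaseset                  # membership test on a set is safe

obsrevs = set(r for r in rebaseset if inhibit_check(r))   # no re-entrancy
```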
The relevant call stack from test-inhibit.t:
File "/tmp/hgtests.jgjrN5/install/lib/python/hgext/rebase.py", line 285, in _preparenewrebase
obsrevs = _filterobsoleterevs(self.repo, rebaseset)
File "/data/hgbuild/facebook-hg-rpms/mutable-history/hgext/inhibit.py", line 197, in _filterobsoleterevswrap
r = orig(repo, rebasesetrevs, *args, **kwargs)
File "/tmp/hgtests.jgjrN5/install/lib/python/hgext/rebase.py", line 1380, in _filterobsoleterevs
return set(r for r in revs if repo[r].obsolete())
File "/tmp/hgtests.jgjrN5/install/lib/python/hgext/rebase.py", line 1380, in <genexpr>
return set(r for r in revs if repo[r].obsolete())
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/revset.py", line 3079, in _iterordered
val2 = next(iter2)
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/revset.py", line 3417, in gen
yield nextrev()
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/revset.py", line 3424, in _consumegen
for item in self._gen:
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/revset.py", line 71, in iterate
cl = repo.changelog
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/repoview.py", line 319, in changelog
revs = filterrevs(unfi, self.filtername)
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/repoview.py", line 261, in filterrevs
repo.filteredrevcache[filtername] = func(repo.unfiltered())
File "/data/hgbuild/facebook-hg-rpms/mutable-history/hgext/directaccess.py", line 65, in _computehidden
hidden = repoview.filterrevs(repo, 'visible')
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/repoview.py", line 261, in filterrevs
repo.filteredrevcache[filtername] = func(repo.unfiltered())
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/repoview.py", line 175, in computehidden
hideable = hideablerevs(repo)
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/repoview.py", line 33, in hideablerevs
return obsolete.getrevs(repo, 'obsolete')
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/obsolete.py", line 1097, in getrevs
repo.obsstore.caches[name] = cachefuncs[name](repo)
File "/data/hgbuild/facebook-hg-rpms/mutable-history/hgext/inhibit.py", line 255, in _computeobsoleteset
if getrev(n) not in blacklist:
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/revset.py", line 3264, in __contains__
return x in self._r1 or x in self._r2
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/revset.py", line 3348, in __contains__
for l in self._consumegen():
File "/tmp/hgtests.jgjrN5/install/lib/python/mercurial/revset.py", line 3424, in _consumegen
for item in self._gen:
ValueError: generator already executing
| author   | Simon Farnsworth <simonfar@fb.com> |
| ---      | ---                                |
| date     | Tue, 19 Jul 2016 03:29:53 -0700    |
| parents  | 983e93d88193                       |
| children | 4462a981e8df                       |
```python
# pvec.py - probabilistic vector clocks for Mercurial
#
# Copyright 2012 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''
A "pvec" is a changeset property based on the theory of vector clocks
that can be compared to discover relatedness without consulting a graph.
This can be useful for tasks like determining how a disconnected patch
relates to a repository.

Currently a pvec consist of 448 bits, of which 24 are 'depth' and the
remainder are a bit vector. It is represented as a 70-character base85
string.

Construction:

- a root changeset has a depth of 0 and a bit vector based on its hash
- a normal commit has a changeset where depth is increased by one and
  one bit vector bit is flipped based on its hash
- a merge changeset pvec is constructed by copying changes from one pvec into
  the other to balance its depth

Properties:

- for linear changes, difference in depth is always <= hamming distance
- otherwise, changes are probably divergent
- when hamming distance is < 200, we can reliably detect when pvecs are near

Issues:

- hamming distance ceases to work over distances of ~ 200
- detecting divergence is less accurate when the common ancestor is very close
  to either revision or total distance is high
- this could probably be improved by modeling the relation between delta and
  hdist

Uses:

- a patch pvec can be used to locate the nearest available common ancestor
  for resolving conflicts
- ordering of patches can be established without a DAG
- two head pvecs can be compared to determine whether push/pull/merge is
  needed and approximately how many changesets are involved
- can be used to find a heuristic divergence measure between changesets on
  different branches
'''

from __future__ import absolute_import

from .node import nullrev
from . import (
    base85,
    util,
)

_size = 448 # 70 chars b85-encoded
_bytes = _size / 8
_depthbits = 24
_depthbytes = _depthbits / 8
_vecbytes = _bytes - _depthbytes
_vecbits = _vecbytes * 8
_radius = (_vecbits - 30) / 2 # high probability vectors are related

def _bin(bs):
    '''convert a bytestring to a long'''
    v = 0
    for b in bs:
        v = v * 256 + ord(b)
    return v

def _str(v, l):
    bs = ""
    for p in xrange(l):
        bs = chr(v & 255) + bs
        v >>= 8
    return bs

def _split(b):
    '''depth and bitvec'''
    return _bin(b[:_depthbytes]), _bin(b[_depthbytes:])

def _join(depth, bitvec):
    return _str(depth, _depthbytes) + _str(bitvec, _vecbytes)

def _hweight(x):
    c = 0
    while x:
        if x & 1:
            c += 1
        x >>= 1
    return c
_htab = [_hweight(x) for x in xrange(256)]

def _hamming(a, b):
    '''find the hamming distance between two longs'''
    d = a ^ b
    c = 0
    while d:
        c += _htab[d & 0xff]
        d >>= 8
    return c

def _mergevec(x, y, c):
    # Ideally, this function would be x ^ y ^ ancestor, but finding
    # ancestors is a nuisance. So instead we find the minimal number
    # of changes to balance the depth and hamming distance

    d1, v1 = x
    d2, v2 = y

    if d1 < d2:
        d1, d2, v1, v2 = d2, d1, v2, v1

    hdist = _hamming(v1, v2)
    ddist = d1 - d2
    v = v1
    m = v1 ^ v2 # mask of different bits
    i = 1

    if hdist > ddist:
        # if delta = 10 and hdist = 100, then we need to go up 55 steps
        # to the ancestor and down 45
        changes = (hdist - ddist + 1) / 2
    else:
        # must make at least one change
        changes = 1
    depth = d1 + changes

    # copy changes from v2
    if m:
        while changes:
            if m & i:
                v ^= i
                changes -= 1
            i <<= 1
    else:
        v = _flipbit(v, c)

    return depth, v

def _flipbit(v, node):
    # converting bit strings to longs is slow
    bit = (hash(node) & 0xffffffff) % _vecbits
    return v ^ (1<<bit)

def ctxpvec(ctx):
    '''construct a pvec for ctx while filling in the cache'''
    r = ctx.repo()
    if not util.safehasattr(r, "_pveccache"):
        r._pveccache = {}
    pvc = r._pveccache
    if ctx.rev() not in pvc:
        cl = r.changelog
        for n in xrange(ctx.rev() + 1):
            if n not in pvc:
                node = cl.node(n)
                p1, p2 = cl.parentrevs(n)
                if p1 == nullrev:
                    # start with a 'random' vector at root
                    pvc[n] = (0, _bin((node * 3)[:_vecbytes]))
                elif p2 == nullrev:
                    d, v = pvc[p1]
                    pvc[n] = (d + 1, _flipbit(v, node))
                else:
                    pvc[n] = _mergevec(pvc[p1], pvc[p2], node)
    bs = _join(*pvc[ctx.rev()])
    return pvec(base85.b85encode(bs))

class pvec(object):
    def __init__(self, hashorctx):
        if isinstance(hashorctx, str):
            self._bs = hashorctx
            self._depth, self._vec = _split(base85.b85decode(hashorctx))
        else:
            self._vec = ctxpvec(hashorctx)

    def __str__(self):
        return self._bs

    def __eq__(self, b):
        return self._vec == b._vec and self._depth == b._depth

    def __lt__(self, b):
        delta = b._depth - self._depth
        if delta < 0:
            return False # always correct
        if _hamming(self._vec, b._vec) > delta:
            return False
        return True

    def __gt__(self, b):
        return b < self

    def __or__(self, b):
        delta = abs(b._depth - self._depth)
        if _hamming(self._vec, b._vec) <= delta:
            return False
        return True

    def __sub__(self, b):
        if self | b:
            raise ValueError("concurrent pvecs")
        return self._depth - b._depth

    def distance(self, b):
        d = abs(b._depth - self._depth)
        h = _hamming(self._vec, b._vec)
        return max(d, h)

    def near(self, b):
        dist = abs(b.depth - self._depth)
        if dist > _radius or _hamming(self._vec, b._vec) > _radius:
            return False
```
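As a rough, self-contained illustration of the comparison rule the pvec class encodes (made-up depths and bit vectors, no repository involved): one pvec can only be an ancestor of another if the depth difference accounts for every flipped bit, mirroring __lt__ above.

```python
# Standalone illustration of the ordering rule in pvec.__lt__, using
# hypothetical (depth, bitvec) pairs rather than real changeset data.
def hamming(a, b):
    return bin(a ^ b).count('1')

a = (10, 0b10110100)          # hypothetical ancestor
b = (13, 0b10010001)          # three commits later, three bits flipped

print(b[0] - a[0] >= hamming(a[1], b[1]))        # True: a could precede b

c = (11, 0b01001011)          # similar depth, but many differing bits
print(abs(c[0] - a[0]) >= hamming(a[1], c[1]))   # False: a and c look divergent
```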