# HG changeset patch # User Pierre-Yves David # Date 1498119209 -7200 # Node ID 13313d0cab7138fd74d644217635ec629a4371c1 # Parent 839c2879edccd82df0dfc095fb0dcc921c6f71b6 topicmap: massive rework Massively rework the way we build and use topicmap. This bring massive performance benefit. Topic map use to be a fully independant thing that we would switch on and off globaly. The caching on disk was broken so the performance were atrocious. Intead, now the topic are inherited from the 'immutable' map. We gave up on storing them on disk for now since the mutable set is usually small enough. The activation is done by hacking new "filter" on the repository and detection when they are one. This is hacky but core is hard to wrap here. Overall this whole wrapping is really scary and we should massage core API to help it. diff -r 839c2879edcc -r 13313d0cab71 README --- a/README Thu Jun 22 09:47:14 2017 +0200 +++ b/README Thu Jun 22 10:13:29 2017 +0200 @@ -131,6 +131,7 @@ - stack: properly abort when and unknown topic is requested, - topic: changing topic on revs no longer adds extra instability (issue5441) - topic: topics: rename '--change' flag to '--rev' flag, + - topic: multiple large performance improvements, 6.4.1 - in progress ------------------- diff -r 839c2879edcc -r 13313d0cab71 hgext3rd/topic/__init__.py --- a/hgext3rd/topic/__init__.py Thu Jun 22 09:47:14 2017 +0200 +++ b/hgext3rd/topic/__init__.py Thu Jun 22 10:13:29 2017 +0200 @@ -56,7 +56,6 @@ from mercurial.i18n import _ from mercurial import ( - branchmap, cmdutil, commands, context, @@ -169,6 +168,8 @@ if not isinstance(repo, localrepo.localrepository): return # this can be a peer in the ssh case (puzzling) + repo = repo.unfiltered() + if repo.ui.config('experimental', 'thg.displaynames', None) is None: repo.ui.setconfig('experimental', 'thg.displaynames', 'topics', source='topic-extension') @@ -191,6 +192,11 @@ self.ui.restoreconfig(backup) def commitctx(self, ctx, error=None): + topicfilter = topicmap.topicfilter(self.filtername) + if topicfilter != self.filtername: + other = repo.filtered(topicmap.topicfilter(repo.filtername)) + other.commitctx(ctx, error=error) + if isinstance(ctx, context.workingcommitctx): current = self.currenttopic if current: @@ -201,8 +207,7 @@ not self.currenttopic): # we are amending and need to remove a topic del ctx.extra()[constants.extrakey] - with topicmap.usetopicmap(self): - return super(topicrepo, self).commitctx(ctx, error=error) + return super(topicrepo, self).commitctx(ctx, error=error) @property def topics(self): @@ -219,23 +224,21 @@ def currenttopic(self): return self.vfs.tryread('topic') - def branchmap(self, topic=True): - if not topic: - super(topicrepo, self).branchmap() - with topicmap.usetopicmap(self): - branchmap.updatecache(self) - return self._topiccaches[self.filtername] + # overwritten at the instance level by topicmap.py + _autobranchmaptopic = True - def destroyed(self, *args, **kwargs): - with topicmap.usetopicmap(self): - return super(topicrepo, self).destroyed(*args, **kwargs) + def branchmap(self, topic=None): + if topic is None: + topic = self._autobranchmaptopic + topicfilter = topicmap.topicfilter(self.filtername) + if not topic or topicfilter == self.filtername: + return super(topicrepo, self).branchmap() + return self.filtered(topicfilter).branchmap() def invalidatevolatilesets(self): # XXX we might be able to move this to something invalidated less often super(topicrepo, self).invalidatevolatilesets() self._topics = None - if '_topiccaches' in vars(self.unfiltered()): - self.unfiltered()._topiccaches.clear() def peer(self): peer = super(topicrepo, self).peer() diff -r 839c2879edcc -r 13313d0cab71 hgext3rd/topic/destination.py --- a/hgext3rd/topic/destination.py Thu Jun 22 09:47:14 2017 +0200 +++ b/hgext3rd/topic/destination.py Thu Jun 22 10:13:29 2017 +0200 @@ -89,14 +89,14 @@ # but that is expensive # # we should write plain code instead - with topicmap.usetopicmap(repo): - tmap = repo.branchmap() - if branch not in tmap: - return [] - elif all: - return tmap.branchheads(branch) - else: - return [tmap.branchtip(branch)] + + tmap = topicmap.gettopicrepo(repo).branchmap() + if branch not in tmap: + return [] + elif all: + return tmap.branchheads(branch) + else: + return [tmap.branchtip(branch)] def modsetup(ui): """run a uisetup time to install all destinations wrapping""" diff -r 839c2879edcc -r 13313d0cab71 hgext3rd/topic/discovery.py --- a/hgext3rd/topic/discovery.py Thu Jun 22 09:47:14 2017 +0200 +++ b/hgext3rd/topic/discovery.py Thu Jun 22 10:13:29 2017 +0200 @@ -4,7 +4,6 @@ from mercurial.i18n import _ from mercurial import ( - branchmap, bundle2, discovery, error, @@ -13,15 +12,15 @@ wireproto, ) -from . import topicmap - def _headssummary(orig, *args): # In mercurial < 4.2, we receive repo, remote and outgoing as arguments if len(args) == 3: + pushoparg = False repo, remote, outgoing = args # In mercurial > 4.3, we receive the pushop as arguments elif len(args) == 1: + pushoparg = True pushop = args[0] repo = pushop.repo.unfiltered() remote = pushop.remote @@ -33,38 +32,44 @@ or bool(remote.listkeys('phases').get('publishing', False))) if publishing or not remote.capable('topics'): return orig(*args) - oldrepo = repo.__class__ - oldbranchcache = branchmap.branchcache - oldfilename = branchmap._filename - try: - class repocls(repo.__class__): - def __getitem__(self, key): - ctx = super(repocls, self).__getitem__(key) - oldbranch = ctx.branch + + class repocls(repo.__class__): + def __getitem__(self, key): + ctx = super(repocls, self).__getitem__(key) + oldbranch = ctx.branch + + def branch(): + branch = oldbranch() + topic = ctx.topic() + if topic: + branch = "%s:%s" % (branch, topic) + return branch - def branch(): - branch = oldbranch() - topic = ctx.topic() - if topic: - branch = "%s:%s" % (branch, topic) - return branch + ctx.branch = branch + return ctx - ctx.branch = branch - return ctx - + oldrepo = repo.__class__ + try: repo.__class__ = repocls - branchmap.branchcache = topicmap.topiccache - branchmap._filename = topicmap._filename - summary = orig(*args) + unxx = repo.filtered('unfiltered-topic') + repo.unfiltered = lambda: unxx + if pushoparg: + try: + pushop.repo = repo + summary = orig(pushop) + finally: + pushop.repo = repo + else: + summary = orig(repo, remote, outgoing) for key, value in summary.iteritems(): if ':' in key: # This is a topic if value[0] is None and value[1]: summary[key] = ([value[1].pop(0)], ) + value[1:] return summary finally: + if 'unfiltered' in vars(repo): + del repo.unfiltered repo.__class__ = oldrepo - branchmap.branchcache = oldbranchcache - branchmap._filename = oldfilename def wireprotobranchmap(orig, repo, proto): oldrepo = repo.__class__ diff -r 839c2879edcc -r 13313d0cab71 hgext3rd/topic/topicmap.py --- a/hgext3rd/topic/topicmap.py Thu Jun 22 09:47:14 2017 +0200 +++ b/hgext3rd/topic/topicmap.py Thu Jun 22 10:13:29 2017 +0200 @@ -1,27 +1,63 @@ import contextlib import hashlib -from mercurial.node import hex, bin, nullid +from mercurial.node import nullid from mercurial import ( branchmap, changegroup, cmdutil, - encoding, - error, extensions, - scmutil, + repoview, ) -def _filename(repo): - """name of a branchcache file for a given repo or repoview""" - filename = "cache/topicmap" - if repo.filtername: - filename = '%s-%s' % (filename, repo.filtername) - return filename +basefilter = set(['base', 'immutable']) +def topicfilter(name): + """return a "topic" version of a filter level""" + if name in basefilter: + return name + elif name is None: + return None + elif name.endswith('-topic'): + return name + else: + return name + '-topic' + +def istopicfilter(filtername): + if filtername is None: + return False + return filtername.endswith('-topic') + +def gettopicrepo(repo): + filtername = topicfilter(repo.filtername) + if filtername == repo.filtername: + return repo + return repo.filtered(filtername) -oldbranchcache = branchmap.branchcache +def _setuptopicfilter(ui): + """extend the filter related mapping with topic related one""" + funcmap = repoview.filtertable + partialmap = branchmap.subsettable + + # filter level not affected by topic that we should not override + + for plainname in list(funcmap): + newfilter = topicfilter(plainname) + if newfilter == plainname: + continue + + def revsfunc(repo, name=plainname): + return repoview.filterrevs(repo, name) + + base = topicfilter(partialmap[plainname]) + + if newfilter not in funcmap: + funcmap[newfilter] = revsfunc + partialmap[newfilter] = base + funcmap['unfiltered-topic'] = lambda repo: frozenset() + partialmap['unfiltered-topic'] = 'visible-topic' def _phaseshash(repo, maxrev): + """uniq ID for a phase matching a set of rev""" revs = set() cl = repo.changelog fr = cl.filteredrevs @@ -40,61 +76,65 @@ key = s.digest() return key -@contextlib.contextmanager -def usetopicmap(repo): - """use awful monkey patching to ensure topic map usage +def modsetup(ui): + """call at uisetup time to install various wrappings""" + _setuptopicfilter(ui) + _wrapbmcache(ui) + extensions.wrapfunction(changegroup.cg1unpacker, 'apply', cgapply) + extensions.wrapfunction(cmdutil, 'commitstatus', commitstatus) - During the extend of the context block, The topicmap should be used and - updated instead of the branchmap.""" - oldbranchcache = branchmap.branchcache - oldfilename = branchmap._filename - oldread = branchmap.read - oldcaches = getattr(repo, '_branchcaches', {}) - try: - branchmap.branchcache = topiccache - branchmap._filename = _filename - branchmap.read = readtopicmap - repo._branchcaches = getattr(repo, '_topiccaches', {}) - yield - repo._topiccaches = repo._branchcaches - finally: - repo._branchcaches = oldcaches - branchmap.branchcache = oldbranchcache - branchmap._filename = oldfilename - branchmap.read = oldread - -def cgapply(orig, repo, *args, **kwargs): +def cgapply(orig, self, repo, *args, **kwargs): """make sure a topicmap is used when applying a changegroup""" - with usetopicmap(repo): - return orig(repo, *args, **kwargs) + other = repo.filtered(topicfilter(repo.filtername)) + return orig(self, other, *args, **kwargs) def commitstatus(orig, repo, node, branch, bheads=None, opts=None): # wrap commit status use the topic branch heads ctx = repo[node] if ctx.topic() and ctx.branch() == branch: - bheads = repo.branchheads("%s:%s" % (branch, ctx.topic())) + subbranch = "%s:%s" % (branch, ctx.topic()) + bheads = repo.branchheads("%s:%s" % (subbranch, ctx.topic())) return orig(repo, node, branch, bheads=bheads, opts=opts) -class topiccache(oldbranchcache): +def _wrapbmcache(ui): + class topiccache(_topiccache, branchmap.branchcache): + pass + branchmap.branchcache = topiccache + extensions.wrapfunction(branchmap, 'updatecache', _wrapupdatebmcache) + +def _wrapupdatebmcache(orig, repo): + previous = repo._autobranchmaptopic + try: + repo._autobranchmaptopic = False + return orig(repo) + finally: + repo._autobranchmaptopic = previous + +# needed to prevent reference used for 'super()' call using in branchmap.py to +# no go into cycle. (yes, URG) +_oldbranchmap = branchmap.branchcache + +@contextlib.contextmanager +def oldbranchmap(): + previous = branchmap.branchcache + try: + branchmap.branchcache = _oldbranchmap + yield + finally: + branchmap.branchcache = previous + +class _topiccache(object): # combine me with branchmap.branchcache def __init__(self, *args, **kwargs): - otherbranchcache = branchmap.branchcache - try: - # super() call may fail otherwise - branchmap.branchcache = oldbranchcache - super(topiccache, self).__init__(*args, **kwargs) - if self.filteredhash is None: - self.filteredhash = nullid - self.phaseshash = nullid - finally: - branchmap.branchcache = otherbranchcache + # super() call may fail otherwise + with oldbranchmap(): + super(_topiccache, self).__init__(*args, **kwargs) + self.phaseshash = None def copy(self): """return an deep copy of the branchcache object""" - new = topiccache(self, self.tipnode, self.tiprev, self.filteredhash, - self._closednodes) - if self.filteredhash is None: - self.filteredhash = nullid + new = self.__class__(self, self.tipnode, self.tiprev, self.filteredhash, + self._closednodes) new.phaseshash = self.phaseshash return new @@ -104,144 +144,61 @@ Raise KeyError for unknown branch.''' if topic: branch = '%s:%s' % (branch, topic) - return super(topiccache, self).branchtip(branch) + return super(_topiccache, self).branchtip(branch) def branchheads(self, branch, closed=False, topic=''): if topic: branch = '%s:%s' % (branch, topic) - return super(topiccache, self).branchheads(branch, closed=closed) + return super(_topiccache, self).branchheads(branch, closed=closed) def validfor(self, repo): """Is the cache content valid regarding a repo - False when cached tipnode is unknown or if we detect a strip. - True when cache is up to date or a subset of current repo.""" - # This is copy paste of mercurial.branchmap.branchcache.validfor in - # 69077c65919d With a small changes to the cache key handling to - # include phase information that impact the topic cache. - # - # All code changes should be flagged on site. - try: - if (self.tipnode == repo.changelog.node(self.tiprev)): - fh = scmutil.filteredhash(repo, self.tiprev) - if fh is None: - fh = nullid - if ((self.filteredhash == fh) - and (self.phaseshash == _phaseshash(repo, self.tiprev))): - return True + valid = super(_topiccache, self).validfor(repo) + if not valid: return False - except IndexError: - return False + elif not istopicfilter(repo.filtername) or self.phaseshash is None: + # phasehash at None means this is a branchmap + # come from non topic thing + return True + else: + try: + valid = self.phaseshash == _phaseshash(repo, self.tiprev) + return valid + except IndexError: + return False def write(self, repo): - # This is copy paste of mercurial.branchmap.branchcache.write in - # 69077c65919d With a small changes to the cache key handling to - # include phase information that impact the topic cache. - # - # All code changes should be flagged on site. - try: - f = repo.vfs(_filename(repo), "w", atomictemp=True) - cachekey = [hex(self.tipnode), str(self.tiprev)] - # [CHANGE] we need a hash in all cases - assert self.filteredhash is not None - cachekey.append(hex(self.filteredhash)) - cachekey.append(hex(self.phaseshash)) - f.write(" ".join(cachekey) + '\n') - nodecount = 0 - for label, nodes in sorted(self.iteritems()): - for node in nodes: - nodecount += 1 - if node in self._closednodes: - state = 'c' - else: - state = 'o' - f.write("%s %s %s\n" % (hex(node), state, - encoding.fromlocal(label))) - f.close() - repo.ui.log('branchcache', - 'wrote %s branch cache with %d labels and %d nodes\n', - repo.filtername, len(self), nodecount) - except (IOError, OSError, error.Abort) as inst: - repo.ui.debug("couldn't write branch cache: %s\n" % inst) - # Abort may be raise by read only opener - pass + # we expect mutable set to be small enough to be that computing it all + # the time will be fast enough + if not istopicfilter(repo.filtername): + super(_topiccache, self).write(repo) def update(self, repo, revgen): """Given a branchhead cache, self, that may have extra nodes or be missing heads, and a generator of nodes that are strictly a superset of heads missing, this function updates self to be correct. """ - oldgetbranchinfo = repo.revbranchcache().branchinfo + if not istopicfilter(repo.filtername): + return super(_topiccache, self).update(repo, revgen) + unfi = repo.unfiltered() + oldgetbranchinfo = unfi.revbranchcache().branchinfo + + def branchinfo(r): + info = oldgetbranchinfo(r) + topic = '' + ctx = unfi[r] + if ctx.mutable(): + topic = ctx.topic() + branch = info[0] + if topic: + branch = '%s:%s' % (branch, topic) + return (branch, info[1]) try: - def branchinfo(r): - info = oldgetbranchinfo(r) - topic = '' - ctx = repo[r] - if ctx.mutable(): - topic = ctx.topic() - branch = info[0] - if topic: - branch = '%s:%s' % (branch, topic) - return (branch, info[1]) - repo.revbranchcache().branchinfo = branchinfo - super(topiccache, self).update(repo, revgen) - if self.filteredhash is None: - self.filteredhash = nullid + unfi.revbranchcache().branchinfo = branchinfo + super(_topiccache, self).update(repo, revgen) self.phaseshash = _phaseshash(repo, self.tiprev) finally: - repo.revbranchcache().branchinfo = oldgetbranchinfo - -def readtopicmap(repo): - # This is copy paste of mercurial.branchmap.read in 69077c65919d - # With a small changes to the cache key handling to include phase - # information that impact the topic cache. - # - # All code changes should be flagged on site. - try: - f = repo.vfs(_filename(repo)) - lines = f.read().split('\n') - f.close() - except (IOError, OSError): - return None - - try: - cachekey = lines.pop(0).split(" ", 2) - last, lrev = cachekey[:2] - last, lrev = bin(last), int(lrev) - filteredhash = bin(cachekey[2]) # [CHANGE] unconditional filteredhash - partial = topiccache(tipnode=last, tiprev=lrev, - filteredhash=filteredhash) - partial.phaseshash = bin(cachekey[3]) # [CHANGE] read phaseshash - if not partial.validfor(repo): - # invalidate the cache - raise ValueError('tip differs') - cl = repo.changelog - for l in lines: - if not l: - continue - node, state, label = l.split(" ", 2) - if state not in 'oc': - raise ValueError('invalid branch state') - label = encoding.tolocal(label.strip()) - node = bin(node) - if not cl.hasnode(node): - raise ValueError('node %s does not exist' % hex(node)) - partial.setdefault(label, []).append(node) - if state == 'c': - partial._closednodes.add(node) - except KeyboardInterrupt: - raise - except Exception as inst: - if repo.ui.debugflag: - msg = 'invalid branchheads cache' - if repo.filtername is not None: - msg += ' (%s)' % repo.filtername - msg += ': %s\n' - repo.ui.debug(msg % inst) - partial = None - return partial - -def modsetup(ui): - """call at uisetup time to install various wrappings""" - extensions.wrapfunction(changegroup.cg1unpacker, 'apply', cgapply) - extensions.wrapfunction(cmdutil, 'commitstatus', commitstatus) + unfi.revbranchcache().branchinfo = oldgetbranchinfo