Mercurial > hg-stable
view mercurial/treediscovery.py @ 35767:5f5fb279fd39
streamclone: also stream caches to the client
When stream clone is used over bundle2, relevant cache files are also streamed.
This is expected to be a massive performance win for clone since no important
cache will have to be recomputed.
Some performance numbers:
(All times are wall-clock times in seconds, 2 attempts per case.)
# Mozilla-Central
## Clone over ssh over lan
V1 streaming: 234.3 239.6
V2 streaming: 248.4 243.7
## Clone over ssh over Internet
V1 streaming: 175.5 110.9
V2 streaming: 109.1 111.0
## Clone over HTTP over lan
V1 streaming: 105.3 105.6
V2 streaming: 112.7 111.4
## Clone over HTTP over internet
V1 streaming: 105.6 114.6
V2 streaming: 226.7 225.9
## Hg tags
V1 streaming (no cache): 1.084 1.071
V2 streaming (cache): 0.312 0.325
## Hg branches
V1 streaming (no cache): 14.047 14.148
V2 streaming (with cache): 0.312 0.333
# Pypy
## Clone over ssh over internet
V1 streaming: 29.4 30.1
V2 streaming: 31.2 30.1
## Clone over http over internet
V1 streaming: 29.7 29.7
V2 streaming: 75.2 72.9
(since ssh and lan are not affected, there seems to be an issue with how we
read/write the http stream on connection with latency, unrelated to the format)
## Hg tags
V1 streaming (no cache): 1.752 1.664
V2 streaming (with cache): 0.274 0.260
## Hg branches
V1 streaming (no cache): 4.469 4.728
V2 streaming (with cache): 0.318 0.321
# Private repository:
* 500K revision revisions
* 11K topological heads
* 28K branch heads
## hg tags
no cache: 1543.332
with cache: 4.900
## hg branches
no cache: 91.828
with cache: 2.955
author | Boris Feld <boris.feld@octobus.net> |
---|---|
date | Thu, 18 Jan 2018 00:50:12 +0100 |
parents | 56b2bcea2529 |
children | 0ed11f9368fd |
line wrap: on
line source
# discovery.py - protocol changeset discovery functions # # Copyright 2010 Matt Mackall <mpm@selenic.com> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from __future__ import absolute_import import collections from .i18n import _ from .node import ( nullid, short, ) from . import ( error, ) def findcommonincoming(repo, remote, heads=None, force=False): """Return a tuple (common, fetch, heads) used to identify the common subset of nodes between repo and remote. "common" is a list of (at least) the heads of the common subset. "fetch" is a list of roots of the nodes that would be incoming, to be supplied to changegroupsubset. "heads" is either the supplied heads, or else the remote's heads. """ knownnode = repo.changelog.hasnode search = [] fetch = set() seen = set() seenbranch = set() base = set() if not heads: heads = remote.heads() if repo.changelog.tip() == nullid: base.add(nullid) if heads != [nullid]: return [nullid], [nullid], list(heads) return [nullid], [], heads # assume we're closer to the tip than the root # and start by examining the heads repo.ui.status(_("searching for changes\n")) unknown = [] for h in heads: if not knownnode(h): unknown.append(h) else: base.add(h) if not unknown: return list(base), [], list(heads) req = set(unknown) reqcnt = 0 # search through remote branches # a 'branch' here is a linear segment of history, with four parts: # head, root, first parent, second parent # (a branch always has two parents (or none) by definition) unknown = collections.deque(remote.branches(unknown)) while unknown: r = [] while unknown: n = unknown.popleft() if n[0] in seen: continue repo.ui.debug("examining %s:%s\n" % (short(n[0]), short(n[1]))) if n[0] == nullid: # found the end of the branch pass elif n in seenbranch: repo.ui.debug("branch already found\n") continue elif n[1] and knownnode(n[1]): # do we know the base? repo.ui.debug("found incomplete branch %s:%s\n" % (short(n[0]), short(n[1]))) search.append(n[0:2]) # schedule branch range for scanning seenbranch.add(n) else: if n[1] not in seen and n[1] not in fetch: if knownnode(n[2]) and knownnode(n[3]): repo.ui.debug("found new changeset %s\n" % short(n[1])) fetch.add(n[1]) # earliest unknown for p in n[2:4]: if knownnode(p): base.add(p) # latest known for p in n[2:4]: if p not in req and not knownnode(p): r.append(p) req.add(p) seen.add(n[0]) if r: reqcnt += 1 repo.ui.progress(_('searching'), reqcnt, unit=_('queries')) repo.ui.debug("request %d: %s\n" % (reqcnt, " ".join(map(short, r)))) for p in xrange(0, len(r), 10): for b in remote.branches(r[p:p + 10]): repo.ui.debug("received %s:%s\n" % (short(b[0]), short(b[1]))) unknown.append(b) # do binary search on the branches we found while search: newsearch = [] reqcnt += 1 repo.ui.progress(_('searching'), reqcnt, unit=_('queries')) for n, l in zip(search, remote.between(search)): l.append(n[1]) p = n[0] f = 1 for i in l: repo.ui.debug("narrowing %d:%d %s\n" % (f, len(l), short(i))) if knownnode(i): if f <= 2: repo.ui.debug("found new branch changeset %s\n" % short(p)) fetch.add(p) base.add(i) else: repo.ui.debug("narrowed branch search to %s:%s\n" % (short(p), short(i))) newsearch.append((p, i)) break p, f = i, f * 2 search = newsearch # sanity check our fetch list for f in fetch: if knownnode(f): raise error.RepoError(_("already have changeset ") + short(f[:4])) base = list(base) if base == [nullid]: if force: repo.ui.warn(_("warning: repository is unrelated\n")) else: raise error.Abort(_("repository is unrelated")) repo.ui.debug("found new changesets starting at " + " ".join([short(f) for f in fetch]) + "\n") repo.ui.progress(_('searching'), None) repo.ui.debug("%d total queries\n" % reqcnt) return base, list(fetch), heads