Mercurial > hg
view mercurial/sshpeer.py @ 23785:cb99bacb9b4e
branchcache: introduce revbranchcache for caching of revision branch names
It is expensive to retrieve the branch name of a revision. Very expensive when
creating a changectx and calling .branch() every time - slightly less when
using changelog.branchinfo().
Now, to speed things up, provide a way to cache the results on disk in an
efficient format. Each branchname is assigned a number, and for each revision
we store the number of the corresponding branch name. The branch names are
stored in a dedicated file which is strictly append only.
Branch names are usually reused across several revisions, and the total list of
branch names will thus be so small that it is feasible to read the whole set of
names before using the cache. It will however do that it might be more
efficient to use the changelog for retrieving the branch info for a single
revision.
The revision entries are stored in another file. This file is usually append
only, but if the repository has been modified, the file will be truncated and
the relevant parts rewritten on demand.
The entries for each revision are 8 bytes each, and the whole revision file
will thus be 1/8 of 00changelog.i.
Each revision entry contains the first 4 bytes of the corresponding node hash.
This is used as a check sum that always is verified before the entry is used.
That check is relatively expensive but it makes sure history modification is
detected and handled correctly. It will also detect and handle most revision
file corruptions.
This is just a cache. A new format can always be introduced if other
requirements or ideas make that seem like a good idea. Rebuilding the cache is
not really more expensive than it was to run for example 'hg log -b branchname'
before this cache was introduced.
This new method is still unused but promise to make some operations several
times faster once it actually is used.
Abandoning Python 2.4 would make it possible to implement this more efficiently
by using struct classes and pack_into. The Python code could probably also be
micro optimized or it could be implemented very efficiently in C where it would
be easy to control the data access.
author | Mads Kiilerich <madski@unity3d.com> |
---|---|
date | Thu, 08 Jan 2015 00:01:03 +0100 |
parents | 14ac0c1579cd |
children | d65243d28749 |
line wrap: on
line source
# sshpeer.py - ssh repository proxy class for mercurial # # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. import re from i18n import _ import util, error, wireproto class remotelock(object): def __init__(self, repo): self.repo = repo def release(self): self.repo.unlock() self.repo = None def __del__(self): if self.repo: self.release() def _serverquote(s): if not s: return s '''quote a string for the remote shell ... which we assume is sh''' if re.match('[a-zA-Z0-9@%_+=:,./-]*$', s): return s return "'%s'" % s.replace("'", "'\\''") class sshpeer(wireproto.wirepeer): def __init__(self, ui, path, create=False): self._url = path self.ui = ui self.pipeo = self.pipei = self.pipee = None u = util.url(path, parsequery=False, parsefragment=False) if u.scheme != 'ssh' or not u.host or u.path is None: self._abort(error.RepoError(_("couldn't parse location %s") % path)) self.user = u.user if u.passwd is not None: self._abort(error.RepoError(_("password in URL not supported"))) self.host = u.host self.port = u.port self.path = u.path or "." sshcmd = self.ui.config("ui", "ssh", "ssh") remotecmd = self.ui.config("ui", "remotecmd", "hg") args = util.sshargs(sshcmd, _serverquote(self.host), _serverquote(self.user), _serverquote(self.port)) if create: cmd = '%s %s %s' % (sshcmd, args, util.shellquote("%s init %s" % (_serverquote(remotecmd), _serverquote(self.path)))) ui.debug('running %s\n' % cmd) res = ui.system(cmd) if res != 0: self._abort(error.RepoError(_("could not create remote repo"))) self._validaterepo(sshcmd, args, remotecmd) def url(self): return self._url def _validaterepo(self, sshcmd, args, remotecmd): # cleanup up previous run self.cleanup() cmd = '%s %s %s' % (sshcmd, args, util.shellquote("%s -R %s serve --stdio" % (_serverquote(remotecmd), _serverquote(self.path)))) self.ui.debug('running %s\n' % cmd) cmd = util.quotecommand(cmd) # while self.subprocess isn't used, having it allows the subprocess to # to clean up correctly later self.pipeo, self.pipei, self.pipee, self.subprocess = util.popen4(cmd) # skip any noise generated by remote shell self._callstream("hello") r = self._callstream("between", pairs=("%s-%s" % ("0"*40, "0"*40))) lines = ["", "dummy"] max_noise = 500 while lines[-1] and max_noise: l = r.readline() self.readerr() if lines[-1] == "1\n" and l == "\n": break if l: self.ui.debug("remote: ", l) lines.append(l) max_noise -= 1 else: self._abort(error.RepoError(_('no suitable response from ' 'remote hg'))) self._caps = set() for l in reversed(lines): if l.startswith("capabilities:"): self._caps.update(l[:-1].split(":")[1].split()) break def _capabilities(self): return self._caps def readerr(self): s = util.readpipe(self.pipee) if s: for l in s.splitlines(): self.ui.status(_("remote: "), l, '\n') def _abort(self, exception): self.cleanup() raise exception def cleanup(self): if self.pipeo is None: return self.pipeo.close() self.pipei.close() try: # read the error descriptor until EOF for l in self.pipee: self.ui.status(_("remote: "), l) except (IOError, ValueError): pass self.pipee.close() __del__ = cleanup def _callstream(self, cmd, **args): self.ui.debug("sending %s command\n" % cmd) self.pipeo.write("%s\n" % cmd) _func, names = wireproto.commands[cmd] keys = names.split() wireargs = {} for k in keys: if k == '*': wireargs['*'] = args break else: wireargs[k] = args[k] del args[k] for k, v in sorted(wireargs.iteritems()): self.pipeo.write("%s %d\n" % (k, len(v))) if isinstance(v, dict): for dk, dv in v.iteritems(): self.pipeo.write("%s %d\n" % (dk, len(dv))) self.pipeo.write(dv) else: self.pipeo.write(v) self.pipeo.flush() return self.pipei def _callcompressable(self, cmd, **args): return self._callstream(cmd, **args) def _call(self, cmd, **args): self._callstream(cmd, **args) return self._recv() def _callpush(self, cmd, fp, **args): r = self._call(cmd, **args) if r: return '', r while True: d = fp.read(4096) if not d: break self._send(d) self._send("", flush=True) r = self._recv() if r: return '', r return self._recv(), '' def _calltwowaystream(self, cmd, fp, **args): r = self._call(cmd, **args) if r: # XXX needs to be made better raise util.Abort('unexpected remote reply: %s' % r) while True: d = fp.read(4096) if not d: break self._send(d) self._send("", flush=True) return self.pipei def _recv(self): l = self.pipei.readline() if l == '\n': err = [] while True: line = self.pipee.readline() if line == '-\n': break err.extend([line]) if len(err) > 0: # strip the trailing newline added to the last line server-side err[-1] = err[-1][:-1] self._abort(error.OutOfBandError(*err)) self.readerr() try: l = int(l) except ValueError: self._abort(error.ResponseError(_("unexpected response:"), l)) return self.pipei.read(l) def _send(self, data, flush=False): self.pipeo.write("%d\n" % len(data)) if data: self.pipeo.write(data) if flush: self.pipeo.flush() self.readerr() def lock(self): self._call("lock") return remotelock(self) def unlock(self): self._call("unlock") def addchangegroup(self, cg, source, url, lock=None): '''Send a changegroup to the remote server. Return an integer similar to unbundle(). DEPRECATED, since it requires locking the remote.''' d = self._call("addchangegroup") if d: self._abort(error.RepoError(_("push refused: %s") % d)) while True: d = cg.read(4096) if not d: break self.pipeo.write(d) self.readerr() self.pipeo.flush() self.readerr() r = self._recv() if not r: return 1 try: return int(r) except ValueError: self._abort(error.ResponseError(_("unexpected response:"), r)) instance = sshpeer