sparse-read: move from a recursive-based approach to a heap-based one
The previous recursive approach tried to optimize each read slice to have a
good density. It tended to over-optimize smaller slices while leaving larger
holes in others.
The new approach focuses on improving the combined density of all the reads,
instead of the individual slices. It slices at the largest gaps first, as they
reduce the total amount of read data the most efficiently.
Another benefit of this approach is that we iterate over the delta chain only
once, reducing the overhead of slicing long delta chains.
On the repository we use for tests, the new approach shows similar or faster
performance than the current default linear full read.
The repository contains about 450,000 revisions with many concurrent
topological branches. Tests have been run on two versions of the repository:
one built with the current delta constraint, and the other with an unlimited
delta span (using 'experimental.maxdeltachainspan=0').
Below are timings for building 1% of all the revisions in the manifest log using
'hg perfrevlogrevisions -m'. Times are given in seconds. They include the
couple of follow-up changesets in this series.
delta-span     standard    unlimited
linear-read    922s        632s
sparse-read    814s        566s
# sshserver.py - ssh protocol server support for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import sys
from .i18n import _
from . import (
encoding,
error,
hook,
util,
wireproto,
)
class sshserver(wireproto.abstractserverproto):
    """Serve wire protocol commands over the ui's input/output streams.

    Requests are read from the stream that was ``ui.fin`` at construction
    time and replies are written to the stream that was ``ui.fout``.
    """

    def __init__(self, ui, repo):
        """Attach the server to ``ui`` and ``repo`` and prepare its streams."""
        self.name = 'ssh'
        self.ui = ui
        self.repo = repo
        self.lock = None

        # Remember the original streams before ui.fout is repointed below;
        # every protocol read and write goes through these two attributes.
        self.fin = ui.fin
        self.fout = ui.fout

        hook.redirect(True)

        # From here on, anything written through ui.fout or repo.ui.fout
        # ends up on ui.ferr instead of the stream kept in self.fout.
        errstream = ui.ferr
        ui.fout = errstream
        repo.ui.fout = errstream

        # Prevent insertion/deletion of CRs
        for stream in (self.fin, self.fout):
            util.setbinary(stream)

    def getargs(self, args):
        """Read the arguments named in ``args`` from the input stream.

        ``args`` is a whitespace-separated list of expected argument names.
        One argument is read per expected name.  Each one arrives as a
        header line ``<name> <length>`` followed by ``<length>`` bytes of
        value.  For the special name ``*`` the number in the header is a
        count of sub-arguments instead; that many ``<name> <length>`` +
        value entries follow and are collected into a dict.

        Returns the list of values in the order the names appear in
        ``args``.  Raises ``error.Abort`` when a received name is not one
        of the expected ones.
        """
        keys = args.split()
        data = {}
        # NB: the loop variable must not be called ``_``; that name is the
        # translation function used in the abort message below.
        for _position in xrange(len(keys)):
            # [:-1] drops the last character of the header line
            # (presumably the trailing newline).
            header = self.fin.readline()[:-1]
            name, size = header.split()
            if name not in keys:
                raise error.Abort(_("unexpected parameter %r") % name)
            if name != '*':
                data[name] = self.fin.read(int(size))
                continue
            # For '*', ``size`` is the number of entries that follow.
            star = {}
            for _subposition in xrange(int(size)):
                subheader = self.fin.readline()[:-1]
                subname, subsize = subheader.split()
                star[subname] = self.fin.read(int(subsize))
            data['*'] = star
        return [data[key] for key in keys]

    def getarg(self, name):
        """Read and return the single argument called ``name``."""
        values = self.getargs(name)
        return values[0]

    def getfile(self, fpout):
        """Copy a chunked upload from the input stream into ``fpout``.

        An empty response is sent first.  The data then arrives as a
        sequence of chunks, each announced by a line holding its size; a
        size of 0 ends the transfer.
        """
        self.sendresponse('')
        while True:
            count = int(self.fin.readline())
            if not count:
                break
            fpout.write(self.fin.read(count))

    def redirect(self):
        """Do nothing."""

    def sendresponse(self, v):
        """Write ``v`` to the output stream, preceded by its length line."""
        out = self.fout
        out.write("%d\n" % len(v))
        out.write(v)
        out.flush()

    def sendstream(self, source):
        """Write every chunk of ``source`` to the output stream, then flush.

        When ``source.reader`` is set, data is pulled from it in 4096-byte
        reads until an empty string comes back; otherwise ``source.gen`` is
        iterated.
        """
        emit = self.fout.write
        if source.reader:
            chunks = iter(lambda: source.reader.read(4096), '')
        else:
            chunks = source.gen
        for piece in chunks:
            emit(piece)
        self.fout.flush()

    def sendpushresponse(self, rsp):
        """Send an empty response followed by ``str(rsp.res)``."""
        self.sendresponse('')
        result = str(rsp.res)
        self.sendresponse(result)

    def sendpusherror(self, rsp):
        """Send ``rsp.res`` as a regular response."""
        payload = rsp.res
        self.sendresponse(payload)

    def sendooberror(self, rsp):
        """Report ``rsp.message`` on ui.ferr and a bare newline on output.

        The message is written to ``self.ui.ferr`` followed by a line
        containing only ``-``; both streams are flushed.
        """
        self.ui.ferr.write('%s\n-\n' % rsp.message)
        self.ui.ferr.flush()
        out = self.fout
        out.write('\n')
        out.flush()

    def serve_forever(self):
        """Handle requests until serve_one() returns a false value.

        ``self.lock`` is released if it is set, even when handling a
        request raises.  After a normal end of the loop the process exits
        with status 0.
        """
        try:
            serving = True
            while serving:
                serving = self.serve_one()
        finally:
            held = self.lock
            if held is not None:
                held.release()
        sys.exit(0)

    # Maps the class of a wireproto.dispatch() result to the function that
    # sends it.  The values are the plain functions defined above, so
    # serve_one() passes ``self`` explicitly when calling them.
    handlers = {
        str: sendresponse,
        wireproto.streamres: sendstream,
        wireproto.pushres: sendpushresponse,
        wireproto.pusherr: sendpusherror,
        wireproto.ooberror: sendooberror,
    }

    def serve_one(self):
        """Read one command line from the input stream and answer it.

        A command listed in ``wireproto.commands`` is dispatched and its
        result sent through the matching entry of ``handlers``.  Any other
        non-empty command is looked up as a ``do_<cmd>`` attribute: if one
        exists it is called and a non-None return value is sent as the
        response; if none exists an empty response is sent.

        Returns the result of ``cmd != ''``, i.e. false once an empty
        command line has been read.
        """
        cmd = self.fin.readline()[:-1]
        if cmd:
            if cmd in wireproto.commands:
                rsp = wireproto.dispatch(self.repo, self, cmd)
                send = self.handlers[rsp.__class__]
                send(self, rsp)
            else:
                impl = getattr(self, 'do_' + cmd, None)
                if not impl:
                    self.sendresponse("")
                else:
                    reply = impl()
                    if reply is not None:
                        self.sendresponse(reply)
        return cmd != ''

    def _client(self):
        """Return ``'remote:ssh:'`` plus the first field of SSH_CLIENT.

        The field is everything before the first space of the
        ``SSH_CLIENT`` entry of ``encoding.environ``; it is empty when the
        entry is missing.
        """
        sshclient = encoding.environ.get('SSH_CLIENT', '')
        firstfield = sshclient.split(' ', 1)[0]
        return 'remote:ssh:' + firstfield