hgext/remotefilelog/connectionpool.py
author Yuya Nishihara <yuya@tcha.org>
Wed, 31 Oct 2018 22:19:03 +0900
changeset 41008 042ed354b9eb
parent 40545 3a333a582d7b
child 43076 2372284d9457
permissions -rw-r--r--
commandserver: add IPC channel to teach repository path on command finished The idea is to load recently-used repositories first in the master process, and fork(). The forked worker can reuse a warm repository if it's preloaded. There are a couple of ways of in-memory repository caching. They have pros and cons: a. "preload by master" pros: can use a single cache dict, maximizing cache hit rate cons: need to reload a repo in master process (because worker process dies per command) b. "prefork" pros: can cache a repo without reloading (as worker processes persist) cons: lower cache hit rate since each worker has to maintain its own cache c. "shared memory" (or separate key-value store server) pros: no need to reload a repo in master process, ideally cons: need to serialize objects to sharable form Since my primary goal is to get rid of the cost of loading obsstore without massive rewrites, (c) doesn't work. (b) isn't ideal since it would require much more SDRAMs than (a). So I take (a). The idea credits to Jun Wu.

# connectionpool.py - class for pooling peer connections for reuse
#
# Copyright 2017 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from mercurial import (
    extensions,
    hg,
    sshpeer,
    util,
)

_sshv1peer = sshpeer.sshv1peer

class connectionpool(object):
    def __init__(self, repo):
        self._repo = repo
        self._pool = dict()

    def get(self, path):
        pathpool = self._pool.get(path)
        if pathpool is None:
            pathpool = list()
            self._pool[path] = pathpool

        conn = None
        if len(pathpool) > 0:
            try:
                conn = pathpool.pop()
                peer = conn.peer
                # If the connection has died, drop it
                if isinstance(peer, _sshv1peer):
                    if peer._subprocess.poll() is not None:
                        conn = None
            except IndexError:
                pass

        if conn is None:
            def _cleanup(orig):
                # close pipee first so peer.cleanup reading it won't deadlock,
                # if there are other processes with pipeo open (i.e. us).
                peer = orig.im_self
                if util.safehasattr(peer, 'pipee'):
                    peer.pipee.close()
                return orig()

            peer = hg.peer(self._repo.ui, {}, path)
            if util.safehasattr(peer, 'cleanup'):
                extensions.wrapfunction(peer, 'cleanup', _cleanup)

            conn = connection(pathpool, peer)

        return conn

    def close(self):
        for pathpool in self._pool.itervalues():
            for conn in pathpool:
                conn.close()
            del pathpool[:]

class connection(object):
    def __init__(self, pool, peer):
        self._pool = pool
        self.peer = peer

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Only add the connection back to the pool if there was no exception,
        # since an exception could mean the connection is not in a reusable
        # state.
        if type is None:
            self._pool.append(self)
        else:
            self.close()

    def close(self):
        if util.safehasattr(self.peer, 'cleanup'):
            self.peer.cleanup()