mercurial/httppeer.py
author Matt Harbison <matt_harbison@yahoo.com>
Sun, 20 Aug 2023 15:30:39 -0400
changeset 50871 0c76520ec4cc
parent 50666 60f9602b413e
child 50928 d718eddf01d9
child 50990 758e40b9a8af
permissions -rw-r--r--
children: migrate `opts` to native kwargs

# httppeer.py - HTTP repository proxy classes for mercurial
#
# Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import errno
import io
import os
import socket
import struct

from concurrent import futures
from .i18n import _
from .pycompat import getattr
from . import (
    bundle2,
    error,
    httpconnection,
    pycompat,
    statichttprepo,
    url as urlmod,
    util,
    wireprotov1peer,
)
from .utils import urlutil

httplib = util.httplib
urlerr = util.urlerr
urlreq = util.urlreq


def encodevalueinheaders(value, header, limit):
    """Encode a string value into multiple HTTP headers.

    ``value`` will be encoded into 1 or more HTTP headers with the names
    ``header-<N>`` where ``<N>`` is an integer starting at 1. Each header
    name + value will be at most ``limit`` bytes long.

    Returns an iterable of 2-tuples consisting of header names and
    values as native strings.
    """
    # HTTP Headers are ASCII. Python 3 requires them to be unicodes,
    # not bytes. This function always takes bytes in as arguments.
    fmt = pycompat.strurl(header) + r'-%s'
    # Note: it is *NOT* a bug that the last bit here is a bytestring
    # and not a unicode: we're just getting the encoded length anyway,
    # and using an r-string to make it portable between Python 2 and 3
    # doesn't work because then the \r is a literal backslash-r
    # instead of a carriage return.
    valuelen = limit - len(fmt % '000') - len(b': \r\n')
    result = []

    n = 0
    for i in range(0, len(value), valuelen):
        n += 1
        result.append((fmt % str(n), pycompat.strurl(value[i : i + valuelen])))

    return result


class _multifile:
    def __init__(self, *fileobjs):
        for f in fileobjs:
            if not util.safehasattr(f, 'length'):
                raise ValueError(
                    b'_multifile only supports file objects that '
                    b'have a length but this one does not:',
                    type(f),
                    f,
                )
        self._fileobjs = fileobjs
        self._index = 0

    @property
    def length(self):
        return sum(f.length for f in self._fileobjs)

    def read(self, amt=None):
        if amt <= 0:
            return b''.join(f.read() for f in self._fileobjs)
        parts = []
        while amt and self._index < len(self._fileobjs):
            parts.append(self._fileobjs[self._index].read(amt))
            got = len(parts[-1])
            if got < amt:
                self._index += 1
            amt -= got
        return b''.join(parts)

    def seek(self, offset, whence=os.SEEK_SET):
        if whence != os.SEEK_SET:
            raise NotImplementedError(
                b'_multifile does not support anything other'
                b' than os.SEEK_SET for whence on seek()'
            )
        if offset != 0:
            raise NotImplementedError(
                b'_multifile only supports seeking to start, but that '
                b'could be fixed if you need it'
            )
        for f in self._fileobjs:
            f.seek(0)
        self._index = 0


def makev1commandrequest(
    ui,
    requestbuilder,
    caps,
    capablefn,
    repobaseurl,
    cmd,
    args,
    remotehidden=False,
):
    """Make an HTTP request to run a command for a version 1 client.

    ``caps`` is a set of known server capabilities. The value may be
    None if capabilities are not yet known.

    ``capablefn`` is a function to evaluate a capability.

    ``cmd``, ``args``, and ``data`` define the command, its arguments, and
    raw data to pass to it.
    """
    if cmd == b'pushkey':
        args[b'data'] = b''
    data = args.pop(b'data', None)
    headers = args.pop(b'headers', {})

    ui.debug(b"sending %s command\n" % cmd)
    q = [(b'cmd', cmd)]
    if remotehidden:
        q.append(('access-hidden', '1'))
    headersize = 0
    # Important: don't use self.capable() here or else you end up
    # with infinite recursion when trying to look up capabilities
    # for the first time.
    postargsok = caps is not None and b'httppostargs' in caps

    # Send arguments via POST.
    if postargsok and args:
        strargs = urlreq.urlencode(sorted(args.items()))
        if not data:
            data = strargs
        else:
            if isinstance(data, bytes):
                i = io.BytesIO(data)
                i.length = len(data)
                data = i
            argsio = io.BytesIO(strargs)
            argsio.length = len(strargs)
            data = _multifile(argsio, data)
        headers['X-HgArgs-Post'] = len(strargs)
    elif args:
        # Calling self.capable() can infinite loop if we are calling
        # "capabilities". But that command should never accept wire
        # protocol arguments. So this should never happen.
        assert cmd != b'capabilities'
        httpheader = capablefn(b'httpheader')
        if httpheader:
            headersize = int(httpheader.split(b',', 1)[0])

        # Send arguments via HTTP headers.
        if headersize > 0:
            # The headers can typically carry more data than the URL.
            encoded_args = urlreq.urlencode(sorted(args.items()))
            for header, value in encodevalueinheaders(
                encoded_args, b'X-HgArg', headersize
            ):
                headers[header] = value
        # Send arguments via query string (Mercurial <1.9).
        else:
            q += sorted(args.items())

    qs = b'?%s' % urlreq.urlencode(q)
    cu = b"%s%s" % (repobaseurl, qs)
    size = 0
    if util.safehasattr(data, 'length'):
        size = data.length
    elif data is not None:
        size = len(data)
    if data is not None and 'Content-Type' not in headers:
        headers['Content-Type'] = 'application/mercurial-0.1'

    # Tell the server we accept application/mercurial-0.2 and multiple
    # compression formats if the server is capable of emitting those
    # payloads.
    # Note: Keep this set empty by default, as client advertisement of
    # protocol parameters should only occur after the handshake.
    protoparams = set()

    mediatypes = set()
    if caps is not None:
        mt = capablefn(b'httpmediatype')
        if mt:
            protoparams.add(b'0.1')
            mediatypes = set(mt.split(b','))

        protoparams.add(b'partial-pull')

    if b'0.2tx' in mediatypes:
        protoparams.add(b'0.2')

    if b'0.2tx' in mediatypes and capablefn(b'compression'):
        # We /could/ compare supported compression formats and prune
        # non-mutually supported or error if nothing is mutually supported.
        # For now, send the full list to the server and have it error.
        comps = [
            e.wireprotosupport().name
            for e in util.compengines.supportedwireengines(util.CLIENTROLE)
        ]
        protoparams.add(b'comp=%s' % b','.join(comps))

    if protoparams:
        protoheaders = encodevalueinheaders(
            b' '.join(sorted(protoparams)), b'X-HgProto', headersize or 1024
        )
        for header, value in protoheaders:
            headers[header] = value

    varyheaders = []
    for header in headers:
        if header.lower().startswith('x-hg'):
            varyheaders.append(header)

    if varyheaders:
        headers['Vary'] = ','.join(sorted(varyheaders))

    req = requestbuilder(pycompat.strurl(cu), data, headers)

    if data is not None:
        ui.debug(b"sending %d bytes\n" % size)
        req.add_unredirected_header('Content-Length', '%d' % size)

    return req, cu, qs


def sendrequest(ui, opener, req):
    """Send a prepared HTTP request.

    Returns the response object.
    """
    dbg = ui.debug
    if ui.debugflag and ui.configbool(b'devel', b'debug.peer-request'):
        line = b'devel-peer-request: %s\n'
        dbg(
            line
            % b'%s %s'
            % (
                pycompat.bytesurl(req.get_method()),
                pycompat.bytesurl(req.get_full_url()),
            )
        )
        hgargssize = None

        for header, value in sorted(req.header_items()):
            header = pycompat.bytesurl(header)
            value = pycompat.bytesurl(value)
            if header.startswith(b'X-hgarg-'):
                if hgargssize is None:
                    hgargssize = 0
                hgargssize += len(value)
            else:
                dbg(line % b'  %s %s' % (header, value))

        if hgargssize is not None:
            dbg(
                line
                % b'  %d bytes of commands arguments in headers'
                % hgargssize
            )
        data = req.data
        if data is not None:
            length = getattr(data, 'length', None)
            if length is None:
                length = len(data)
            dbg(line % b'  %d bytes of data' % length)

        start = util.timer()

    res = None
    try:
        res = opener.open(req)
    except urlerr.httperror as inst:
        if inst.code == 401:
            raise error.Abort(_(b'authorization failed'))
        raise
    except httplib.HTTPException as inst:
        ui.debug(
            b'http error requesting %s\n'
            % urlutil.hidepassword(req.get_full_url())
        )
        ui.traceback()
        raise IOError(None, inst)
    finally:
        if ui.debugflag and ui.configbool(b'devel', b'debug.peer-request'):
            code = res.code if res else -1
            dbg(
                line
                % b'  finished in %.4f seconds (%d)'
                % (util.timer() - start, code)
            )

    # Insert error handlers for common I/O failures.
    urlmod.wrapresponse(res)

    return res


class RedirectedRepoError(error.RepoError):
    def __init__(self, msg, respurl):
        super(RedirectedRepoError, self).__init__(msg)
        self.respurl = respurl


def parsev1commandresponse(ui, baseurl, requrl, qs, resp, compressible):
    # record the url we got redirected to
    redirected = False
    respurl = pycompat.bytesurl(resp.geturl())
    if respurl.endswith(qs):
        respurl = respurl[: -len(qs)]
        qsdropped = False
    else:
        qsdropped = True

    if baseurl.rstrip(b'/') != respurl.rstrip(b'/'):
        redirected = True
        if not ui.quiet:
            ui.warn(_(b'real URL is %s\n') % respurl)

    try:
        proto = pycompat.bytesurl(resp.getheader('content-type', ''))
    except AttributeError:
        proto = pycompat.bytesurl(resp.headers.get('content-type', ''))

    safeurl = urlutil.hidepassword(baseurl)
    if proto.startswith(b'application/hg-error'):
        raise error.OutOfBandError(resp.read())

    # Pre 1.0 versions of Mercurial used text/plain and
    # application/hg-changegroup. We don't support such old servers.
    if not proto.startswith(b'application/mercurial-'):
        ui.debug(b"requested URL: '%s'\n" % urlutil.hidepassword(requrl))
        msg = _(
            b"'%s' does not appear to be an hg repository:\n"
            b"---%%<--- (%s)\n%s\n---%%<---\n"
        ) % (safeurl, proto or b'no content-type', resp.read(1024))

        # Some servers may strip the query string from the redirect. We
        # raise a special error type so callers can react to this specially.
        if redirected and qsdropped:
            raise RedirectedRepoError(msg, respurl)
        else:
            raise error.RepoError(msg)

    try:
        subtype = proto.split(b'-', 1)[1]

        version_info = tuple([int(n) for n in subtype.split(b'.')])
    except ValueError:
        raise error.RepoError(
            _(b"'%s' sent a broken Content-Type header (%s)") % (safeurl, proto)
        )

    # TODO consider switching to a decompression reader that uses
    # generators.
    if version_info == (0, 1):
        if compressible:
            resp = util.compengines[b'zlib'].decompressorreader(resp)

    elif version_info == (0, 2):
        # application/mercurial-0.2 always identifies the compression
        # engine in the payload header.
        elen = struct.unpack(b'B', util.readexactly(resp, 1))[0]
        ename = util.readexactly(resp, elen)
        engine = util.compengines.forwiretype(ename)

        resp = engine.decompressorreader(resp)
    else:
        raise error.RepoError(
            _(b"'%s' uses newer protocol %s") % (safeurl, subtype)
        )

    return respurl, proto, resp


class httppeer(wireprotov1peer.wirepeer):
    def __init__(
        self, ui, path, url, opener, requestbuilder, caps, remotehidden=False
    ):
        super().__init__(ui, path=path, remotehidden=remotehidden)
        self._url = url
        self._caps = caps
        self.limitedarguments = caps is not None and b'httppostargs' not in caps
        self._urlopener = opener
        self._requestbuilder = requestbuilder
        self._remotehidden = remotehidden

    def __del__(self):
        for h in self._urlopener.handlers:
            h.close()
            getattr(h, "close_all", lambda: None)()

    # Begin of ipeerconnection interface.

    def url(self):
        return self.path.loc

    def local(self):
        return None

    def canpush(self):
        return True

    def close(self):
        try:
            reqs, sent, recv = (
                self._urlopener.requestscount,
                self._urlopener.sentbytescount,
                self._urlopener.receivedbytescount,
            )
        except AttributeError:
            return
        self.ui.note(
            _(
                b'(sent %d HTTP requests and %d bytes; '
                b'received %d bytes in responses)\n'
            )
            % (reqs, sent, recv)
        )

    # End of ipeerconnection interface.

    # Begin of ipeercommands interface.

    def capabilities(self):
        return self._caps

    def _finish_inline_clone_bundle(self, stream):
        # HTTP streams must hit the end to process the last empty
        # chunk of Chunked-Encoding so the connection can be reused.
        chunk = stream.read(1)
        if chunk:
            self._abort(error.ResponseError(_(b"unexpected response:"), chunk))

    # End of ipeercommands interface.

    def _callstream(self, cmd, _compressible=False, **args):
        args = pycompat.byteskwargs(args)

        req, cu, qs = makev1commandrequest(
            self.ui,
            self._requestbuilder,
            self._caps,
            self.capable,
            self._url,
            cmd,
            args,
            self._remotehidden,
        )

        resp = sendrequest(self.ui, self._urlopener, req)

        self._url, ct, resp = parsev1commandresponse(
            self.ui, self._url, cu, qs, resp, _compressible
        )

        return resp

    def _call(self, cmd, **args):
        fp = self._callstream(cmd, **args)
        try:
            return fp.read()
        finally:
            # if using keepalive, allow connection to be reused
            fp.close()

    def _callpush(self, cmd, cg, **args):
        # have to stream bundle to a temp file because we do not have
        # http 1.1 chunked transfer.

        types = self.capable(b'unbundle')
        try:
            types = types.split(b',')
        except AttributeError:
            # servers older than d1b16a746db6 will send 'unbundle' as a
            # boolean capability. They only support headerless/uncompressed
            # bundles.
            types = [b""]
        for x in types:
            if x in bundle2.bundletypes:
                type = x
                break

        tempname = bundle2.writebundle(self.ui, cg, None, type)
        fp = httpconnection.httpsendfile(self.ui, tempname, b"rb")
        headers = {'Content-Type': 'application/mercurial-0.1'}

        try:
            r = self._call(cmd, data=fp, headers=headers, **args)
            vals = r.split(b'\n', 1)
            if len(vals) < 2:
                raise error.ResponseError(_(b"unexpected response:"), r)
            return vals
        except urlerr.httperror:
            # Catch and re-raise these so we don't try and treat them
            # like generic socket errors. They lack any values in
            # .args on Python 3 which breaks our socket.error block.
            raise
        except socket.error as err:
            if err.args[0] in (errno.ECONNRESET, errno.EPIPE):
                raise error.Abort(_(b'push failed: %s') % err.args[1])
            raise error.Abort(err.args[1])
        finally:
            fp.close()
            os.unlink(tempname)

    def _calltwowaystream(self, cmd, fp, **args):
        filename = None
        try:
            # dump bundle to disk
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            with os.fdopen(fd, "wb") as fh:
                d = fp.read(4096)
                while d:
                    fh.write(d)
                    d = fp.read(4096)
            # start http push
            with httpconnection.httpsendfile(self.ui, filename, b"rb") as fp_:
                headers = {'Content-Type': 'application/mercurial-0.1'}
                return self._callstream(cmd, data=fp_, headers=headers, **args)
        finally:
            if filename is not None:
                os.unlink(filename)

    def _callcompressable(self, cmd, **args):
        return self._callstream(cmd, _compressible=True, **args)

    def _abort(self, exception):
        raise exception


class queuedcommandfuture(futures.Future):
    """Wraps result() on command futures to trigger submission on call."""

    def result(self, timeout=None):
        if self.done():
            return futures.Future.result(self, timeout)

        self._peerexecutor.sendcommands()

        # sendcommands() will restore the original __class__ and self.result
        # will resolve to Future.result.
        return self.result(timeout)


def performhandshake(ui, url, opener, requestbuilder):
    # The handshake is a request to the capabilities command.

    caps = None

    def capable(x):
        raise error.ProgrammingError(b'should not be called')

    args = {}

    req, requrl, qs = makev1commandrequest(
        ui, requestbuilder, caps, capable, url, b'capabilities', args
    )
    resp = sendrequest(ui, opener, req)

    # The server may redirect us to the repo root, stripping the
    # ?cmd=capabilities query string from the URL. The server would likely
    # return HTML in this case and ``parsev1commandresponse()`` would raise.
    # We catch this special case and re-issue the capabilities request against
    # the new URL.
    #
    # We should ideally not do this, as a redirect that drops the query
    # string from the URL is arguably a server bug. (Garbage in, garbage out).
    # However,  Mercurial clients for several years appeared to handle this
    # issue without behavior degradation. And according to issue 5860, it may
    # be a longstanding bug in some server implementations. So we allow a
    # redirect that drops the query string to "just work."
    try:
        respurl, ct, resp = parsev1commandresponse(
            ui, url, requrl, qs, resp, compressible=False
        )
    except RedirectedRepoError as e:
        req, requrl, qs = makev1commandrequest(
            ui, requestbuilder, caps, capable, e.respurl, b'capabilities', args
        )
        resp = sendrequest(ui, opener, req)
        respurl, ct, resp = parsev1commandresponse(
            ui, url, requrl, qs, resp, compressible=False
        )

    try:
        rawdata = resp.read()
    finally:
        resp.close()

    if not ct.startswith(b'application/mercurial-'):
        raise error.ProgrammingError(b'unexpected content-type: %s' % ct)

    info = {b'v1capabilities': set(rawdata.split())}

    return respurl, info


def _make_peer(
    ui, path, opener=None, requestbuilder=urlreq.request, remotehidden=False
):
    """Construct an appropriate HTTP peer instance.

    ``opener`` is an ``url.opener`` that should be used to establish
    connections, perform HTTP requests.

    ``requestbuilder`` is the type used for constructing HTTP requests.
    It exists as an argument so extensions can override the default.
    """
    if path.url.query or path.url.fragment:
        msg = _(b'unsupported URL component: "%s"')
        msg %= path.url.query or path.url.fragment
        raise error.Abort(msg)

    # urllib cannot handle URLs with embedded user or passwd.
    url, authinfo = path.url.authinfo()
    ui.debug(b'using %s\n' % url)

    opener = opener or urlmod.opener(ui, authinfo)

    respurl, info = performhandshake(ui, url, opener, requestbuilder)

    return httppeer(
        ui,
        path,
        respurl,
        opener,
        requestbuilder,
        info[b'v1capabilities'],
        remotehidden=remotehidden,
    )


def make_peer(
    ui, path, create, intents=None, createopts=None, remotehidden=False
):
    if create:
        raise error.Abort(_(b'cannot create new http repository'))
    try:
        if path.url.scheme == b'https' and not urlmod.has_https:
            raise error.Abort(
                _(b'Python support for SSL and HTTPS is not installed')
            )

        inst = _make_peer(ui, path, remotehidden=remotehidden)

        return inst
    except error.RepoError as httpexception:
        try:
            r = statichttprepo.make_peer(ui, b"static-" + path.loc, create)
            ui.note(_(b'(falling back to static-http)\n'))
            return r
        except error.RepoError:
            raise httpexception  # use the original http RepoError instead