hgext/largefiles/proto.py
author Augie Fackler <raf@durin42.com>
Fri, 17 Oct 2014 13:52:10 -0400
changeset 23031 3c0983cc279e
parent 21084 70252bdfd39c
child 25079 bee00e0c2e45
permissions -rw-r--r--
i18n: cache the result of every gettext call In looking at profiler output for 'hg log' on mozilla-central, I noticed we spent a _huge_ amount of time in gettext relative to what it's doing. Caching provides a roughly 15% performance improvement even on repositories as small as hg. == hg repo on linux == Before: % cumulative self time seconds seconds name 5.05 0.19 0.19 i18n.py:62:gettext 4.84 0.18 0.18 revlog.py:88:decompress 2.95 0.17 0.11 changelog.py:201:node 2.32 0.09 0.09 ui.py:577:write 2.11 0.08 0.08 i18n.py:72:gettext 2.11 0.08 0.08 obsolete.py:196:_fm0readmarkers 1.89 0.07 0.07 obsolete.py:569:_load 1.68 0.63 0.06 localrepo.py:29:__get__ real 0m4.026s user 0m3.993s sys 0m0.034s After: % cumulative self time seconds seconds name 8.05 0.26 0.26 revlog.py:88:decompress 2.68 0.22 0.09 color.py:395:write 2.20 0.07 0.07 obsolete.py:196:_fm0readmarkers 1.95 0.06 0.06 obsolete.py:174:_fm0readmarkers 1.95 0.06 0.06 ui.py:577:write 1.95 0.06 0.06 util.py:1228:datestr 1.71 0.06 0.06 utf_8.py:16:decode 1.71 0.06 0.06 revlog.py:273:__len__ real 0m3.519s user 0m3.447s sys 0m0.073s == mozilla-central repo on linux == Before: % cumulative self time seconds seconds name 7.72 2.35 2.35 revlog.py:88:decompress 4.46 1.36 1.36 i18n.py:62:gettext 2.22 0.67 0.67 i18n.py:72:gettext 2.19 1.14 0.67 changelog.py:201:node 2.16 0.66 0.66 ui.py:577:write 1.96 0.60 0.60 utf_8.py:16:decode 1.93 1.97 0.59 color.py:395:write 1.85 0.81 0.56 changelog.py:136:tip real 0m30.822s user 0m30.660s sys 0m0.149s After: % cumulative self time seconds seconds name 9.82 2.49 2.49 revlog.py:88:decompress 2.67 1.31 0.68 localrepo.py:29:__get__ 2.57 0.65 0.65 utf_8.py:16:decode 2.48 1.01 0.63 changelog.py:201:node 2.10 0.82 0.53 changelog.py:136:tip 2.01 0.51 0.51 ui.py:577:write 1.91 0.49 0.49 util.py:1232:datestr 1.85 1.65 0.47 color.py:395:write real 0m25.619s user 0m25.446s sys 0m0.166s == cpython repo on os x = Before: % cumulative self time seconds seconds name 5.05 1.35 1.35 cmdutil.py:982:_show 4.59 1.22 1.22 revlog.py:274:__len__ 3.98 1.06 1.06 i18n.py:62:gettext 3.91 1.04 1.04 revlog.py:1016:revision 3.68 0.98 0.98 revlog.py:337:parents 3.45 0.92 0.92 revlog.py:88:decompress 2.91 0.78 0.78 revlog.py:309:rev 2.62 0.70 0.70 revlog.py:1033:revision real 0m30.414s user 0m28.145s sys 0m0.541s After: % cumulative self time seconds seconds name 7.98 1.66 1.66 cmdutil.py:982:_show 6.83 1.42 1.42 changelog.py:46:decodeextra 5.18 1.08 1.08 revlog.py:274:__len__ 3.94 0.82 0.82 revlog.py:1016:revision 3.41 0.71 0.71 revlog.py:309:rev 3.32 0.69 0.69 revlog.py:88:decompress 2.99 0.63 0.62 revlog.py:1033:revision 2.69 0.56 0.56 revlog.py:341:start real 0m22.811s user 0m21.883s sys 0m0.397s

# Copyright 2011 Fog Creek Software
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import os
import urllib2
import re

from mercurial import error, httppeer, util, wireproto
from mercurial.i18n import _

import lfutil

LARGEFILES_REQUIRED_MSG = ('\nThis repository uses the largefiles extension.'
                           '\n\nPlease enable it in your Mercurial config '
                           'file.\n')

# these will all be replaced by largefiles.uisetup
capabilitiesorig = None
ssholdcallstream = None
httpoldcallstream = None

def putlfile(repo, proto, sha):
    '''Put a largefile into a repository's local store and into the
    user cache.'''
    proto.redirect()

    path = lfutil.storepath(repo, sha)
    util.makedirs(os.path.dirname(path))
    tmpfp = util.atomictempfile(path, createmode=repo.store.createmode)

    try:
        try:
            proto.getfile(tmpfp)
            tmpfp._fp.seek(0)
            if sha != lfutil.hexsha1(tmpfp._fp):
                raise IOError(0, _('largefile contents do not match hash'))
            tmpfp.close()
            lfutil.linktousercache(repo, sha)
        except IOError, e:
            repo.ui.warn(_('largefiles: failed to put %s into store: %s\n') %
                         (sha, e.strerror))
            return wireproto.pushres(1)
    finally:
        tmpfp.discard()

    return wireproto.pushres(0)

def getlfile(repo, proto, sha):
    '''Retrieve a largefile from the repository-local cache or system
    cache.'''
    filename = lfutil.findfile(repo, sha)
    if not filename:
        raise util.Abort(_('requested largefile %s not present in cache') % sha)
    f = open(filename, 'rb')
    length = os.fstat(f.fileno())[6]

    # Since we can't set an HTTP content-length header here, and
    # Mercurial core provides no way to give the length of a streamres
    # (and reading the entire file into RAM would be ill-advised), we
    # just send the length on the first line of the response, like the
    # ssh proto does for string responses.
    def generator():
        yield '%d\n' % length
        for chunk in util.filechunkiter(f):
            yield chunk
    return wireproto.streamres(generator())

def statlfile(repo, proto, sha):
    '''Return '2\n' if the largefile is missing, '0\n' if it seems to be in
    good condition.

    The value 1 is reserved for mismatched checksum, but that is too expensive
    to be verified on every stat and must be caught be running 'hg verify'
    server side.'''
    filename = lfutil.findfile(repo, sha)
    if not filename:
        return '2\n'
    return '0\n'

def wirereposetup(ui, repo):
    class lfileswirerepository(repo.__class__):
        def putlfile(self, sha, fd):
            # unfortunately, httprepository._callpush tries to convert its
            # input file-like into a bundle before sending it, so we can't use
            # it ...
            if issubclass(self.__class__, httppeer.httppeer):
                res = None
                try:
                    res = self._call('putlfile', data=fd, sha=sha,
                        headers={'content-type':'application/mercurial-0.1'})
                    d, output = res.split('\n', 1)
                    for l in output.splitlines(True):
                        self.ui.warn(_('remote: '), l) # assume l ends with \n
                    return int(d)
                except (ValueError, urllib2.HTTPError):
                    self.ui.warn(_('unexpected putlfile response: %r\n') % res)
                    return 1
            # ... but we can't use sshrepository._call because the data=
            # argument won't get sent, and _callpush does exactly what we want
            # in this case: send the data straight through
            else:
                try:
                    ret, output = self._callpush("putlfile", fd, sha=sha)
                    if ret == "":
                        raise error.ResponseError(_('putlfile failed:'),
                                output)
                    return int(ret)
                except IOError:
                    return 1
                except ValueError:
                    raise error.ResponseError(
                        _('putlfile failed (unexpected response):'), ret)

        def getlfile(self, sha):
            """returns an iterable with the chunks of the file with sha sha"""
            stream = self._callstream("getlfile", sha=sha)
            length = stream.readline()
            try:
                length = int(length)
            except ValueError:
                self._abort(error.ResponseError(_("unexpected response:"),
                                                length))

            # SSH streams will block if reading more than length
            for chunk in util.filechunkiter(stream, 128 * 1024, length):
                yield chunk
            # HTTP streams must hit the end to process the last empty
            # chunk of Chunked-Encoding so the connection can be reused.
            if issubclass(self.__class__, httppeer.httppeer):
                chunk = stream.read(1)
                if chunk:
                    self._abort(error.ResponseError(_("unexpected response:"),
                                                    chunk))

        @wireproto.batchable
        def statlfile(self, sha):
            f = wireproto.future()
            result = {'sha': sha}
            yield result, f
            try:
                yield int(f.value)
            except (ValueError, urllib2.HTTPError):
                # If the server returns anything but an integer followed by a
                # newline, newline, it's not speaking our language; if we get
                # an HTTP error, we can't be sure the largefile is present;
                # either way, consider it missing.
                yield 2

    repo.__class__ = lfileswirerepository

# advertise the largefiles=serve capability
def capabilities(repo, proto):
    return capabilitiesorig(repo, proto) + ' largefiles=serve'

def heads(repo, proto):
    if lfutil.islfilesrepo(repo):
        return wireproto.ooberror(LARGEFILES_REQUIRED_MSG)
    return wireproto.heads(repo, proto)

def sshrepocallstream(self, cmd, **args):
    if cmd == 'heads' and self.capable('largefiles'):
        cmd = 'lheads'
    if cmd == 'batch' and self.capable('largefiles'):
        args['cmds'] = args['cmds'].replace('heads ', 'lheads ')
    return ssholdcallstream(self, cmd, **args)

headsre = re.compile(r'(^|;)heads\b')

def httprepocallstream(self, cmd, **args):
    if cmd == 'heads' and self.capable('largefiles'):
        cmd = 'lheads'
    if cmd == 'batch' and self.capable('largefiles'):
        args['cmds'] = headsre.sub('lheads', args['cmds'])
    return httpoldcallstream(self, cmd, **args)