hgext/largefiles/remotestore.py
author Na'Tosha Bard <natosha@unity3d.com>
Mon, 13 Feb 2012 18:37:07 +0100
changeset 16120 47ee41fcf42b
parent 15253 67d010779907
child 17127 9e1616307c4c
permissions -rw-r--r--
largefiles: optimize update speed by only updating changed largefiles Historically, during 'hg update', every largefile in the working copy was hashed (which is a very expensive operation on big files) and any largefiles that did not have a hash that matched their standin were updated. This patch optimizes 'hg update' by keeping track of what standins have changed between the old and new revisions, and only updating the largefiles that have changed. This saves a lot of time by avoiding the unnecessary calculation of a list of sha1 hashes for big files. With this patch, the time 'hg update' takes to complete is a function of how many largefiles need to be updated and what their size is. Performance tests on a repository with about 80 largefiles ranging from a few MB to about 97 MB are shown below. The tests show how long it takes to run 'hg update' with no changes actually being updated. Mercurial 2.1 release: $ time hg update 0 files updated, 0 files merged, 0 files removed, 0 files unresolved getting changed largefiles 0 largefiles updated, 0 removed real 0m10.045s user 0m9.367s sys 0m0.674s With this patch: $ time hg update 0 files updated, 0 files merged, 0 files removed, 0 files unresolved real 0m0.965s user 0m0.845s sys 0m0.115s The same repository, without the largefiles extension enabled: $ time hg update 0 files updated, 0 files merged, 0 files removed, 0 files unresolved real 0m0.799s user 0m0.684s sys 0m0.111s So before the patch, 'hg update' with no changes was approximately 9.25s slower with largefiles enabled. With this patch, it is approximately 0.165s slower.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
15168
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
     1
# Copyright 2010-2011 Fog Creek Software
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
     2
# Copyright 2010-2011 Unity Technologies
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
     3
#
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
     4
# This software may be used and distributed according to the terms of the
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
     5
# GNU General Public License version 2 or any later version.
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
     6
15252
6e809bb4f969 largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents: 15188
diff changeset
     7
'''remote largefile store; the base class for servestore'''
15168
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
     8
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
     9
import urllib2
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    10
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    11
from mercurial import util
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    12
from mercurial.i18n import _
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    13
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    14
import lfutil
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    15
import basestore
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    16
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    17
class remotestore(basestore.basestore):
15252
6e809bb4f969 largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents: 15188
diff changeset
    18
    '''a largefile store accessed over a network'''
15168
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    19
    def __init__(self, ui, repo, url):
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    20
        super(remotestore, self).__init__(ui, repo, url)
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    21
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    22
    def put(self, source, hash):
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    23
        if self._verify(hash):
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    24
            return
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    25
        if self.sendfile(source, hash):
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    26
            raise util.Abort(
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    27
                _('remotestore: could not put %s to remote store %s')
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    28
                % (source, self.url))
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    29
        self.ui.debug(
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    30
            _('remotestore: put %s to remote store %s') % (source, self.url))
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    31
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    32
    def exists(self, hash):
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    33
        return self._verify(hash)
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    34
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    35
    def sendfile(self, filename, hash):
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    36
        self.ui.debug('remotestore: sendfile(%s, %s)\n' % (filename, hash))
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    37
        fd = None
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    38
        try:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    39
            try:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    40
                fd = lfutil.httpsendfile(self.ui, filename)
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    41
            except IOError, e:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    42
                raise util.Abort(
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    43
                    _('remotestore: could not open file %s: %s')
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    44
                    % (filename, str(e)))
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    45
            return self._put(hash, fd)
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    46
        finally:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    47
            if fd:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    48
                fd.close()
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    49
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    50
    def _getfile(self, tmpfile, filename, hash):
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    51
        # quit if the largefile isn't there
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    52
        stat = self._stat(hash)
15253
67d010779907 largefiles: improve error reporting
Greg Ward <greg@gerg.ca>
parents: 15252
diff changeset
    53
        if stat == 1:
67d010779907 largefiles: improve error reporting
Greg Ward <greg@gerg.ca>
parents: 15252
diff changeset
    54
            raise util.Abort(_('remotestore: largefile %s is invalid') % hash)
67d010779907 largefiles: improve error reporting
Greg Ward <greg@gerg.ca>
parents: 15252
diff changeset
    55
        elif stat == 2:
67d010779907 largefiles: improve error reporting
Greg Ward <greg@gerg.ca>
parents: 15252
diff changeset
    56
            raise util.Abort(_('remotestore: largefile %s is missing') % hash)
15168
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    57
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    58
        try:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    59
            length, infile = self._get(hash)
15188
8e115063950d largefiles: don't break existing tests (syntax error, bad imports)
Greg Ward <greg@gerg.ca>
parents: 15168
diff changeset
    60
        except urllib2.HTTPError, e:
15168
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    61
            # 401s get converted to util.Aborts; everything else is fine being
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    62
            # turned into a StoreError
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    63
            raise basestore.StoreError(filename, hash, self.url, str(e))
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    64
        except urllib2.URLError, e:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    65
            # This usually indicates a connection problem, so don't
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    66
            # keep trying with the other files... they will probably
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    67
            # all fail too.
15253
67d010779907 largefiles: improve error reporting
Greg Ward <greg@gerg.ca>
parents: 15252
diff changeset
    68
            raise util.Abort('%s: %s' % (self.url, e.reason))
15168
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    69
        except IOError, e:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    70
            raise basestore.StoreError(filename, hash, self.url, str(e))
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    71
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    72
        # Mercurial does not close its SSH connections after writing a stream
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    73
        if length is not None:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    74
            infile = lfutil.limitreader(infile, length)
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    75
        return lfutil.copyandhash(lfutil.blockstream(infile), tmpfile)
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    76
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    77
    def _verify(self, hash):
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    78
        return not self._stat(hash)
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    79
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    80
    def _verifyfile(self, cctx, cset, contents, standin, verified):
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    81
        filename = lfutil.splitstandin(standin)
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    82
        if not filename:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    83
            return False
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    84
        fctx = cctx[standin]
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    85
        key = (filename, fctx.filenode())
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    86
        if key in verified:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    87
            return False
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    88
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    89
        verified.add(key)
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    90
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    91
        stat = self._stat(hash)
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    92
        if not stat:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    93
            return False
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    94
        elif stat == 1:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    95
            self.ui.warn(
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    96
                _('changeset %s: %s: contents differ\n')
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    97
                % (cset, filename))
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    98
            return True # failed
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
    99
        elif stat == 2:
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
   100
            self.ui.warn(
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
   101
                _('changeset %s: %s missing\n')
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
   102
                % (cset, filename))
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
   103
            return True # failed
cfccd3bee7b3 hgext: add largefiles extension
various
parents:
diff changeset
   104
        else:
15253
67d010779907 largefiles: improve error reporting
Greg Ward <greg@gerg.ca>
parents: 15252
diff changeset
   105
            raise RuntimeError('verify failed: unexpected response from '
67d010779907 largefiles: improve error reporting
Greg Ward <greg@gerg.ca>
parents: 15252
diff changeset
   106
                               'statlfile (%r)' % stat)