largefiles: batch statlfile requests when pushing a largefiles repo (issue3386)
This implements part of issue 3386. It batches the statlfile requests for all
largefiles in the revisions that are about to be pushed into a single request,
instead of making N separate requests.
In a real-world test case, this change was verified to save 1,116 round-trips to
the server. It requires only a client-side change and is backwards-compatible
with older versions of the server.
--- a/hgext/largefiles/basestore.py Wed Jul 04 02:21:04 2012 +0200
+++ b/hgext/largefiles/basestore.py Sun Jun 24 20:36:22 2012 +0200
@@ -48,8 +48,8 @@
'''Put source file into the store under <filename>/<hash>.'''
raise NotImplementedError('abstract method')
- def exists(self, hash):
- '''Check to see if the store contains the given hash.'''
+ def exists(self, hashes):
+ '''Check to see if the store contains the given hashes.'''
raise NotImplementedError('abstract method')
def get(self, files):
--- a/hgext/largefiles/lfcommands.py Wed Jul 04 02:21:04 2012 +0200
+++ b/hgext/largefiles/lfcommands.py Sun Jun 24 20:36:22 2012 +0200
@@ -340,7 +340,11 @@
store = basestore._openstore(rsrc, rdst, put=True)
at = 0
- files = filter(lambda h: not store.exists(h), files)
+ ui.debug("sending statlfile command for %d largefiles\n" % len(files))
+ retval = store.exists(files)
+ files = filter(lambda h: not retval[h], files)
+ ui.debug("%d largefiles need to be uploaded\n" % len(files))
+
for hash in files:
ui.progress(_('uploading largefiles'), at, unit='largefile',
total=len(files))
--- a/hgext/largefiles/proto.py Wed Jul 04 02:21:04 2012 +0200
+++ b/hgext/largefiles/proto.py Sun Jun 24 20:36:22 2012 +0200
@@ -7,6 +7,7 @@
import urllib2
from mercurial import error, httprepo, util, wireproto
+from mercurial.wireproto import batchable, future
from mercurial.i18n import _
import lfutil
@@ -119,15 +120,19 @@
length))
return (length, stream)
+ @batchable
def statlfile(self, sha):
+ f = future()
+ result = {'sha': sha}
+ yield result, f
try:
- return int(self._call("statlfile", sha=sha))
+ yield int(f.value)
except (ValueError, urllib2.HTTPError):
# If the server returns anything but an integer followed by a
# newline, newline, it's not speaking our language; if we get
# an HTTP error, we can't be sure the largefile is present;
# either way, consider it missing.
- return 2
+ yield 2
repo.__class__ = lfileswirerepository
--- a/hgext/largefiles/remotestore.py Wed Jul 04 02:21:04 2012 +0200
+++ b/hgext/largefiles/remotestore.py Sun Jun 24 20:36:22 2012 +0200
@@ -10,6 +10,7 @@
from mercurial import util
from mercurial.i18n import _
+from mercurial.wireproto import remotebatch
import lfutil
import basestore
@@ -20,8 +21,6 @@
super(remotestore, self).__init__(ui, repo, url)
def put(self, source, hash):
- if self._verify(hash):
- return
if self.sendfile(source, hash):
raise util.Abort(
_('remotestore: could not put %s to remote store %s')
@@ -29,8 +28,8 @@
self.ui.debug(
_('remotestore: put %s to remote store %s') % (source, self.url))
- def exists(self, hash):
- return self._verify(hash)
+ def exists(self, hashes):
+ return self._verify(hashes)
def sendfile(self, filename, hash):
self.ui.debug('remotestore: sendfile(%s, %s)\n' % (filename, hash))
@@ -74,8 +73,8 @@
infile = lfutil.limitreader(infile, length)
return lfutil.copyandhash(lfutil.blockstream(infile), tmpfile)
- def _verify(self, hash):
- return not self._stat(hash)
+ def _verify(self, hashes):
+ return self._stat(hashes)
def _verifyfile(self, cctx, cset, contents, standin, verified):
filename = lfutil.splitstandin(standin)
@@ -104,3 +103,8 @@
else:
raise RuntimeError('verify failed: unexpected response from '
'statlfile (%r)' % stat)
+
+ def batch(self):
+ '''Support for remote batching.'''
+ return remotebatch(self)
+
--- a/hgext/largefiles/wirestore.py Wed Jul 04 02:21:04 2012 +0200
+++ b/hgext/largefiles/wirestore.py Sun Jun 24 20:36:22 2012 +0200
@@ -25,5 +25,13 @@
def _get(self, hash):
return self.remote.getlfile(hash)
- def _stat(self, hash):
- return self.remote.statlfile(hash)
+ def _stat(self, hashes):
+ batch = self.remote.batch()
+ futures = {}
+ for hash in hashes:
+ futures[hash] = batch.statlfile(hash)
+ batch.submit()
+ retval = {}
+ for hash in hashes:
+ retval[hash] = not futures[hash].value
+ return retval