lfs: add server side support for the Batch API
author Matt Harbison <matt_harbison@yahoo.com>
Sat, 17 Mar 2018 01:47:57 -0400
changeset 37148 ea6fc58524d7
parent 37147 a2566597acb5
child 37149 cc0a6ea95d98
lfs: add server side support for the Batch API
hgext/lfs/wireprotolfsserver.py
--- a/hgext/lfs/wireprotolfsserver.py	Sat Mar 17 01:23:01 2018 -0400
+++ b/hgext/lfs/wireprotolfsserver.py	Sat Mar 17 01:47:57 2018 -0400
@@ -7,6 +7,10 @@
 
 from __future__ import absolute_import
 
+import datetime
+import errno
+import json
+
 from mercurial.hgweb import (
     common as hgwebcommon,
 )
@@ -15,6 +19,9 @@
     pycompat,
 )
 
+HTTP_OK = hgwebcommon.HTTP_OK
+HTTP_BAD_REQUEST = hgwebcommon.HTTP_BAD_REQUEST
+
 def handlewsgirequest(orig, rctx, req, res, checkperm):
     """Wrap wireprotoserver.handlewsgirequest() to possibly process an LFS
     request if it is left unprocessed by the wrapped method.
@@ -46,13 +53,177 @@
         res.setbodybytes(b'0\n%s\n' % pycompat.bytestr(e))
         return True
 
+def _sethttperror(res, code, message=None):
+    res.status = hgwebcommon.statusmessage(code, message=message)
+    res.headers[b'Content-Type'] = b'text/plain; charset=utf-8'
+    res.setbodybytes(b'')
+
 def _processbatchrequest(repo, req, res):
     """Handle a request for the Batch API, which is the gateway to granting file
     access.
 
     https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
     """
-    return False
+
+    # Mercurial client request:
+    #
+    #   HOST: localhost:$HGPORT
+    #   ACCEPT: application/vnd.git-lfs+json
+    #   ACCEPT-ENCODING: identity
+    #   USER-AGENT: git-lfs/2.3.4 (Mercurial 4.5.2+1114-f48b9754f04c+20180316)
+    #   Content-Length: 125
+    #   Content-Type: application/vnd.git-lfs+json
+    #
+    #   {
+    #     "objects": [
+    #       {
+    #         "oid": "31cf...8e5b",
+    #         "size": 12
+    #       }
+    #     ],
+    #     "operation": "upload"
+    #   }
+
+    if (req.method != b'POST'
+        or req.headers[b'Content-Type'] != b'application/vnd.git-lfs+json'
+        or req.headers[b'Accept'] != b'application/vnd.git-lfs+json'):
+        # TODO: figure out what the proper handling for a bad request to the
+        #       Batch API is.
+        _sethttperror(res, HTTP_BAD_REQUEST, b'Invalid Batch API request')
+        return True
+
+    # XXX: specify an encoding?
+    lfsreq = json.loads(req.bodyfh.read())
+
+    # If no transfer handlers are explicitly requested, 'basic' is assumed.
+    if 'basic' not in lfsreq.get('transfers', ['basic']):
+        _sethttperror(res, HTTP_BAD_REQUEST,
+                      b'Only the basic LFS transfer handler is supported')
+        return True
+
+    operation = lfsreq.get('operation')
+    if operation not in ('upload', 'download'):
+        _sethttperror(res, HTTP_BAD_REQUEST,
+                      b'Unsupported LFS transfer operation: %s' % operation)
+        return True
+
+    localstore = repo.svfs.lfslocalblobstore
+
+    objects = [p for p in _batchresponseobjects(req, lfsreq.get('objects', []),
+                                                operation, localstore)]
+
+    rsp = {
+        'transfer': 'basic',
+        'objects': objects,
+    }
+
+    res.status = hgwebcommon.statusmessage(HTTP_OK)
+    res.headers[b'Content-Type'] = b'application/vnd.git-lfs+json'
+    res.setbodybytes(pycompat.bytestr(json.dumps(rsp)))
+
+    return True
+
+def _batchresponseobjects(req, objects, action, store):
+    """Yield one dictionary of attributes for the Batch API response for each
+    object in the list.
+
+    req: The parsedrequest for the Batch API request
+    objects: The list of objects in the Batch API object request list
+    action: 'upload' or 'download'
+    store: The local blob store for servicing requests"""
+
+    # Successful lfs-test-server response to solicit an upload:
+    # {
+    #    u'objects': [{
+    #       u'size': 12,
+    #       u'oid': u'31cf...8e5b',
+    #       u'actions': {
+    #           u'upload': {
+    #               u'href': u'http://localhost:$HGPORT/objects/31cf...8e5b',
+    #               u'expires_at': u'0001-01-01T00:00:00Z',
+    #               u'header': {
+    #                   u'Accept': u'application/vnd.git-lfs'
+    #               }
+    #           }
+    #       }
+    #    }]
+    # }
+
+    # TODO: Sort out the expires_at/expires_in/authenticated keys.
+
+    for obj in objects:
+        # Convert unicode to ASCII to create a filesystem path
+        oid = obj.get('oid').encode('ascii')
+        rsp = {
+            'oid': oid,
+            'size': obj.get('size'),  # XXX: should this check the local size?
+            #'authenticated': True,
+        }
+
+        exists = True
+        verifies = False
+
+        # Verify an existing file on the upload request, so that the client is
+        # solicited to re-upload if it's corrupt locally.  Download requests are
+        # also verified, so the error can be flagged in the Batch API response.
+        # (Maybe we can use this to short circuit the download for `hg verify`,
+        # IFF the client can assert that the remote end is an hg server.)
+        # Otherwise, it's potentially overkill on download, since it is also
+        # verified as the file is streamed to the caller.
+        try:
+            verifies = store.verify(oid)
+        except IOError as inst:
+            if inst.errno != errno.ENOENT:
+                rsp['error'] = {
+                    'code': 500,
+                    'message': inst.strerror or 'Internal Server Server'
+                }
+                yield rsp
+                continue
+
+            exists = False
+
+        # Items are always listed for downloads.  For uploads, 'actions' is
+        # omitted IFF the object already exists locally and verifies.
+        if action == 'download':
+            if not exists:
+                rsp['error'] = {
+                    'code': 404,
+                    'message': "The object does not exist"
+                }
+                yield rsp
+                continue
+
+            elif not verifies:
+                rsp['error'] = {
+                    'code': 422,   # XXX: is this the right code?
+                    'message': "The object is corrupt"
+                }
+                yield rsp
+                continue
+
+        elif verifies:
+            yield rsp  # Skip 'actions': already uploaded
+            continue
+
+        expiresat = datetime.datetime.now() + datetime.timedelta(minutes=10)
+
+        rsp['actions'] = {
+            '%s' % action: {
+                # TODO: Account for the --prefix, if any.
+                'href': '%s/.hg/lfs/objects/%s' % (req.baseurl, oid),
+                # datetime.isoformat() doesn't include the 'Z' suffix
+                "expires_at": expiresat.strftime('%Y-%m-%dT%H:%M:%SZ'),
+                'header': {
+                    # The spec doesn't mention the Accept header here, but avoid
+                    # a gratuitous deviation from lfs-test-server in the test
+                    # output.
+                    'Accept': 'application/vnd.git-lfs'
+                }
+            }
+        }
+
+        yield rsp
 
 def _processbasictransfer(repo, req, res, checkperm):
     """Handle a single file upload (PUT) or download (GET) action for the Basic