diff hgext/lfs/blobstore.py @ 35927:9b413478f261

lfs: deduplicate oids in the transfer Apparently, we can't rely on the server to deduplicate for us. Sadly, the pointer object isn't hashable, so it can't be reduced by converting it to a set. In order to be hashable, it needs to be immutable. I had a bunch of code to change it to composition and forward the readonly dict methods to a member dict. But the pointer is updated via __setitem__() when creating the pointer file. So it didn't see worth adding all of that code to the class.
author Matt Harbison <matt_harbison@yahoo.com>
date Sun, 04 Feb 2018 16:17:43 -0500
parents fa993c3c8462
children 9e3cb58c7ab3
line wrap: on
line diff
--- a/hgext/lfs/blobstore.py	Sun Feb 04 15:26:49 2018 -0500
+++ b/hgext/lfs/blobstore.py	Sun Feb 04 16:17:43 2018 -0500
@@ -194,11 +194,11 @@
 
     def writebatch(self, pointers, fromstore):
         """Batch upload from local to remote blobstore."""
-        self._batch(pointers, fromstore, 'upload')
+        self._batch(_deduplicate(pointers), fromstore, 'upload')
 
     def readbatch(self, pointers, tostore):
         """Batch download from remote to local blostore."""
-        self._batch(pointers, tostore, 'download')
+        self._batch(_deduplicate(pointers), tostore, 'download')
 
     def _batchrequest(self, pointers, action):
         """Get metadata about objects pointed by pointers for given action
@@ -399,13 +399,13 @@
         self.vfs = lfsvfs(fullpath)
 
     def writebatch(self, pointers, fromstore):
-        for p in pointers:
+        for p in _deduplicate(pointers):
             content = fromstore.read(p.oid(), verify=True)
             with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
                 fp.write(content)
 
     def readbatch(self, pointers, tostore):
-        for p in pointers:
+        for p in _deduplicate(pointers):
             with self.vfs(p.oid(), 'rb') as fp:
                 tostore.download(p.oid(), fp)
 
@@ -444,6 +444,13 @@
     None: _promptremote,
 }
 
+def _deduplicate(pointers):
+    """Remove any duplicate oids that exist in the list"""
+    reduced = util.sortdict()
+    for p in pointers:
+        reduced[p.oid()] = p
+    return reduced.values()
+
 def _verify(oid, content):
     realoid = hashlib.sha256(content).hexdigest()
     if realoid != oid: