changeset 5863:34c7661bd5e2 stable

evolve: make depthcache.save/load use struct.unpack/pack (issue6354) arraytobytes() and arrayfrombytes() are not cross-platform.
author Anton Shestakov <av6@dwimlabs.net>
date Tue, 30 Mar 2021 19:18:08 +0800
parents 5056fb0c919e
children 97f5b5b25299
files hgext3rd/evolve/depthcache.py hgext3rd/evolve/genericcaches.py tests/test-discovery-obshashrange-cache.t
diffstat 3 files changed, 91 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/hgext3rd/evolve/depthcache.py	Wed Mar 24 22:07:07 2021 +0800
+++ b/hgext3rd/evolve/depthcache.py	Tue Mar 30 19:18:08 2021 +0800
@@ -181,20 +181,23 @@
         assert repo.filtername is None
 
         data = repo.cachevfs.tryread(self._filepath)
+        self._cachekey = self.emptykey
         self._data = array.array(r'l')
-        if not data:
-            self._cachekey = self.emptykey
-        else:
+        if data:
             headerdata = data[:self._cachekeysize]
-            self._cachekey = self._deserializecachekey(headerdata)
-            compat.arrayfrombytes(self._data, data[self._cachekeysize:])
+            cachekey = self._deserializecachekey(headerdata)
+            expected = self._datastruct.size * (cachekey[0] + 1)
+            data = data[self._cachekeysize:]
+            if len(data) == expected:
+                self._data.extend(self._deserializedata(data))
+                self._cachekey = cachekey
+            else:
+                repo.ui.debug(b'depthcache file seems to be corrupted, '
+                              b'it will be rebuilt from scratch\n')
         self._ondiskkey = self._cachekey
 
     def save(self, repo):
         """save the data to disk
-
-        Format is pretty simple, we serialise the cache key and then drop the
-        bytearray.
         """
         if self._cachekey is None or self._cachekey == self._ondiskkey:
             return
@@ -203,7 +206,7 @@
             cachefile = repo.cachevfs(self._filepath, b'w', atomictemp=True)
             headerdata = self._serializecachekey()
             cachefile.write(headerdata)
-            cachefile.write(compat.arraytobytes(self._data))
+            cachefile.write(self._serializedata(self._data))
             cachefile.close()
             self._ondiskkey = self._cachekey
         except (IOError, OSError) as exc:
--- a/hgext3rd/evolve/genericcaches.py	Wed Mar 24 22:07:07 2021 +0800
+++ b/hgext3rd/evolve/genericcaches.py	Tue Mar 30 19:18:08 2021 +0800
@@ -34,6 +34,8 @@
     _cachekeyspec = b'' # used for serialization
     _cachename = None # used for debug message
 
+    _datastruct = struct.Struct('<q')
+
     @abc.abstractmethod
     def __init__(self):
         super(incrementalcachebase, self).__init__()
@@ -133,6 +135,17 @@
         """read the cachekey from bytes"""
         return self._cachekeystruct.unpack(data)
 
+    def _serializedata(self, data):
+        """turn data into binary form"""
+        return b''.join(self._datastruct.pack(item) for item in data)
+
+    def _deserializedata(self, data):
+        """turn binary into data"""
+        return (
+            self._datastruct.unpack_from(data, i)[0]
+            for i in range(0, len(data), self._datastruct.size)
+        )
+
 class changelogsourcebase(incrementalcachebase):  # pytype: disable=ignored-metaclass
     """an abstract class for cache sourcing data from the changelog
 
--- a/tests/test-discovery-obshashrange-cache.t	Wed Mar 24 22:07:07 2021 +0800
+++ b/tests/test-discovery-obshashrange-cache.t	Tue Mar 30 19:18:08 2021 +0800
@@ -206,6 +206,32 @@
   0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
   0050: 00 00 00 00 00 00 00 00 00 00 00 00             |............|
 
+next let's see how caches deal with being written incorrectly
+
+  $ cat >> repack.py << EOF
+  > import struct
+  > import sys
+  > data = []
+  > with open(sys.argv[1], 'rb') as f:
+  >     header = f.read(24)
+  >     while True:
+  >         buf = f.read(8)
+  >         if not buf: break
+  >         data.append(struct.unpack('<q', buf)[0])
+  > with open(sys.argv[1], 'wb') as f:
+  >     f.write(header)
+  >     for item in data:
+  >         f.write(struct.pack('<l', item))
+  > EOF
+
+  $ cat >> truncate.py << EOF
+  > import os
+  > import sys
+  > with open(sys.argv[1], 'ab') as fp:
+  >     fp.seek(int(sys.argv[2]), os.SEEK_END)
+  >     fp.truncate()
+  > EOF
+
   $ hg -R main debugdepth --rev 'all()' --method compare --debug
   1ea73414a91b 1
   66f7d451a68b 2
@@ -216,6 +242,46 @@
   f69452c5b1af 7
   4de32a90b66c 8
 
+  $ "$PYTHON" repack.py main/.hg/cache/evoext-depthcache-00
+  $ f -H main/.hg/cache/evoext-depthcache-00
+  main/.hg/cache/evoext-depthcache-00:
+  0000: 00 00 00 07 4d e3 2a 90 b6 6c d0 83 eb f3 c0 0b |....M.*..l......|
+  0010: 41 27 7a a7 ab ca 51 dd 01 00 00 00 02 00 00 00 |A'z...Q.........|
+  0020: 03 00 00 00 04 00 00 00 05 00 00 00 06 00 00 00 |................|
+  0030: 07 00 00 00 08 00 00 00                         |........|
+
+  $ hg -R main debugdepth --rev 'all()' --method compare --debug
+  depthcache file seems to be corrupted, it will be rebuilt from scratch
+  1ea73414a91b 1
+  66f7d451a68b 2
+  01241442b3c2 3
+  2dc09a01254d 4
+  bebd167eb94d 5
+  c8d03c1b5e94 6
+  f69452c5b1af 7
+  4de32a90b66c 8
+
+  $ "$PYTHON" truncate.py main/.hg/cache/evoext-depthcache-00 -4
+  $ f -H main/.hg/cache/evoext-depthcache-00
+  main/.hg/cache/evoext-depthcache-00:
+  0000: 00 00 00 07 4d e3 2a 90 b6 6c d0 83 eb f3 c0 0b |....M.*..l......|
+  0010: 41 27 7a a7 ab ca 51 dd 01 00 00 00 00 00 00 00 |A'z...Q.........|
+  0020: 02 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 |................|
+  0030: 04 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 |................|
+  0040: 06 00 00 00 00 00 00 00 07 00 00 00 00 00 00 00 |................|
+  0050: 08 00 00 00                                     |....|
+
+  $ hg -R main debugdepth --rev 'all()' --method compare --debug
+  depthcache file seems to be corrupted, it will be rebuilt from scratch
+  1ea73414a91b 1
+  66f7d451a68b 2
+  01241442b3c2 3
+  2dc09a01254d 4
+  bebd167eb94d 5
+  c8d03c1b5e94 6
+  f69452c5b1af 7
+  4de32a90b66c 8
+
   $ hg -R main debugstablesortcache --debug
   number of revisions:            8
   number of merge:                0