diff mercurial/store.py @ 47877:2174f54aab18

store: return just one filename in walk functions Various walk functions return `(revlog_type, decoded, encoded)` where decoded could be None. But no one cares about `encoded`, and everyone expects `unencoded` to be present, except verify (because an undecodable name can only occur with old repo formats). Simplify all this by either failing outright if decoding a filename fails (instead of almost certainly failing with a type error due to treating None as bytes), or skipping the filename but providing it in an out argument for hg verify. Differential Revision: https://phab.mercurial-scm.org/D11248
author Valentin Gatien-Baron <valentin.gatienbaron@gmail.com>
date Mon, 02 Aug 2021 08:05:13 -0400
parents 59bc92a7c60f
children 6000f5b25c9b
line wrap: on
line diff
--- a/mercurial/store.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/store.py	Mon Aug 02 08:05:13 2021 -0400
@@ -472,7 +472,7 @@
         return self.path + b'/' + encodedir(f)
 
     def _walk(self, relpath, recurse):
-        '''yields (unencoded, encoded, size)'''
+        '''yields (revlog_type, unencoded, size)'''
         path = self.path
         if relpath:
             path += b'/' + relpath
@@ -488,7 +488,7 @@
                     rl_type = is_revlog(f, kind, st)
                     if rl_type is not None:
                         n = util.pconvert(fp[striplen:])
-                        l.append((rl_type, decodedir(n), n, st.st_size))
+                        l.append((rl_type, decodedir(n), st.st_size))
                     elif kind == stat.S_IFDIR and recurse:
                         visit.append(fp)
         l.sort()
@@ -505,26 +505,32 @@
         rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
         return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
 
-    def datafiles(self, matcher=None):
+    def datafiles(self, matcher=None, undecodable=None):
+        """Like walk, but excluding the changelog and root manifest.
+
+        When [undecodable] is None, revlogs names that can't be
+        decoded cause an exception. When it is provided, it should
+        be a list and the filenames that can't be decoded are added
+        to it instead. This is very rarely needed."""
         files = self._walk(b'data', True) + self._walk(b'meta', True)
-        for (t, u, e, s) in files:
-            yield (FILEFLAGS_FILELOG | t, u, e, s)
+        for (t, u, s) in files:
+            yield (FILEFLAGS_FILELOG | t, u, s)
 
     def topfiles(self):
         # yield manifest before changelog
         files = reversed(self._walk(b'', False))
-        for (t, u, e, s) in files:
+        for (t, u, s) in files:
             if u.startswith(b'00changelog'):
-                yield (FILEFLAGS_CHANGELOG | t, u, e, s)
+                yield (FILEFLAGS_CHANGELOG | t, u, s)
             elif u.startswith(b'00manifest'):
-                yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
+                yield (FILEFLAGS_MANIFESTLOG | t, u, s)
             else:
-                yield (FILETYPE_OTHER | t, u, e, s)
+                yield (FILETYPE_OTHER | t, u, s)
 
     def walk(self, matcher=None):
         """return file related to data storage (ie: revlogs)
 
-        yields (file_type, unencoded, encoded, size)
+        yields (file_type, unencoded, size)
 
         if a matcher is passed, storage files of only those tracked paths
         are passed with matches the matcher
@@ -574,15 +580,20 @@
     # However that might change so we should probably add a test and encoding
     # decoding for it too. see issue6548
 
-    def datafiles(self, matcher=None):
-        for t, a, b, size in super(encodedstore, self).datafiles():
+    def datafiles(self, matcher=None, undecodable=None):
+        for t, f1, size in super(encodedstore, self).datafiles():
             try:
-                a = decodefilename(a)
+                f2 = decodefilename(f1)
             except KeyError:
-                a = None
-            if a is not None and not _matchtrackedpath(a, matcher):
+                if undecodable is None:
+                    msg = _(b'undecodable revlog name %s') % f1
+                    raise error.StorageError(msg)
+                else:
+                    undecodable.append(f1)
+                    continue
+            if not _matchtrackedpath(f2, matcher):
                 continue
-            yield t, a, b, size
+            yield t, f2, size
 
     def join(self, f):
         return self.path + b'/' + encodefilename(f)
@@ -770,7 +781,7 @@
     def getsize(self, path):
         return self.rawvfs.stat(path).st_size
 
-    def datafiles(self, matcher=None):
+    def datafiles(self, matcher=None, undecodable=None):
         for f in sorted(self.fncache):
             if not _matchtrackedpath(f, matcher):
                 continue
@@ -779,7 +790,7 @@
                 t = revlog_type(f)
                 assert t is not None, f
                 t |= FILEFLAGS_FILELOG
-                yield t, f, ef, self.getsize(ef)
+                yield t, f, self.getsize(ef)
             except OSError as err:
                 if err.errno != errno.ENOENT:
                     raise