Mercurial > hg-stable
changeset 39874:14e500b58263
revlog: add method for obtaining storage info (API)
We currently have a handful of methods on the file and manifest
storage interfaces for obtaining metadata about storage. e.g.
files() is used to obtain the files backing storage. rawsize()
is used to quickly compute the size of tracked revisions without resolving
their fulltext.
Code in upgrade and stream clone makes heavy use of these methods.
The existing APIs are generic and don't necessarily have the
specialization that we need going forward. For example, files()
doesn't distinguish between exclusive storage and shared storage.
This makes stream clone difficult to implement when e.g. there may
be a single file backing storage for multiple tracked paths. It
also makes reporting difficult, as we don't know how many bytes are
actually used by storage since we can't easily identify shared files.
This commit implements a new method for obtaining storage metadata.
It is designed to accept arguments specifying what metadata to request
and to return a dict with those fields populated. We /could/ make
each of these attributes a separate method. But this is a specialized
API and I'm trying to avoid method bloat on the interfaces. There is
also the possibility that certain callers will want to obtain multiple
fields in different combinations and some backends may have performance
issues obtaining all that data via separate method calls.
Simple storage integration tests have been added. For now, we assume
fields can't be "None" (ignoring the interface documentation). We can
revisit this later.
Differential Revision: https://phab.mercurial-scm.org/D4747
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 24 Sep 2018 11:56:48 -0700 |
parents | b06303a208be |
children | b399ff55ee6d |
files | mercurial/filelog.py mercurial/manifest.py mercurial/repository.py mercurial/revlog.py mercurial/testing/storage.py |
diffstat | 5 files changed, 101 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/filelog.py Wed Sep 26 11:27:41 2018 -0700 +++ b/mercurial/filelog.py Mon Sep 24 11:56:48 2018 -0700 @@ -187,6 +187,14 @@ def verifyintegrity(self, state): return self._revlog.verifyintegrity(state) + def storageinfo(self, exclusivefiles=False, sharedfiles=False, + revisionscount=False, trackedsize=False, + storedsize=False): + return self._revlog.storageinfo( + exclusivefiles=exclusivefiles, sharedfiles=sharedfiles, + revisionscount=revisionscount, trackedsize=trackedsize, + storedsize=storedsize) + # TODO these aren't part of the interface and aren't internal methods. # Callers should be fixed to not use them.
--- a/mercurial/manifest.py Wed Sep 26 11:27:41 2018 -0700 +++ b/mercurial/manifest.py Mon Sep 24 11:56:48 2018 -0700 @@ -1592,6 +1592,14 @@ return self._revlog.clone(tr, destrevlog._revlog, **kwargs) + def storageinfo(self, exclusivefiles=False, sharedfiles=False, + revisionscount=False, trackedsize=False, + storedsize=False): + return self._revlog.storageinfo( + exclusivefiles=exclusivefiles, sharedfiles=sharedfiles, + revisionscount=revisionscount, trackedsize=trackedsize, + storedsize=storedsize) + @property def indexfile(self): return self._revlog.indexfile
--- a/mercurial/repository.py Wed Sep 26 11:27:41 2018 -0700 +++ b/mercurial/repository.py Mon Sep 24 11:56:48 2018 -0700 @@ -748,6 +748,41 @@ be a better API for that. """ + def storageinfo(exclusivefiles=False, sharedfiles=False, + revisionscount=False, trackedsize=False, + storedsize=False): + """Obtain information about storage for this file's data. + + Returns a dict describing storage for this tracked path. The keys + in the dict map to arguments of the same. The arguments are bools + indicating whether to calculate and obtain that data. + + exclusivefiles + Iterable of (vfs, path) describing files that are exclusively + used to back storage for this tracked path. + + sharedfiles + Iterable of (vfs, path) describing files that are used to back + storage for this tracked path. Those files may also provide storage + for other stored entities. + + revisionscount + Number of revisions available for retrieval. + + trackedsize + Total size in bytes of all tracked revisions. This is a sum of the + length of the fulltext of all revisions. + + storedsize + Total size in bytes used to store data for all tracked revisions. + This is commonly less than ``trackedsize`` due to internal usage + of deltas rather than fulltext revisions. + + Not all storage backends may support all queries are have a reasonable + value to use. In that case, the value should be set to ``None`` and + callers are expected to handle this special value. + """ + def verifyintegrity(state): """Verifies the integrity of file storage. @@ -1199,6 +1234,15 @@ manifest including files that did not match. """ + def storageinfo(exclusivefiles=False, sharedfiles=False, + revisionscount=False, trackedsize=False, + storedsize=False): + """Obtain information about storage for this manifest's data. + + See ``ifilestorage.storageinfo()`` for a description of this method. + This one behaves the same way, except for manifest data. 
+ """ + class imanifestlog(interfaceutil.Interface): """Interface representing a collection of manifest snapshots.
--- a/mercurial/revlog.py Wed Sep 26 11:27:41 2018 -0700 +++ b/mercurial/revlog.py Mon Sep 24 11:56:48 2018 -0700 @@ -2643,3 +2643,28 @@ yield revlogproblem( warning=_("warning: '%s' uses revlog format %d; expected %d") % (self.indexfile, version, state['expectedversion'])) + + def storageinfo(self, exclusivefiles=False, sharedfiles=False, + revisionscount=False, trackedsize=False, + storedsize=False): + d = {} + + if exclusivefiles: + d['exclusivefiles'] = [(self.opener, self.indexfile)] + if not self._inline: + d['exclusivefiles'].append((self.opener, self.datafile)) + + if sharedfiles: + d['sharedfiles'] = [] + + if revisionscount: + d['revisionscount'] = len(self) + + if trackedsize: + d['trackedsize'] = sum(map(self.rawsize, iter(self))) + + if storedsize: + d['storedsize'] = sum(self.opener.stat(path).st_size + for path in self.files()) + + return d
--- a/mercurial/testing/storage.py Wed Sep 26 11:27:41 2018 -0700 +++ b/mercurial/testing/storage.py Mon Sep 24 11:56:48 2018 -0700 @@ -388,6 +388,10 @@ def testempty(self): f = self._makefilefn() + self.assertEqual(f.storageinfo(), {}) + self.assertEqual(f.storageinfo(revisionscount=True, trackedsize=True), + {'revisionscount': 0, 'trackedsize': 0}) + self.assertEqual(f.rawsize(nullrev), 0) for i in range(-5, 5): @@ -466,6 +470,10 @@ with self._maketransactionfn() as tr: node = f.add(fulltext, None, tr, 0, nullid, nullid) + self.assertEqual(f.storageinfo(), {}) + self.assertEqual(f.storageinfo(revisionscount=True, trackedsize=True), + {'revisionscount': 1, 'trackedsize': len(fulltext)}) + self.assertEqual(f.rawsize(0), len(fulltext)) with self.assertRaises(IndexError): @@ -553,6 +561,14 @@ node1 = f.add(fulltext1, None, tr, 1, node0, nullid) node2 = f.add(fulltext2, None, tr, 3, node1, nullid) + self.assertEqual(f.storageinfo(), {}) + self.assertEqual( + f.storageinfo(revisionscount=True, trackedsize=True), + { + 'revisionscount': 3, + 'trackedsize': len(fulltext0) + len(fulltext1) + len(fulltext2), + }) + self.assertEqual(f.rawsize(0), len(fulltext0)) self.assertEqual(f.rawsize(1), len(fulltext1)) self.assertEqual(f.rawsize(2), len(fulltext2))