sidedata: enable sidedata computers to optionally rewrite flags
Sidedata computers may want to influence the flags of the revision they touch.
For example, the computer for changelog-based copytracing can add a flag to
signify that this revision might affect copytracing, and conversely remove
that flag if the information is no longer applicable.
See inline documentation in `storageutil` for more details.
Differential Revision: https://phab.mercurial-scm.org/D10344
--- a/mercurial/cext/revlog.c Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/cext/revlog.c Thu Apr 08 16:55:17 2021 +0200
@@ -503,14 +503,14 @@
inside the transaction that creates the given revision. */
static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
{
- uint64_t sidedata_offset;
- int rev;
- Py_ssize_t sidedata_comp_len;
+ uint64_t offset_flags, sidedata_offset;
+ Py_ssize_t rev; /* parsed with "n", which stores a Py_ssize_t */
+ int sidedata_comp_len; /* parsed with "i", which stores an int */
char *data;
#if LONG_MAX == 0x7fffffffL
- const char *const sidedata_format = PY23("nKi", "nKi");
+ const char *const sidedata_format = PY23("nKiK", "nKiK");
#else
- const char *const sidedata_format = PY23("nki", "nki");
+ const char *const sidedata_format = PY23("nkik", "nkik");
#endif
if (self->hdrsize == v1_hdrsize || self->inlined) {
@@ -525,7 +525,7 @@
}
if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
- &sidedata_comp_len))
+ &sidedata_comp_len, &offset_flags))
return NULL;
if (rev < 0 || rev >= index_length(self)) {
@@ -542,6 +542,7 @@
/* Find the newly added node, offset from the "already on-disk" length
*/
data = self->added + self->hdrsize * (rev - self->length);
+ putbe64(offset_flags, data);
putbe64(sidedata_offset, data + 64);
putbe32(sidedata_comp_len, data + 72);
--- a/mercurial/interfaces/repository.py Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/interfaces/repository.py Thu Apr 08 16:55:17 2021 +0200
@@ -1856,7 +1856,7 @@
def savecommitmessage(text):
pass
- def register_sidedata_computer(kind, category, keys, computer):
+ def register_sidedata_computer(kind, category, keys, computer, flags):
pass
def register_wanted_sidedata(category):
--- a/mercurial/localrepo.py Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/localrepo.py Thu Apr 08 16:55:17 2021 +0200
@@ -3370,9 +3370,9 @@
return
self._wanted_sidedata.add(pycompat.bytestr(category))
- def register_sidedata_computer(self, kind, category, keys, computer):
+ def register_sidedata_computer(self, kind, category, keys, computer, flags):
if kind not in revlogconst.ALL_KINDS:
- msg = _(b"unexpected revlog kind %r.")
+ msg = _(b"unexpected revlog kind '%s'.")
raise error.ProgrammingError(msg % kind)
category = pycompat.bytestr(category)
if category in self._sidedata_computers.get(kind, []):
@@ -3381,7 +3381,7 @@
)
raise error.ProgrammingError(msg % category)
self._sidedata_computers.setdefault(kind, {})
- self._sidedata_computers[kind][category] = (keys, computer)
+ self._sidedata_computers[kind][category] = (keys, computer, flags)
# used to avoid circular references so destructors work
--- a/mercurial/metadata.py Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/metadata.py Thu Apr 08 16:55:17 2021 +0200
@@ -820,7 +820,9 @@
def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
- return _getsidedata(repo, rev)[0]
+ sidedata, has_copies_info = _getsidedata(repo, rev)
+ flags_to_add = sidedataflag.REVIDX_HASCOPIESINFO if has_copies_info else 0
+ return sidedata, (flags_to_add, 0)
def set_sidedata_spec_for_repo(repo):
@@ -831,6 +833,7 @@
sidedatamod.SD_FILES,
(sidedatamod.SD_FILES,),
copies_sidedata_computer,
+ sidedataflag.REVIDX_HASCOPIESINFO,
)
--- a/mercurial/pure/parsers.py Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/pure/parsers.py Thu Apr 08 16:55:17 2021 +0200
@@ -268,7 +268,9 @@
index_format = revlog_constants.INDEX_ENTRY_V2
null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
- def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
+ def replace_sidedata_info(
+ self, i, sidedata_offset, sidedata_length, offset_flags
+ ):
"""
Replace an existing index entry's sidedata offset and length with new
ones.
@@ -283,7 +285,8 @@
if i >= self._lgt:
packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
old = self._extra[i - self._lgt]
- new = old[:64] + packed + old[64 + packed_size :]
+ offset_flags = struct.pack(b">Q", offset_flags)
+ new = offset_flags + old[8:64] + packed + old[64 + packed_size :]
self._extra[i - self._lgt] = new
else:
msg = b"cannot rewrite entries outside of this transaction"
--- a/mercurial/revlog.py Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/revlog.py Thu Apr 08 16:55:17 2021 +0200
@@ -3105,7 +3105,7 @@
current_offset = fp.tell()
for rev in range(startrev, endrev + 1):
entry = self.index[rev]
- new_sidedata = storageutil.run_sidedata_helpers(
+ new_sidedata, flags = storageutil.run_sidedata_helpers(
store=self,
sidedata_helpers=helpers,
sidedata={},
@@ -3121,7 +3121,11 @@
# revlog.
msg = b"Rewriting existing sidedata is not supported yet"
raise error.Abort(msg)
- entry = entry[:8]
+
+ # Set the flags to add, then clear the flags to remove (note the
+ # parentheses: `&` binds tighter than `|`) after the sidedata helpers
+ new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
+ entry = (new_offset_flags,) + entry[1:8]
entry += (current_offset, len(serialized_sidedata))
fp.write(serialized_sidedata)
@@ -3131,9 +3135,9 @@
# rewrite the new index entries
with self._indexfp(b'w+') as fp:
fp.seek(startrev * self.index.entry_size)
- for i, entry in enumerate(new_entries):
+ for i, e in enumerate(new_entries):
rev = startrev + i
- self.index.replace_sidedata_info(rev, entry[8], entry[9])
+ self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
packed = self.index.entry_binary(rev)
if rev == 0:
header = self.index.pack_header(self.version)
--- a/mercurial/utils/storageutil.py Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/utils/storageutil.py Thu Apr 08 16:55:17 2021 +0200
@@ -366,12 +366,17 @@
If not None, means that sidedata should be included.
A dictionary of revlog type to tuples of `(repo, computers, removers)`:
* `repo` is used as an argument for computers
- * `computers` is a list of `(category, (keys, computer)` that
+ * `computers` is a list of `(category, (keys, computer, flags))` that
compute the missing sidedata categories that were asked:
* `category` is the sidedata category
* `keys` are the sidedata keys to be affected
+ * `flags` is a bitmask (an integer) of flags to remove when
+ removing the category.
* `computer` is the function `(repo, store, rev, sidedata)` that
- returns a new sidedata dict.
+ returns a tuple of
+ `(new sidedata dict, (flags to add, flags to remove))`.
+ For example, it will return `({}, (0, 1 << 15))` to return no
+ sidedata, with no flags to add and one flag to remove.
* `removers` will remove the keys corresponding to the categories
that are present, but not needed.
If both `computers` and `removers` are empty, sidedata are simply not
@@ -491,12 +496,13 @@
available.add(rev)
serialized_sidedata = None
+ sidedata_flags = (0, 0)
if sidedata_helpers:
- sidedata = store.sidedata(rev)
- sidedata = run_sidedata_helpers(
+ old_sidedata = store.sidedata(rev)
+ sidedata, sidedata_flags = run_sidedata_helpers(
store=store,
sidedata_helpers=sidedata_helpers,
- sidedata=sidedata,
+ sidedata=old_sidedata,
rev=rev,
)
if sidedata:
@@ -507,6 +513,8 @@
if serialized_sidedata:
# Advertise that sidedata exists to the other side
protocol_flags |= CG_FLAG_SIDEDATA
+ # Add, then clear, the flags returned by computers and removers
+ flags = (flags | sidedata_flags[0]) & ~sidedata_flags[1]
yield resultcls(
node=node,
@@ -535,12 +543,17 @@
"""
repo, sd_computers, sd_removers = sidedata_helpers
kind = store.revlog_kind
- for _keys, sd_computer in sd_computers.get(kind, []):
- sidedata = sd_computer(repo, store, rev, sidedata)
- for keys, _computer in sd_removers.get(kind, []):
+ flags_to_add = 0
+ flags_to_remove = 0
+ for _keys, sd_computer, _flags in sd_computers.get(kind, []):
+ sidedata, flags = sd_computer(repo, store, rev, sidedata)
+ flags_to_add |= flags[0]
+ flags_to_remove |= flags[1]
+ for keys, _computer, flags in sd_removers.get(kind, []):
for key in keys:
sidedata.pop(key, None)
- return sidedata
+ flags_to_remove |= flags
+ return sidedata, (flags_to_add, flags_to_remove)
def deltaiscensored(delta, baserev, baselenfn):
--- a/tests/testlib/ext-sidedata-2.py Sat Apr 10 11:27:40 2021 +0200
+++ b/tests/testlib/ext-sidedata-2.py Thu Apr 08 16:55:17 2021 +0200
@@ -16,13 +16,15 @@
from mercurial.revlogutils import sidedata as sidedatamod
from mercurial.revlogutils import constants
+NO_FLAGS = (0, 0) # (flags to add, flags to remove): change no flags
+
def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
sidedata = sidedata.copy()
if text is None:
text = revlog.revision(rev)
sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
- return sidedata
+ return sidedata, NO_FLAGS
def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
@@ -31,7 +33,7 @@
text = revlog.revision(rev)
sha256 = hashlib.sha256(text).digest()
sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
- return sidedata
+ return sidedata, NO_FLAGS
def reposetup(ui, repo):
@@ -42,10 +44,12 @@
sidedatamod.SD_TEST1,
(sidedatamod.SD_TEST1,),
compute_sidedata_1,
+ 0,
)
repo.register_sidedata_computer(
kind,
sidedatamod.SD_TEST2,
(sidedatamod.SD_TEST2,),
compute_sidedata_2,
+ 0,
)
--- a/tests/testlib/ext-sidedata-3.py Sat Apr 10 11:27:40 2021 +0200
+++ b/tests/testlib/ext-sidedata-3.py Thu Apr 08 16:55:17 2021 +0200
@@ -22,13 +22,15 @@
from mercurial.revlogutils import sidedata as sidedatamod
from mercurial.revlogutils import constants
+NO_FLAGS = (0, 0)
+
def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
sidedata = sidedata.copy()
if text is None:
text = revlog.revision(rev)
sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
- return sidedata
+ return sidedata, NO_FLAGS
def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
@@ -37,7 +39,7 @@
text = revlog.revision(rev)
sha256 = hashlib.sha256(text).digest()
sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
- return sidedata
+ return sidedata, NO_FLAGS
def compute_sidedata_3(repo, revlog, rev, sidedata, text=None):
@@ -46,7 +48,7 @@
text = revlog.revision(rev)
sha384 = hashlib.sha384(text).digest()
sidedata[sidedatamod.SD_TEST3] = struct.pack('>48s', sha384)
- return sidedata
+ return sidedata, NO_FLAGS
def wrapaddrevision(
@@ -55,8 +57,8 @@
if kwargs.get('sidedata') is None:
kwargs['sidedata'] = {}
sd = kwargs['sidedata']
- sd = compute_sidedata_1(None, self, None, sd, text=text)
- kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)
+ sd, flags = compute_sidedata_1(None, self, None, sd, text=text)
+ kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)[0]
return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
@@ -72,18 +74,21 @@
sidedatamod.SD_TEST1,
(sidedatamod.SD_TEST1,),
compute_sidedata_1,
+ 0,
)
repo.register_sidedata_computer(
kind,
sidedatamod.SD_TEST2,
(sidedatamod.SD_TEST2,),
compute_sidedata_2,
+ 0,
)
repo.register_sidedata_computer(
kind,
sidedatamod.SD_TEST3,
(sidedatamod.SD_TEST3,),
compute_sidedata_3,
+ 0,
)
repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
--- a/tests/testlib/ext-sidedata-5.py Sat Apr 10 11:27:40 2021 +0200
+++ b/tests/testlib/ext-sidedata-5.py Thu Apr 08 16:55:17 2021 +0200
@@ -23,13 +23,15 @@
from mercurial.revlogutils import sidedata as sidedatamod
from mercurial.revlogutils import constants
+NO_FLAGS = (0, 0)
+
def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
sidedata = sidedata.copy()
if text is None:
text = revlog.revision(rev)
sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
- return sidedata
+ return sidedata, NO_FLAGS
def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
@@ -38,7 +40,7 @@
text = revlog.revision(rev)
sha256 = hashlib.sha256(text).digest()
sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
- return sidedata
+ return sidedata, NO_FLAGS
def reposetup(ui, repo):
@@ -49,12 +51,14 @@
sidedatamod.SD_TEST1,
(sidedatamod.SD_TEST1,),
compute_sidedata_1,
+ 0,
)
repo.register_sidedata_computer(
kind,
sidedatamod.SD_TEST2,
(sidedatamod.SD_TEST2,),
compute_sidedata_2,
+ 0,
)
# We don't register sidedata computers because we don't care within these