revlog: compress sidedata when doing "post-pull" sidedata update
authorPierre-Yves David <pierre-yves.david@octobus.net>
Mon, 03 May 2021 23:40:05 +0200
changeset 47259 07641bafa646
parent 47258 c4dbb7636a12
child 47260 ccdd280d1d0d
revlog: compress sidedata when doing "post-pull" sidedata update All paths that write sidedata now use compression (when appropriate). Differential Revision: https://phab.mercurial-scm.org/D10656
mercurial/cext/revlog.c
mercurial/pure/parsers.py
mercurial/revlog.py
--- a/mercurial/cext/revlog.c	Mon May 03 23:14:48 2021 +0200
+++ b/mercurial/cext/revlog.c	Mon May 03 23:40:05 2021 +0200
@@ -533,12 +533,13 @@
 {
 	uint64_t offset_flags, sidedata_offset;
 	int rev;
+	char comp_mode;
 	Py_ssize_t sidedata_comp_len;
 	char *data;
 #if LONG_MAX == 0x7fffffffL
-	const char *const sidedata_format = PY23("nKiK", "nKiK");
+	const char *const sidedata_format = PY23("nKiKB", "nKiKB");
 #else
-	const char *const sidedata_format = PY23("nkik", "nkik");
+	const char *const sidedata_format = PY23("nkikB", "nkikB");
 #endif
 
 	if (self->entry_size == v1_entry_size || self->inlined) {
@@ -553,7 +554,7 @@
 	}
 
 	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
-	                      &sidedata_comp_len, &offset_flags))
+	                      &sidedata_comp_len, &offset_flags, &comp_mode))
 		return NULL;
 
 	if (rev < 0 || rev >= index_length(self)) {
@@ -573,6 +574,7 @@
 	putbe64(offset_flags, data);
 	putbe64(sidedata_offset, data + 64);
 	putbe32(sidedata_comp_len, data + 72);
+	data[76] = (data[76] & ~(3 << 2)) | ((comp_mode & 3) << 2);
 
 	Py_RETURN_NONE;
 }
--- a/mercurial/pure/parsers.py	Mon May 03 23:14:48 2021 +0200
+++ b/mercurial/pure/parsers.py	Mon May 03 23:40:05 2021 +0200
@@ -297,7 +297,12 @@
     index_format = revlog_constants.INDEX_ENTRY_V2
 
     def replace_sidedata_info(
-        self, rev, sidedata_offset, sidedata_length, offset_flags
+        self,
+        rev,
+        sidedata_offset,
+        sidedata_length,
+        offset_flags,
+        compression_mode,
     ):
         """
         Replace an existing index entry's sidedata offset and length with new
@@ -316,6 +321,7 @@
             entry[0] = offset_flags
             entry[8] = sidedata_offset
             entry[9] = sidedata_length
+            entry[11] = compression_mode
             entry = tuple(entry)
             new = self._pack_entry(entry)
             self._extra[rev - self._lgt] = new
--- a/mercurial/revlog.py	Mon May 03 23:14:48 2021 +0200
+++ b/mercurial/revlog.py	Mon May 03 23:40:05 2021 +0200
@@ -3381,6 +3381,26 @@
                 serialized_sidedata = sidedatautil.serialize_sidedata(
                     new_sidedata
                 )
+
+                sidedata_compression_mode = COMP_MODE_INLINE
+                if serialized_sidedata and self.hassidedata:
+                    sidedata_compression_mode = COMP_MODE_PLAIN
+                    h, comp_sidedata = self.compress(serialized_sidedata)
+                    if (
+                        h != b'u'
+                        and comp_sidedata[0] != b'\0'
+                        and len(comp_sidedata) < len(serialized_sidedata)
+                    ):
+                        assert not h
+                        if (
+                            comp_sidedata[0]
+                            == self._docket.default_compression_header
+                        ):
+                            sidedata_compression_mode = COMP_MODE_DEFAULT
+                            serialized_sidedata = comp_sidedata
+                        else:
+                            sidedata_compression_mode = COMP_MODE_INLINE
+                            serialized_sidedata = comp_sidedata
                 if entry[8] != 0 or entry[9] != 0:
                     # rewriting entries that already have sidedata is not
                     # supported yet, because it introduces garbage data in the
@@ -3395,6 +3415,7 @@
                     current_offset,
                     len(serialized_sidedata),
                     new_offset_flags,
+                    sidedata_compression_mode,
                 )
 
                 # the sidedata computation might have move the file cursors around