revlog: move censor logic out of censor extension
authorGregory Szorc <gregory.szorc@gmail.com>
Tue, 18 Sep 2018 17:51:43 -0700
changeset 39778 a6b3c4c1019f
parent 39777 b63dee7bd0d9
child 39779 42aa61fc5544
revlog: move censor logic out of censor extension The censor extension is doing very low-level things with revlogs. It is fundamentally impossible for this logic to remain in the censor extension while supporting multiple storage backends: we need each storage backend to implement censor in its own storage-specific way. This commit effectively moves the revlog-specific censoring code to be a method of revlogs themselves. We've defined a new API on the file storage interface for censoring an individual node. Even though the current censoring code doesn't use it, the API requires a transaction instance because it logically makes sense for storage backends to require an active transaction (which implies a held write lock) in order to rewrite storage. After this commit, the censor extension has been reduced to boilerplate precondition checking before invoking the generic storage API. I tried to keep the code as similar as possible. But some minor changes were made: * We use self._io instead of instantiating a new revlogio instance. * We compare self.version against REVLOGV0 instead of != REVLOGV1 because presumably all future revlog versions will support censoring. * We use self.opener instead of going through repo.svfs (we don't have a handle on the repo instance from a revlog). * The "revlog." module prefix is dropped from names (the code now lives in revlog.py itself). * Replace "flog" with "self". Differential Revision: https://phab.mercurial-scm.org/D4656
hgext/censor.py
mercurial/filelog.py
mercurial/repository.py
mercurial/revlog.py
--- a/hgext/censor.py	Tue Sep 18 16:47:09 2018 -0700
+++ b/hgext/censor.py	Tue Sep 18 17:51:43 2018 -0700
@@ -32,11 +32,8 @@
 
 from mercurial import (
     error,
-    pycompat,
     registrar,
-    revlog,
     scmutil,
-    util,
 )
 
 cmdtable = {}
@@ -98,90 +95,5 @@
         raise error.Abort(_('cannot censor working directory'),
             hint=_('clean/delete/update first'))
 
-    flogv = flog.version & 0xFFFF
-    if flogv != revlog.REVLOGV1:
-        raise error.Abort(
-            _('censor does not support revlog version %d') % (flogv,))
-
-    tombstone = revlog.packmeta({"censored": tombstone}, "")
-
-    crev = fctx.filerev()
-
-    if len(tombstone) > flog.rawsize(crev):
-        raise error.Abort(_(
-            'censor tombstone must be no longer than censored data'))
-
-    # Using two files instead of one makes it easy to rewrite entry-by-entry
-    idxread = repo.svfs(flog.indexfile, 'r')
-    idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
-    if flog.version & revlog.FLAG_INLINE_DATA:
-        dataread, datawrite = idxread, idxwrite
-    else:
-        dataread = repo.svfs(flog.datafile, 'r')
-        datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
-
-    # Copy all revlog data up to the entry to be censored.
-    rio = revlog.revlogio()
-    offset = flog.start(crev)
-
-    for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
-        idxwrite.write(chunk)
-    for chunk in util.filechunkiter(dataread, limit=offset):
-        datawrite.write(chunk)
-
-    def rewriteindex(r, newoffs, newdata=None):
-        """Rewrite the index entry with a new data offset and optional new data.
-
-        The newdata argument, if given, is a tuple of three positive integers:
-        (new compressed, new uncompressed, added flag bits).
-        """
-        offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
-        flags = revlog.gettype(offlags)
-        if newdata:
-            comp, uncomp, nflags = newdata
-            flags |= nflags
-        offlags = revlog.offset_type(newoffs, flags)
-        e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
-        idxwrite.write(rio.packentry(e, None, flog.version, r))
-        idxread.seek(rio.size, 1)
-
-    def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
-        """Write the given full text to the filelog with the given data offset.
-
-        Returns:
-            The integer number of data bytes written, for tracking data offsets.
-        """
-        flag, compdata = flog.compress(data)
-        newcomp = len(flag) + len(compdata)
-        rewriteindex(r, offs, (newcomp, len(data), nflags))
-        datawrite.write(flag)
-        datawrite.write(compdata)
-        dataread.seek(flog.length(r), 1)
-        return newcomp
-
-    # Rewrite censored revlog entry with (padded) tombstone data.
-    pad = ' ' * (flog.rawsize(crev) - len(tombstone))
-    offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
-
-    # Rewrite all following filelog revisions fixing up offsets and deltas.
-    for srev in pycompat.xrange(crev + 1, len(flog)):
-        if crev in flog.parentrevs(srev):
-            # Immediate children of censored node must be re-added as fulltext.
-            try:
-                revdata = flog.revision(srev)
-            except error.CensoredNodeError as e:
-                revdata = e.tombstone
-            dlen = rewrite(srev, offset, revdata)
-        else:
-            # Copy any other revision data verbatim after fixing up the offset.
-            rewriteindex(srev, offset)
-            dlen = flog.length(srev)
-            for chunk in util.filechunkiter(dataread, limit=dlen):
-                datawrite.write(chunk)
-        offset += dlen
-
-    idxread.close()
-    idxwrite.close()
-    if dataread is not idxread:
-        dataread.close()
-        datawrite.close()
+    with repo.transaction(b'censor') as tr:
+        flog.censorrevision(tr, fnode, tombstone=tombstone)
--- a/mercurial/filelog.py	Tue Sep 18 16:47:09 2018 -0700
+++ b/mercurial/filelog.py	Tue Sep 18 17:51:43 2018 -0700
@@ -111,6 +111,9 @@
     def strip(self, minlink, transaction):
         return self._revlog.strip(minlink, transaction)
 
+    def censorrevision(self, tr, node, tombstone=b''):
+        return self._revlog.censorrevision(node, tombstone=tombstone)
+
     def files(self):
         return self._revlog.files()
 
--- a/mercurial/repository.py	Tue Sep 18 16:47:09 2018 -0700
+++ b/mercurial/repository.py	Tue Sep 18 17:51:43 2018 -0700
@@ -691,6 +691,23 @@
         even if it existed in the store previously.
         """
 
+    def censorrevision(tr, node, tombstone=b''):
+        """Remove the content of a single revision.
+
+        The specified ``node`` will have its content purged from storage.
+        Future attempts to access the revision data for this node will
+        result in failure.
+
+        A ``tombstone`` message can optionally be stored. This message may be
+        displayed to users when they attempt to access the missing revision
+        data.
+
+        Storage backends may have stored deltas against the previous content
+        in this revision. As part of censoring a revision, these storage
+        backends are expected to rewrite any internally stored deltas such
+        that they no longer reference the deleted content.
+        """
+
     def getstrippoint(minlink):
         """Find the minimum revision that must be stripped to strip a linkrev.
 
--- a/mercurial/revlog.py	Tue Sep 18 16:47:09 2018 -0700
+++ b/mercurial/revlog.py	Tue Sep 18 17:51:43 2018 -0700
@@ -2492,3 +2492,92 @@
         finally:
             destrevlog._lazydeltabase = oldlazydeltabase
             destrevlog._deltabothparents = oldamd
+
+    def censorrevision(self, node, tombstone=b''):
+        if (self.version & 0xFFFF) == REVLOGV0:
+            raise error.RevlogError(_('cannot censor with version %d revlogs') %
+                                    self.version)
+
+        rev = self.rev(node)
+        tombstone = packmeta({b'censored': tombstone}, b'')
+
+        if len(tombstone) > self.rawsize(rev):
+            raise error.Abort(_('censor tombstone must be no longer than '
+                                'censored data'))
+
+        # Using two files instead of one makes it easy to rewrite entry-by-entry
+        idxread = self.opener(self.indexfile, 'r')
+        idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
+        if self.version & FLAG_INLINE_DATA:
+            dataread, datawrite = idxread, idxwrite
+        else:
+            dataread = self.opener(self.datafile, 'r')
+            datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
+
+        # Copy all revlog data up to the entry to be censored.
+        offset = self.start(rev)
+
+        for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
+            idxwrite.write(chunk)
+        for chunk in util.filechunkiter(dataread, limit=offset):
+            datawrite.write(chunk)
+
+        def rewriteindex(r, newoffs, newdata=None):
+            """Rewrite the index entry with a new data offset and new data.
+
+            The newdata argument, if given, is a tuple of three positive
+            integers: (new compressed, new uncompressed, added flag bits).
+            """
+            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
+            flags = gettype(offlags)
+            if newdata:
+                comp, uncomp, nflags = newdata
+                flags |= nflags
+            offlags = offset_type(newoffs, flags)
+            e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
+            idxwrite.write(self._io.packentry(e, None, self.version, r))
+            idxread.seek(self._io.size, 1)
+
+        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
+            """Write the given fulltext with the given data offset.
+
+            Returns:
+                The integer number of data bytes written, for tracking data
+                offsets.
+            """
+            flag, compdata = self.compress(data)
+            newcomp = len(flag) + len(compdata)
+            rewriteindex(r, offs, (newcomp, len(data), nflags))
+            datawrite.write(flag)
+            datawrite.write(compdata)
+            dataread.seek(self.length(r), 1)
+            return newcomp
+
+        # Rewrite censored entry with (padded) tombstone data.
+        pad = ' ' * (self.rawsize(rev) - len(tombstone))
+        offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)
+
+        # Rewrite all following filelog revisions fixing up offsets and deltas.
+        for srev in pycompat.xrange(rev + 1, len(self)):
+            if rev in self.parentrevs(srev):
+                # Immediate children of censored node must be re-added as
+                # fulltext.
+                try:
+                    revdata = self.revision(srev)
+                except error.CensoredNodeError as e:
+                    revdata = e.tombstone
+                dlen = rewrite(srev, offset, revdata)
+            else:
+                # Copy any other revision data verbatim after fixing up the
+                # offset.
+                rewriteindex(srev, offset)
+                dlen = self.length(srev)
+                for chunk in util.filechunkiter(dataread, limit=dlen):
+                    datawrite.write(chunk)
+            offset += dlen
+
+        idxread.close()
+        idxwrite.close()
+        if dataread is not idxread:
+            dataread.close()
+            datawrite.close()