revlog: move censor logic out of censor extension
The censor extension is doing very low-level things with revlogs.
It is fundamentally impossible for this logic to remain in the censor
extension while supporting multiple storage backends: we need each
storage backend to implement censor in its own storage-specific
way.
This commit effectively moves the revlog-specific censoring code to
be a method of revlogs themselves.
We've defined a new API on the file storage interface for censoring
an individual node. Even though the current censoring code doesn't
use it, the API requires a transaction instance because it logically
makes sense for storage backends to require an active transaction
(which implies a held write lock) in order to rewrite storage.
After this commit, the censor extension has been reduced to
boilerplate precondition checking before invoking the generic
storage API.
I tried to keep the code as similar as possible. But some minor
changes were made:
* We use self._io instead of instantiating a new revlogio instance.
* We reject only REVLOGV0 (self.version == REVLOGV0) instead of
rejecting every version that is != REVLOGV1, because presumably all
future revlog versions will support censoring.
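* We use self.opener instead of going through repo.svfs (we don't have
a handle on the repo instance from a revlog).
* We drop the "revlog." module prefix from names (e.g. revlog.packmeta
becomes packmeta), since the code now lives inside revlog.py.
* We replace "flog" with "self".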
Differential Revision: https://phab.mercurial-scm.org/D4656
--- a/hgext/censor.py Tue Sep 18 16:47:09 2018 -0700
+++ b/hgext/censor.py Tue Sep 18 17:51:43 2018 -0700
@@ -32,11 +32,8 @@
from mercurial import (
error,
- pycompat,
registrar,
- revlog,
scmutil,
- util,
)
cmdtable = {}
@@ -98,90 +95,5 @@
raise error.Abort(_('cannot censor working directory'),
hint=_('clean/delete/update first'))
- flogv = flog.version & 0xFFFF
- if flogv != revlog.REVLOGV1:
- raise error.Abort(
- _('censor does not support revlog version %d') % (flogv,))
-
- tombstone = revlog.packmeta({"censored": tombstone}, "")
-
- crev = fctx.filerev()
-
- if len(tombstone) > flog.rawsize(crev):
- raise error.Abort(_(
- 'censor tombstone must be no longer than censored data'))
-
- # Using two files instead of one makes it easy to rewrite entry-by-entry
- idxread = repo.svfs(flog.indexfile, 'r')
- idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
- if flog.version & revlog.FLAG_INLINE_DATA:
- dataread, datawrite = idxread, idxwrite
- else:
- dataread = repo.svfs(flog.datafile, 'r')
- datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
-
- # Copy all revlog data up to the entry to be censored.
- rio = revlog.revlogio()
- offset = flog.start(crev)
-
- for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
- idxwrite.write(chunk)
- for chunk in util.filechunkiter(dataread, limit=offset):
- datawrite.write(chunk)
-
- def rewriteindex(r, newoffs, newdata=None):
- """Rewrite the index entry with a new data offset and optional new data.
-
- The newdata argument, if given, is a tuple of three positive integers:
- (new compressed, new uncompressed, added flag bits).
- """
- offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
- flags = revlog.gettype(offlags)
- if newdata:
- comp, uncomp, nflags = newdata
- flags |= nflags
- offlags = revlog.offset_type(newoffs, flags)
- e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
- idxwrite.write(rio.packentry(e, None, flog.version, r))
- idxread.seek(rio.size, 1)
-
- def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
- """Write the given full text to the filelog with the given data offset.
-
- Returns:
- The integer number of data bytes written, for tracking data offsets.
- """
- flag, compdata = flog.compress(data)
- newcomp = len(flag) + len(compdata)
- rewriteindex(r, offs, (newcomp, len(data), nflags))
- datawrite.write(flag)
- datawrite.write(compdata)
- dataread.seek(flog.length(r), 1)
- return newcomp
-
- # Rewrite censored revlog entry with (padded) tombstone data.
- pad = ' ' * (flog.rawsize(crev) - len(tombstone))
- offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
-
- # Rewrite all following filelog revisions fixing up offsets and deltas.
- for srev in pycompat.xrange(crev + 1, len(flog)):
- if crev in flog.parentrevs(srev):
- # Immediate children of censored node must be re-added as fulltext.
- try:
- revdata = flog.revision(srev)
- except error.CensoredNodeError as e:
- revdata = e.tombstone
- dlen = rewrite(srev, offset, revdata)
- else:
- # Copy any other revision data verbatim after fixing up the offset.
- rewriteindex(srev, offset)
- dlen = flog.length(srev)
- for chunk in util.filechunkiter(dataread, limit=dlen):
- datawrite.write(chunk)
- offset += dlen
-
- idxread.close()
- idxwrite.close()
- if dataread is not idxread:
- dataread.close()
- datawrite.close()
+ with repo.transaction(b'censor') as tr:
+ flog.censorrevision(tr, fnode, tombstone=tombstone)
--- a/mercurial/filelog.py Tue Sep 18 16:47:09 2018 -0700
+++ b/mercurial/filelog.py Tue Sep 18 17:51:43 2018 -0700
@@ -111,6 +111,9 @@
def strip(self, minlink, transaction):
return self._revlog.strip(minlink, transaction)
+ def censorrevision(self, tr, node, tombstone=b''):
+ return self._revlog.censorrevision(node, tombstone=tombstone)
+
def files(self):
return self._revlog.files()
--- a/mercurial/repository.py Tue Sep 18 16:47:09 2018 -0700
+++ b/mercurial/repository.py Tue Sep 18 17:51:43 2018 -0700
@@ -691,6 +691,23 @@
even if it existed in the store previously.
"""
+ def censorrevision(tr, node, tombstone=b''):
+ """Remove the content of a single revision.
+
+ The specified ``node`` will have its content purged from storage.
+ Future attempts to access the revision data for this node will
+ result in failure.
+
+ A ``tombstone`` message can optionally be stored. This message may be
+ displayed to users when they attempt to access the missing revision
+ data.
+
+ Storage backends may have stored deltas against the previous content
+ in this revision. As part of censoring a revision, these storage
+ backends are expected to rewrite any internally stored deltas such
+ that they no longer reference the deleted content.
+ """
+
def getstrippoint(minlink):
"""Find the minimum revision that must be stripped to strip a linkrev.
--- a/mercurial/revlog.py Tue Sep 18 16:47:09 2018 -0700
+++ b/mercurial/revlog.py Tue Sep 18 17:51:43 2018 -0700
@@ -2492,3 +2492,92 @@
finally:
destrevlog._lazydeltabase = oldlazydeltabase
destrevlog._deltabothparents = oldamd
+
+ def censorrevision(self, node, tombstone=b''):
+ if (self.version & 0xFFFF) == REVLOGV0:
+ raise error.RevlogError(_('cannot censor with version %d revlogs') %
+ self.version)
+
+ rev = self.rev(node)
+ tombstone = packmeta({b'censored': tombstone}, b'')
+
+ if len(tombstone) > self.rawsize(rev):
+ raise error.Abort(_('censor tombstone must be no longer than '
+ 'censored data'))
+
+ # Using two files instead of one makes it easy to rewrite entry-by-entry
+ idxread = self.opener(self.indexfile, 'r')
+ idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
+ if self.version & FLAG_INLINE_DATA:
+ dataread, datawrite = idxread, idxwrite
+ else:
+ dataread = self.opener(self.datafile, 'r')
+ datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
+
+ # Copy all revlog data up to the entry to be censored.
+ offset = self.start(rev)
+
+ for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
+ idxwrite.write(chunk)
+ for chunk in util.filechunkiter(dataread, limit=offset):
+ datawrite.write(chunk)
+
+ def rewriteindex(r, newoffs, newdata=None):
+ """Rewrite the index entry with a new data offset and new data.
+
+ The newdata argument, if given, is a tuple of three positive
+ integers: (new compressed, new uncompressed, added flag bits).
+ """
+ offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
+ flags = gettype(offlags)
+ if newdata:
+ comp, uncomp, nflags = newdata
+ flags |= nflags
+ offlags = offset_type(newoffs, flags)
+ e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
+ idxwrite.write(self._io.packentry(e, None, self.version, r))
+ idxread.seek(self._io.size, 1)
+
+ def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
+ """Write the given fulltext with the given data offset.
+
+ Returns:
+ The integer number of data bytes written, for tracking data
+ offsets.
+ """
+ flag, compdata = self.compress(data)
+ newcomp = len(flag) + len(compdata)
+ rewriteindex(r, offs, (newcomp, len(data), nflags))
+ datawrite.write(flag)
+ datawrite.write(compdata)
+ dataread.seek(self.length(r), 1)
+ return newcomp
+
+ # Rewrite censored entry with (padded) tombstone data.
+ pad = ' ' * (self.rawsize(rev) - len(tombstone))
+ offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)
+
+ # Rewrite all following filelog revisions fixing up offsets and deltas.
+ for srev in pycompat.xrange(rev + 1, len(self)):
+ if rev in self.parentrevs(srev):
+ # Immediate children of censored node must be re-added as
+ # fulltext.
+ try:
+ revdata = self.revision(srev)
+ except error.CensoredNodeError as e:
+ revdata = e.tombstone
+ dlen = rewrite(srev, offset, revdata)
+ else:
+ # Copy any other revision data verbatim after fixing up the
+ # offset.
+ rewriteindex(srev, offset)
+ dlen = self.length(srev)
+ for chunk in util.filechunkiter(dataread, limit=dlen):
+ datawrite.write(chunk)
+ offset += dlen
+
+ idxread.close()
+ idxwrite.close()
+ if dataread is not idxread:
+ dataread.close()
+ datawrite.close()