revlog: move censor logic into main revlog class
Previously, the revlog class implemented dummy methods for
various censor-related functionality. Revision censoring was
(and will continue to be) only possible on filelog instances.
So filelog implemented these methods to perform something
reasonable.
A problem with implementing censoring on filelog is that
it assumes filelog is a revlog. Upcoming work to formalize
the filelog interface will make this not true.
Furthermore, the censoring logic is security-sensitive. I
think action-at-a-distance with custom implementation of core
revlog APIs in derived classes is a bit dangerous. I think at
a minimum the censor logic should live in revlog.py.
I was tempted to created a "censored revlog" class that
basically pulled these methods out of filelog. But, I wasn't
a huge fan of overriding core methods in child classes. A
reason to do that would be performance. However, the censoring
code only comes into play when:
* hash verification fails
* delta generation
* applying deltas from changegroups
The new code is conditional on an instance attribute. So the
overhead for running the censored code when the revlog isn't
censorable is an attribute lookup. All of these operations are
at least a magnitude slower than a Python attribute lookup. So
there shouldn't be a performance concern.
Differential Revision: https://phab.mercurial-scm.org/D3151
--- a/mercurial/filelog.py Thu Apr 05 18:22:35 2018 -0700
+++ b/mercurial/filelog.py Thu Apr 05 16:31:45 2018 -0700
@@ -7,27 +7,20 @@
from __future__ import absolute_import
-import struct
-
from .thirdparty.zope import (
interface as zi,
)
from . import (
- error,
- mdiff,
repository,
revlog,
)
-def _censoredtext(text):
- m, offs = revlog.parsemeta(text)
- return m and "censored" in m
-
@zi.implementer(repository.ifilestorage)
class filelog(revlog.revlog):
def __init__(self, opener, path):
super(filelog, self).__init__(opener,
- "/".join(("data", path + ".i")))
+ "/".join(("data", path + ".i")),
+ censorable=True)
# full name of the user visible file, relative to the repository root
self.filename = path
@@ -90,35 +83,3 @@
return t2 != text
return True
-
- def checkhash(self, text, node, p1=None, p2=None, rev=None):
- try:
- super(filelog, self).checkhash(text, node, p1=p1, p2=p2, rev=rev)
- except error.RevlogError:
- if _censoredtext(text):
- raise error.CensoredNodeError(self.indexfile, node, text)
- raise
-
- def iscensored(self, rev):
- """Check if a file revision is censored."""
- return self.flags(rev) & revlog.REVIDX_ISCENSORED
-
- def _peek_iscensored(self, baserev, delta, flush):
- """Quickly check if a delta produces a censored revision."""
- # Fragile heuristic: unless new file meta keys are added alphabetically
- # preceding "censored", all censored revisions are prefixed by
- # "\1\ncensored:". A delta producing such a censored revision must be a
- # full-replacement delta, so we inspect the first and only patch in the
- # delta for this prefix.
- hlen = struct.calcsize(">lll")
- if len(delta) <= hlen:
- return False
-
- oldlen = self.rawsize(baserev)
- newlen = len(delta) - hlen
- if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
- return False
-
- add = "\1\ncensored:"
- addlen = len(add)
- return newlen >= addlen and delta[hlen:hlen + addlen] == add
--- a/mercurial/revlog.py Thu Apr 05 18:22:35 2018 -0700
+++ b/mercurial/revlog.py Thu Apr 05 16:31:45 2018 -0700
@@ -117,6 +117,10 @@
metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
return "\1\n%s\1\n%s" % (metatext, text)
+def _censoredtext(text):
+ m, offs = parsemeta(text)
+ return m and "censored" in m
+
def addflagprocessor(flag, processor):
"""Register a flag processor on a revision data flag.
@@ -574,9 +578,11 @@
If mmaplargeindex is True, and an mmapindexthreshold is set, the
index will be mmapped rather than read if it is larger than the
configured threshold.
+
+ If censorable is True, the revlog can have censored revisions.
"""
def __init__(self, opener, indexfile, datafile=None, checkambig=False,
- mmaplargeindex=False):
+ mmaplargeindex=False, censorable=False):
"""
create a revlog object
@@ -589,6 +595,7 @@
# When True, indexfile is opened with checkambig=True at writing, to
# avoid file stat ambiguity.
self._checkambig = checkambig
+ self._censorable = censorable
# 3-tuple of (node, rev, text) for a raw revision.
self._cache = None
# Maps rev to chain base rev.
@@ -1867,14 +1874,19 @@
Available as a function so that subclasses can extend hash mismatch
behaviors as needed.
"""
- if p1 is None and p2 is None:
- p1, p2 = self.parents(node)
- if node != self.hash(text, p1, p2):
- revornode = rev
- if revornode is None:
- revornode = templatefilters.short(hex(node))
- raise RevlogError(_("integrity check failed on %s:%s")
- % (self.indexfile, pycompat.bytestr(revornode)))
+ try:
+ if p1 is None and p2 is None:
+ p1, p2 = self.parents(node)
+ if node != self.hash(text, p1, p2):
+ revornode = rev
+ if revornode is None:
+ revornode = templatefilters.short(hex(node))
+ raise RevlogError(_("integrity check failed on %s:%s")
+ % (self.indexfile, pycompat.bytestr(revornode)))
+ except RevlogError:
+ if self._censorable and _censoredtext(text):
+ raise error.CensoredNodeError(self.indexfile, node, text)
+ raise
def _enforceinlinesize(self, tr, fp=None):
"""Check if the revlog is too big for inline and convert if so.
@@ -2300,11 +2312,33 @@
def iscensored(self, rev):
"""Check if a file revision is censored."""
- return False
+ if not self._censorable:
+ return False
+
+ return self.flags(rev) & REVIDX_ISCENSORED
def _peek_iscensored(self, baserev, delta, flush):
"""Quickly check if a delta produces a censored revision."""
- return False
+ if not self._censorable:
+ return False
+
+ # Fragile heuristic: unless new file meta keys are added alphabetically
+ # preceding "censored", all censored revisions are prefixed by
+ # "\1\ncensored:". A delta producing such a censored revision must be a
+ # full-replacement delta, so we inspect the first and only patch in the
+ # delta for this prefix.
+ hlen = struct.calcsize(">lll")
+ if len(delta) <= hlen:
+ return False
+
+ oldlen = self.rawsize(baserev)
+ newlen = len(delta) - hlen
+ if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
+ return False
+
+ add = "\1\ncensored:"
+ addlen = len(add)
+ return newlen >= addlen and delta[hlen:hlen + addlen] == add
def getstrippoint(self, minlink):
"""find the minimum rev that must be stripped to strip the linkrev