revlog: move censor logic into main revlog class
authorGregory Szorc <gregory.szorc@gmail.com>
Thu, 05 Apr 2018 16:31:45 -0700
changeset 37443 65250a66b55c
parent 37442 0596d27457c6
child 37444 c8666a9e9e11
revlog: move censor logic into main revlog class Previously, the revlog class implemented dummy methods for various censor-related functionality. Revision censoring was (and will continue to be) only possible on filelog instances. So filelog implemented these methods to perform something reasonable. A problem with implementing censoring on filelog is that it assumes filelog is a revlog. Upcoming work to formalize the filelog interface will make this not true. Furthermore, the censoring logic is security-sensitive. I think action-at-a-distance with custom implementation of core revlog APIs in derived classes is a bit dangerous. I think at a minimum the censor logic should live in revlog.py. I was tempted to created a "censored revlog" class that basically pulled these methods out of filelog. But, I wasn't a huge fan of overriding core methods in child classes. A reason to do that would be performance. However, the censoring code only comes into play when: * hash verification fails * delta generation * applying deltas from changegroups The new code is conditional on an instance attribute. So the overhead for running the censored code when the revlog isn't censorable is an attribute lookup. All of these operations are at least a magnitude slower than a Python attribute lookup. So there shouldn't be a performance concern. Differential Revision: https://phab.mercurial-scm.org/D3151
mercurial/filelog.py
mercurial/revlog.py
--- a/mercurial/filelog.py	Thu Apr 05 18:22:35 2018 -0700
+++ b/mercurial/filelog.py	Thu Apr 05 16:31:45 2018 -0700
@@ -7,27 +7,20 @@
 
 from __future__ import absolute_import
 
-import struct
-
 from .thirdparty.zope import (
     interface as zi,
 )
 from . import (
-    error,
-    mdiff,
     repository,
     revlog,
 )
 
-def _censoredtext(text):
-    m, offs = revlog.parsemeta(text)
-    return m and "censored" in m
-
 @zi.implementer(repository.ifilestorage)
 class filelog(revlog.revlog):
     def __init__(self, opener, path):
         super(filelog, self).__init__(opener,
-                        "/".join(("data", path + ".i")))
+                        "/".join(("data", path + ".i")),
+                                      censorable=True)
         # full name of the user visible file, relative to the repository root
         self.filename = path
 
@@ -90,35 +83,3 @@
             return t2 != text
 
         return True
-
-    def checkhash(self, text, node, p1=None, p2=None, rev=None):
-        try:
-            super(filelog, self).checkhash(text, node, p1=p1, p2=p2, rev=rev)
-        except error.RevlogError:
-            if _censoredtext(text):
-                raise error.CensoredNodeError(self.indexfile, node, text)
-            raise
-
-    def iscensored(self, rev):
-        """Check if a file revision is censored."""
-        return self.flags(rev) & revlog.REVIDX_ISCENSORED
-
-    def _peek_iscensored(self, baserev, delta, flush):
-        """Quickly check if a delta produces a censored revision."""
-        # Fragile heuristic: unless new file meta keys are added alphabetically
-        # preceding "censored", all censored revisions are prefixed by
-        # "\1\ncensored:". A delta producing such a censored revision must be a
-        # full-replacement delta, so we inspect the first and only patch in the
-        # delta for this prefix.
-        hlen = struct.calcsize(">lll")
-        if len(delta) <= hlen:
-            return False
-
-        oldlen = self.rawsize(baserev)
-        newlen = len(delta) - hlen
-        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
-            return False
-
-        add = "\1\ncensored:"
-        addlen = len(add)
-        return newlen >= addlen and delta[hlen:hlen + addlen] == add
--- a/mercurial/revlog.py	Thu Apr 05 18:22:35 2018 -0700
+++ b/mercurial/revlog.py	Thu Apr 05 16:31:45 2018 -0700
@@ -117,6 +117,10 @@
     metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
     return "\1\n%s\1\n%s" % (metatext, text)
 
+def _censoredtext(text):
+    m, offs = parsemeta(text)
+    return m and "censored" in m
+
 def addflagprocessor(flag, processor):
     """Register a flag processor on a revision data flag.
 
@@ -574,9 +578,11 @@
     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.
+
+    If censorable is True, the revlog can have censored revisions.
     """
     def __init__(self, opener, indexfile, datafile=None, checkambig=False,
-                 mmaplargeindex=False):
+                 mmaplargeindex=False, censorable=False):
         """
         create a revlog object
 
@@ -589,6 +595,7 @@
         #  When True, indexfile is opened with checkambig=True at writing, to
         #  avoid file stat ambiguity.
         self._checkambig = checkambig
+        self._censorable = censorable
         # 3-tuple of (node, rev, text) for a raw revision.
         self._cache = None
         # Maps rev to chain base rev.
@@ -1867,14 +1874,19 @@
         Available as a function so that subclasses can extend hash mismatch
         behaviors as needed.
         """
-        if p1 is None and p2 is None:
-            p1, p2 = self.parents(node)
-        if node != self.hash(text, p1, p2):
-            revornode = rev
-            if revornode is None:
-                revornode = templatefilters.short(hex(node))
-            raise RevlogError(_("integrity check failed on %s:%s")
-                % (self.indexfile, pycompat.bytestr(revornode)))
+        try:
+            if p1 is None and p2 is None:
+                p1, p2 = self.parents(node)
+            if node != self.hash(text, p1, p2):
+                revornode = rev
+                if revornode is None:
+                    revornode = templatefilters.short(hex(node))
+                raise RevlogError(_("integrity check failed on %s:%s")
+                    % (self.indexfile, pycompat.bytestr(revornode)))
+        except RevlogError:
+            if self._censorable and _censoredtext(text):
+                raise error.CensoredNodeError(self.indexfile, node, text)
+            raise
 
     def _enforceinlinesize(self, tr, fp=None):
         """Check if the revlog is too big for inline and convert if so.
@@ -2300,11 +2312,33 @@
 
     def iscensored(self, rev):
         """Check if a file revision is censored."""
-        return False
+        if not self._censorable:
+            return False
+
+        return self.flags(rev) & REVIDX_ISCENSORED
 
     def _peek_iscensored(self, baserev, delta, flush):
         """Quickly check if a delta produces a censored revision."""
-        return False
+        if not self._censorable:
+            return False
+
+        # Fragile heuristic: unless new file meta keys are added alphabetically
+        # preceding "censored", all censored revisions are prefixed by
+        # "\1\ncensored:". A delta producing such a censored revision must be a
+        # full-replacement delta, so we inspect the first and only patch in the
+        # delta for this prefix.
+        hlen = struct.calcsize(">lll")
+        if len(delta) <= hlen:
+            return False
+
+        oldlen = self.rawsize(baserev)
+        newlen = len(delta) - hlen
+        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
+            return False
+
+        add = "\1\ncensored:"
+        addlen = len(add)
+        return newlen >= addlen and delta[hlen:hlen + addlen] == add
 
     def getstrippoint(self, minlink):
         """find the minimum rev that must be stripped to strip the linkrev