mercurial/filelog.py
author FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
Wed, 24 Feb 2016 06:10:46 +0900
changeset 28265 332926212ef8
parent 25948 34bd1a5eef5b
child 30584 be5b2098a817
permissions -rw-r--r--
repoview: discard filtered changelog if index isn't shared with unfiltered Before this patch, revisions rollbacked at failure of previous transaction might be visible at subsequent operations unintentionally, if repoview object is reused even after failure of transaction: e.g. command server and HTTP server are typical cases. 'repoview' uses the tuple of values below of unfiltered changelog as "the key" to examine validity of filtered changelog cache. - length - tip node - filtered revisions (as hashed value) - '_delayed' field 'repoview' compares between "the key" of unfiltered changelog at previous caching and now, and reuses filtered changelog cache if no change is detected. But this comparison indicates only that there is no change between unfiltered 'repo.changelog' at last caching and now, but not that filtered changelog cache is valid for current unfiltered one. 'repoview' uses "shallow copy" of unfiltered changelog to create filtered changelog cache. In this case, 'index' buffer of unfiltered changelog is also referred by filtered changelog. At failure of transaction, unfiltered changelog itself is invalidated (= un-referred) on the 'repo' side (see 0a7610758c42 also). But 'index' of it still contains revisions to be rollbacked at this failure, and is referred by filtered changelog. Therefore, even if there is no change between unfiltered 'repo.changelog' at last caching and now, steps below makes rollbacked revisions visible via filtered changelog unintentionally. 1. instantiate unfiltered changelog as 'repo.changelog' (call it CL1) 2. make filtered (= shallow copy of) CL1 (call it FCL1) 3. cache FCL1 with "the key" of CL1 4. revisions are appended to 'index', which is shared by CL1 and FCL1 5. invalidate 'repo.changelog' (= CL1) at failure of transaction 6. instantiate 'repo.changelog' again at next operation (call it CL2) CL2 doesn't have revisions added at (4), because it is instantiated from '00changelog.i', which isn't changed while failed transaction. 7. compare between "the key" of CL1 and CL2 8. FCL1 cached at (3) is reused, because comparison at (7) doesn't detect change between CL1 at (1) and CL2 9. revisions rollbacked at (5) are visible via FCL1 unintentionally, because FCL1 still refers 'index' changed at (4) The root cause of this issue is that there is no examination about validity of filtered changelog cache against current unfiltered one. This patch discards filtered changelog cache, if its 'index' object isn't shared with unfiltered one. BTW, at the time of this patch, redundant truncation of '00changelog.i' at failure of transaction (see 0a7610758c42 for detail) often prevents "hg serve" from making already rollbacked revisions visible, because updating timestamps of '00changelog.i' by truncation makes "hg serve" discard old repoview object with invalid filtered changelog cache. This is reason why this issue is overlooked before this patch, even though test-bundle2-exchange.t has tests in similar situation: failure of "hg push" via HTTP by pretxnclose hook on server side doesn't prevent subsequent commands from looking up outgoing revisions correctly. But timestamp on the filesystem doesn't have enough resolution for recent computation power, and it can't be assumed that this avoidance always works as expected. Therefore, without this patch, this issue might appear occasionally.

# filelog.py - file history class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import re
import struct

from . import (
    error,
    mdiff,
    revlog,
)

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, keylist, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta.iterkeys())
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

class filelog(revlog.revlog):
    def __init__(self, opener, path):
        super(filelog, self).__init__(opener,
                        "/".join(("data", path + ".i")))

    def read(self, node):
        t = self.revision(node)
        if not t.startswith('\1\n'):
            return t
        s = t.index('\1\n', 2)
        return t[s + 2:]

    def add(self, text, meta, transaction, link, p1=None, p2=None):
        if meta or text.startswith('\1\n'):
            text = packmeta(meta, text)
        return self.addrevision(text, transaction, link, p1, p2)

    def renamed(self, node):
        if self.parents(node)[0] != revlog.nullid:
            return False
        t = self.revision(node)
        m = parsemeta(t)[0]
        if m and "copy" in m:
            return (m["copy"], revlog.bin(m["copyrev"]))
        return False

    def size(self, rev):
        """return the size of a given revision"""

        # for revisions with renames, we have to go the slow way
        node = self.node(rev)
        if self.renamed(node):
            return len(self.read(node))
        if self.iscensored(rev):
            return 0

        # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
        return super(filelog, self).size(rev)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """

        t = text
        if text.startswith('\1\n'):
            t = '\1\n\1\n' + text

        samehashes = not super(filelog, self).cmp(node, t)
        if samehashes:
            return False

        # censored files compare against the empty file
        if self.iscensored(self.rev(node)):
            return text != ''

        # renaming a file produces a different hash, even if the data
        # remains unchanged. Check if it's the case (slow):
        if self.renamed(node):
            t2 = self.read(node)
            return t2 != text

        return True

    def checkhash(self, text, p1, p2, node, rev=None):
        try:
            super(filelog, self).checkhash(text, p1, p2, node, rev=rev)
        except error.RevlogError:
            if _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return self.flags(rev) & revlog.REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add