view hgext/git/gitlog.py @ 44677:4e2845d9452d

tests: collect all branch creation in one place in test-copies-chain-merge.t I found this test case really hard to read because it requires scrolling back and forth between the setup and the verification. The next patch will move the verification close to the merge commits they test. This patch prepares for that by moving all branch creation first so they are separate from the merge commits (because many merge commits reuse the same branches). Differential Revision: https://phab.mercurial-scm.org/D8376
author Martin von Zweigbergk <martinvonz@google.com>
date Thu, 02 Apr 2020 13:45:10 -0700
parents ec54b3d2af0b
children 8bfc6cc8e480
line wrap: on
line source

from __future__ import absolute_import

from mercurial.i18n import _

from mercurial import (
    ancestor,
    changelog as hgchangelog,
    dagop,
    encoding,
    error,
    manifest,
    node as nodemod,
    pycompat,
)
from mercurial.interfaces import (
    repository,
    util as interfaceutil,
)
from mercurial.utils import stringutil
from . import (
    gitutil,
    index,
    manifest as gitmanifest,
)

pygit2 = gitutil.get_pygit2()


class baselog(object):  # revlog.revlog):
    """Common implementations between changelog and manifestlog."""

    def __init__(self, gr, db):
        self.gitrepo = gr
        self._db = db

    def __len__(self):
        return int(
            self._db.execute('SELECT COUNT(*) FROM changelog').fetchone()[0]
        )

    def rev(self, n):
        if n == nodemod.nullid:
            return -1
        t = self._db.execute(
            'SELECT rev FROM changelog WHERE node = ?', (gitutil.togitnode(n),)
        ).fetchone()
        if t is None:
            raise error.LookupError(n, b'00changelog.i', _(b'no node %d'))
        return t[0]

    def node(self, r):
        if r == nodemod.nullrev:
            return nodemod.nullid
        t = self._db.execute(
            'SELECT node FROM changelog WHERE rev = ?', (r,)
        ).fetchone()
        if t is None:
            raise error.LookupError(r, b'00changelog.i', _(b'no node'))
        return nodemod.bin(t[0])

    def hasnode(self, n):
        t = self._db.execute(
            'SELECT node FROM changelog WHERE node = ?', (n,)
        ).fetchone()
        return t is not None


class baselogindex(object):
    def __init__(self, log):
        self._log = log

    def has_node(self, n):
        return self._log.rev(n) != -1

    def __len__(self):
        return len(self._log)

    def __getitem__(self, idx):
        p1rev, p2rev = self._log.parentrevs(idx)
        # TODO: it's messy that the index leaks so far out of the
        # storage layer that we have to implement things like reading
        # this raw tuple, which exposes revlog internals.
        return (
            # Pretend offset is just the index, since we don't really care.
            idx,
            # Same with lengths
            idx,  # length
            idx,  # rawsize
            -1,  # delta base
            idx,  # linkrev TODO is this right?
            p1rev,
            p2rev,
            self._log.node(idx),
        )


# TODO: an interface for the changelog type?
class changelog(baselog):
    def __contains__(self, rev):
        try:
            self.node(rev)
            return True
        except error.LookupError:
            return False

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    @property
    def filteredrevs(self):
        # TODO: we should probably add a refs/hg/ namespace for hidden
        # heads etc, but that's an idea for later.
        return set()

    @property
    def index(self):
        return baselogindex(self)

    @property
    def nodemap(self):
        r = {
            nodemod.bin(v[0]): v[1]
            for v in self._db.execute('SELECT node, rev FROM changelog')
        }
        r[nodemod.nullid] = nodemod.nullrev
        return r

    def tip(self):
        t = self._db.execute(
            'SELECT node FROM changelog ORDER BY rev DESC LIMIT 1'
        ).fetchone()
        if t:
            return nodemod.bin(t[0])
        return nodemod.nullid

    def revs(self, start=0, stop=None):
        if stop is None:
            stop = self.tip()
        t = self._db.execute(
            'SELECT rev FROM changelog '
            'WHERE rev >= ? AND rev <= ? '
            'ORDER BY REV ASC',
            (start, stop),
        )
        return (int(r[0]) for r in t)

    def _partialmatch(self, id):
        if nodemod.wdirhex.startswith(id):
            raise error.WdirUnsupported
        candidates = [
            nodemod.bin(x[0])
            for x in self._db.execute(
                'SELECT node FROM changelog WHERE node LIKE ?', (id + b'%',)
            )
        ]
        if nodemod.nullhex.startswith(id):
            candidates.append(nodemod.nullid)
        if len(candidates) > 1:
            raise error.AmbiguousPrefixLookupError(
                id, b'00changelog.i', _(b'ambiguous identifier')
            )
        if candidates:
            return candidates[0]
        return None

    def flags(self, rev):
        return 0

    def shortest(self, node, minlength=1):
        nodehex = nodemod.hex(node)
        for attempt in pycompat.xrange(minlength, len(nodehex) + 1):
            candidate = nodehex[:attempt]
            matches = int(
                self._db.execute(
                    'SELECT COUNT(*) FROM changelog WHERE node LIKE ?',
                    (pycompat.sysstr(candidate + b'%'),),
                ).fetchone()[0]
            )
            if matches == 1:
                return candidate
        return nodehex

    def headrevs(self, revs=None):
        realheads = [
            int(x[0])
            for x in self._db.execute(
                'SELECT rev FROM changelog '
                'INNER JOIN heads ON changelog.node = heads.node'
            )
        ]
        if revs:
            return sorted([r for r in revs if r in realheads])
        return sorted(realheads)

    def changelogrevision(self, nodeorrev):
        # Ensure we have a node id
        if isinstance(nodeorrev, int):
            n = self.node(nodeorrev)
        else:
            n = nodeorrev
        # handle looking up nullid
        if n == nodemod.nullid:
            return hgchangelog._changelogrevision(extra={})
        hn = gitutil.togitnode(n)
        # We've got a real commit!
        files = [
            r[0]
            for r in self._db.execute(
                'SELECT filename FROM changedfiles '
                'WHERE node = ? and filenode != ?',
                (hn, gitutil.nullgit),
            )
        ]
        filesremoved = [
            r[0]
            for r in self._db.execute(
                'SELECT filename FROM changedfiles '
                'WHERE node = ? and filenode = ?',
                (hn, nodemod.nullhex),
            )
        ]
        c = self.gitrepo[hn]
        return hgchangelog._changelogrevision(
            manifest=n,  # pretend manifest the same as the commit node
            user=b'%s <%s>'
            % (c.author.name.encode('utf8'), c.author.email.encode('utf8')),
            date=(c.author.time, -c.author.offset * 60),
            files=files,
            # TODO filesadded in the index
            filesremoved=filesremoved,
            description=c.message.encode('utf8'),
            # TODO do we want to handle extra? how?
            extra={b'branch': b'default'},
        )

    def ancestors(self, revs, stoprev=0, inclusive=False):
        revs = list(revs)
        tip = self.rev(self.tip())
        for r in revs:
            if r > tip:
                raise IndexError(b'Invalid rev %r' % r)
        return ancestor.lazyancestors(
            self.parentrevs, revs, stoprev=stoprev, inclusive=inclusive
        )

    # Cleanup opportunity: this is *identical* to the revlog.py version
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def reachableroots(self, minroot, heads, roots, includepath=False):
        return dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )

    # Cleanup opportunity: this is *identical* to the revlog.py version
    def isancestor(self, a, b):
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    # Cleanup opportunity: this is *identical* to the revlog.py version
    def isancestorrev(self, a, b):
        if a == nodemod.nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def parentrevs(self, rev):
        n = self.node(rev)
        hn = gitutil.togitnode(n)
        c = self.gitrepo[hn]
        p1 = p2 = nodemod.nullrev
        if c.parents:
            p1 = self.rev(c.parents[0].id.raw)
            if len(c.parents) > 2:
                raise error.Abort(b'TODO octopus merge handling')
            if len(c.parents) == 2:
                p2 = self.rev(c.parents[1].id.raw)
        return p1, p2

    # Private method is used at least by the tags code.
    _uncheckedparentrevs = parentrevs

    def commonancestorsheads(self, a, b):
        # TODO the revlog verson of this has a C path, so we probably
        # need to optimize this...
        a, b = self.rev(a), self.rev(b)
        return [
            self.node(n)
            for n in ancestor.commonancestorsheads(self.parentrevs, a, b)
        ]

    def branchinfo(self, rev):
        """Git doesn't do named branches, so just put everything on default."""
        return b'default', False

    def delayupdate(self, tr):
        # TODO: I think we can elide this because we're just dropping
        # an object in the git repo?
        pass

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
        p1copies=None,
        p2copies=None,
        filesadded=None,
        filesremoved=None,
    ):
        parents = []
        hp1, hp2 = gitutil.togitnode(p1), gitutil.togitnode(p2)
        if p1 != nodemod.nullid:
            parents.append(hp1)
        if p2 and p2 != nodemod.nullid:
            parents.append(hp2)
        assert date is not None
        timestamp, tz = date
        sig = pygit2.Signature(
            encoding.unifromlocal(stringutil.person(user)),
            encoding.unifromlocal(stringutil.email(user)),
            timestamp,
            -(tz // 60),
        )
        oid = self.gitrepo.create_commit(
            None, sig, sig, desc, gitutil.togitnode(manifest), parents
        )
        # Set up an internal reference to force the commit into the
        # changelog. Hypothetically, we could even use this refs/hg/
        # namespace to allow for anonymous heads on git repos, which
        # would be neat.
        self.gitrepo.references.create(
            'refs/hg/internal/latest-commit', oid, force=True
        )
        # Reindex now to pick up changes. We omit the progress
        # callback because this will be very quick.
        index._index_repo(self.gitrepo, self._db)
        return oid.raw


class manifestlog(baselog):
    def __getitem__(self, node):
        return self.get(b'', node)

    def get(self, relpath, node):
        if node == nodemod.nullid:
            # TODO: this should almost certainly be a memgittreemanifestctx
            return manifest.memtreemanifestctx(self, relpath)
        commit = self.gitrepo[gitutil.togitnode(node)]
        t = commit.tree
        if relpath:
            parts = relpath.split(b'/')
            for p in parts:
                te = t[p]
                t = self.gitrepo[te.id]
        return gitmanifest.gittreemanifestctx(self.gitrepo, t)


@interfaceutil.implementer(repository.ifilestorage)
class filelog(baselog):
    def __init__(self, gr, db, path):
        super(filelog, self).__init__(gr, db)
        assert isinstance(path, bytes)
        self.path = path

    def read(self, node):
        if node == nodemod.nullid:
            return b''
        return self.gitrepo[gitutil.togitnode(node)].data

    def lookup(self, node):
        if len(node) not in (20, 40):
            node = int(node)
        if isinstance(node, int):
            assert False, b'todo revnums for nodes'
        if len(node) == 40:
            node = nodemod.bin(node)
        hnode = gitutil.togitnode(node)
        if hnode in self.gitrepo:
            return node
        raise error.LookupError(self.path, node, _(b'no match found'))

    def cmp(self, node, text):
        """Returns True if text is different than content at `node`."""
        return self.read(node) != text

    def add(self, text, meta, transaction, link, p1=None, p2=None):
        assert not meta  # Should we even try to handle this?
        return self.gitrepo.create_blob(text).raw

    def __iter__(self):
        for clrev in self._db.execute(
            '''
SELECT rev FROM changelog
INNER JOIN changedfiles ON changelog.node = changedfiles.node
WHERE changedfiles.filename = ? AND changedfiles.filenode != ?
        ''',
            (pycompat.fsdecode(self.path), gitutil.nullgit),
        ):
            yield clrev[0]

    def linkrev(self, fr):
        return fr

    def rev(self, node):
        row = self._db.execute(
            '''
SELECT rev FROM changelog
INNER JOIN changedfiles ON changelog.node = changedfiles.node
WHERE changedfiles.filename = ? AND changedfiles.filenode = ?''',
            (pycompat.fsdecode(self.path), gitutil.togitnode(node)),
        ).fetchone()
        if row is None:
            raise error.LookupError(self.path, node, _(b'no such node'))
        return int(row[0])

    def node(self, rev):
        maybe = self._db.execute(
            '''SELECT filenode FROM changedfiles
INNER JOIN changelog ON changelog.node = changedfiles.node
WHERE changelog.rev = ? AND filename = ?
''',
            (rev, pycompat.fsdecode(self.path)),
        ).fetchone()
        if maybe is None:
            raise IndexError('gitlog %r out of range %d' % (self.path, rev))
        return nodemod.bin(maybe[0])

    def parents(self, node):
        gn = gitutil.togitnode(node)
        gp = pycompat.fsdecode(self.path)
        ps = []
        for p in self._db.execute(
            '''SELECT p1filenode, p2filenode FROM changedfiles
WHERE filenode = ? AND filename = ?
''',
            (gn, gp),
        ).fetchone():
            if p is None:
                commit = self._db.execute(
                    "SELECT node FROM changedfiles "
                    "WHERE filenode = ? AND filename = ?",
                    (gn, gp),
                ).fetchone()[0]
                # This filelog is missing some data. Build the
                # filelog, then recurse (which will always find data).
                if pycompat.ispy3:
                    commit = commit.decode('ascii')
                index.fill_in_filelog(self.gitrepo, self._db, commit, gp, gn)
                return self.parents(node)
            else:
                ps.append(nodemod.bin(p))
        return ps

    def renamed(self, node):
        # TODO: renames/copies
        return False