view hgext/convert/p4.py @ 43812:bf0453866c80

fuzz: use a variable to allow specifying python-config to use. Eventually we should probably default this to just `python-config` and have the oss-fuzz build.sh script specify the sanpy python-config, but for now this lets us make progress. Differential Revision: https://phab.mercurial-scm.org/D7563
author Augie Fackler <augie@google.com>
date Fri, 06 Dec 2019 15:15:05 -0500
parents be8552f25cab
children 3af293735d0f
line wrap: on
line source

# Perforce source for convert extension.
#
# Copyright 2009, Frank Kingswood <frank@kingswood-consulting.co.uk>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import

import marshal
import re

from mercurial.i18n import _
from mercurial import (
    error,
    util,
)
from mercurial.utils import (
    dateutil,
    procutil,
    stringutil,
)

from . import common


def loaditer(f):
    """Iterate over the marshalled dictionaries that p4 wrote to ``f``.

    Records are read one at a time with ``marshal.load``. Iteration stops
    at the first falsy record or when the stream is exhausted (signalled
    by ``EOFError``), whichever comes first.
    """
    try:
        record = marshal.load(f)
        while record:
            yield record
            record = marshal.load(f)
    except EOFError:
        # end of the p4 output stream: simply finish the generator
        return


def decodefilename(filename):
    """Perforce escapes special characters @, #, *, or %
    with %40, %23, %2A, or %25 respectively

    >>> decodefilename(b'portable-net45%252Bnetcore45%252Bwp8%252BMonoAndroid')
    'portable-net45%2Bnetcore45%2Bwp8%2BMonoAndroid'
    >>> decodefilename(b'//Depot/Directory/%2525/%2523/%23%40.%2A')
    '//Depot/Directory/%25/%23/#@.*'
    """
    # The order matters: '%25' is undone last so that a literal '%' produced
    # by it is never re-interpreted as the start of another escape.
    return (
        filename.replace(b'%2A', b'*')
        .replace(b'%23', b'#')
        .replace(b'%40', b'@')
        .replace(b'%25', b'%')
    )


class p4_source(common.converter_source):
    """Convert source that reads Perforce history by running ``p4 -G``.

    ``path`` is either a depot path (it contains ``/`` and must start with
    ``//``) or the name of a client spec whose ``View`` entries are used.
    Changelist data is collected lazily, the first time one of the cached
    properties (``changeset``, ``files``, ``copies``, ``heads``,
    ``depotname``) is accessed.
    """

    def __init__(self, ui, repotype, path, revs=None):
        """Validate ``path``/``revs`` and prepare the regexes used later.

        Raises common.NoRepo if ``path`` contains a slash but is not a
        ``//depot`` style path, and error.Abort if more than one revision
        is requested.
        """
        # avoid import cycle
        from . import convcmd

        super(p4_source, self).__init__(ui, repotype, path, revs=revs)

        if b"/" in path and not path.startswith(b'//'):
            raise common.NoRepo(
                _(b'%s does not look like a P4 repository') % path
            )

        common.checktool(b'p4', abort=False)

        self.revmap = {}
        self.encoding = self.ui.config(
            b'convert', b'p4.encoding', convcmd.orig_encoding
        )
        # Splits a p4 file type into (prefix modifiers, base type,
        # "+suffix" modifiers).
        self.re_type = re.compile(
            br"([a-z]+)?(text|binary|symlink|apple|resource|unicode|utf\d+)"
            br"(\+\w+)?$"
        )
        # Expanded RCS-style keywords, collapsed back to "$Keyword$" in
        # getfile() for files whose type carries the "k" flag ...
        self.re_keywords = re.compile(
            br"\$(Id|Header|Date|DateTime|Change|File|Revision|Author)"
            br":[^$\n]*\$"
        )
        # ... and the reduced keyword set used for the "ko" flag.
        self.re_keywords_old = re.compile(br"\$(Id|Header):[^$\n]*\$")

        if revs and len(revs) > 1:
            raise error.Abort(
                _(
                    b"p4 source does not support specifying "
                    b"multiple revisions"
                )
            )

    def setrevmap(self, revmap):
        """Sets the parsed revmap dictionary.

        Revmap stores mappings from a source revision to a target revision.
        It is set in convertcmd.convert and provided by the user as a file
        on the commandline.

        Revisions in the map are considered as being present in the
        repository and ignored during _parse(). This allows for incremental
        imports if a revmap is provided.
        """
        self.revmap = revmap

    def _parse_view(self, path):
        """Read changes affecting the path

        Returns a dict whose keys are the change numbers reported by
        ``p4 changes -s submitted`` for ``path`` (the values are always
        True).
        """
        cmd = b'p4 -G changes -s submitted %s' % procutil.shellquote(path)
        stdout = procutil.popen(cmd, mode=b'rb')
        p4changes = {}
        for d in loaditer(stdout):
            c = d.get(b"change", None)
            if c:
                p4changes[c] = True
        return p4changes

    def _find_copy_source(self, oldname, change, localname):
        """Return the local name of the file ``oldname`` was moved from.

        Runs ``p4 filelog`` on the depot path ``oldname`` and looks for a
        "move/add" entry belonging to ``change`` that records a
        "moved from" integration. The source depot path is translated
        through ``localname``; None is returned when no known source is
        found.
        """
        flcmd = b'p4 -G filelog %s' % procutil.shellquote(oldname)
        flstdout = procutil.popen(flcmd, mode=b'rb')

        for log in loaditer(flstdout):
            copiedoldname = None

            i = 0
            while (b"change%d" % i) in log:
                if (
                    log[b"change%d" % i] == change
                    and log[b"action%d" % i] == b"move/add"
                ):
                    j = 0
                    while (b"file%d,%d" % (i, j)) in log:
                        if log[b"how%d,%d" % (i, j)] == b"moved from":
                            copiedoldname = log[b"file%d,%d" % (i, j)]
                            break
                        j += 1
                i += 1

            if copiedoldname and copiedoldname in localname:
                return localname[copiedoldname]

        return None

    def _parse(self, ui, path):
        """Prepare list of P4 filenames and revisions to import

        Returns a dict with the keys ``changeset`` (change -> commit
        object), ``files`` (change -> list of (filename, rev)), ``copies``
        (change -> {filename: source filename}), ``heads`` (list holding
        the last processed change, or empty) and ``depotname`` (local
        filename -> depot path).
        """
        p4changes = {}
        changeset = {}
        files_map = {}
        copies_map = {}
        localname = {}
        depotname = {}
        heads = []

        ui.status(_(b'reading p4 views\n'))

        # read client spec or view
        if b"/" in path:
            p4changes.update(self._parse_view(path))
            if path.startswith(b"//") and path.endswith(b"/..."):
                views = {path[:-3]: b""}
            else:
                views = {b"//": b""}
        else:
            cmd = b'p4 -G client -o %s' % procutil.shellquote(path)
            clientspec = marshal.load(procutil.popen(cmd, mode=b'rb'))

            views = {}
            for client in clientspec:
                if client.startswith(b"View"):
                    sview, cview = clientspec[client].split()
                    p4changes.update(self._parse_view(sview))
                    if sview.endswith(b"...") and cview.endswith(b"..."):
                        sview = sview[:-3]
                        cview = cview[:-3]
                    # drop the leading "//" and the first path component
                    # of the client-side view
                    cview = cview[2:]
                    cview = cview[cview.find(b"/") + 1 :]
                    views[sview] = cview

        # list of changes that affect our source files
        # (sorted() is used because dict views have no sort() on Python 3)
        p4changes = sorted(p4changes, key=int)

        # list with depot pathnames, longest first
        vieworder = sorted(views, key=len, reverse=True)

        # handle revision limiting
        startrev = self.ui.config(b'convert', b'p4.startrev')

        # now read the full changelists to get the list of file revisions
        ui.status(_(b'collecting p4 changelists\n'))
        lastid = None
        for change in p4changes:
            if startrev and int(change) < int(startrev):
                continue
            if self.revs and int(change) > int(self.revs[0]):
                continue
            if change in self.revmap:
                # Ignore already present revisions, but set the parent pointer.
                lastid = change
                continue

            if lastid:
                parents = [lastid]
            else:
                parents = []

            d = self._fetch_revision(change)
            c = self._construct_commit(d, parents)

            descarr = c.desc.splitlines(True)
            if len(descarr) > 0:
                shortdesc = descarr[0].rstrip(b'\r\n')
            else:
                shortdesc = b'**empty changelist description**'

            # Escape at the bytes level: repr() returns a unicode str on
            # Python 3, which cannot be %-formatted into a bytes template.
            t = b'%s %s' % (c.rev, stringutil.escapestr(shortdesc))
            ui.status(stringutil.ellipsis(t, 80) + b'\n')

            files = []
            copies = {}
            copiedfiles = []
            i = 0
            while (b"depotFile%d" % i) in d and (b"rev%d" % i) in d:
                oldname = d[b"depotFile%d" % i]
                filename = None
                # the longest matching view prefix wins (case-insensitive)
                for v in vieworder:
                    if oldname.lower().startswith(v.lower()):
                        filename = decodefilename(views[v] + oldname[len(v) :])
                        break
                if filename:
                    files.append((filename, d[b"rev%d" % i]))
                    depotname[filename] = oldname
                    if d.get(b"action%d" % i) == b"move/add":
                        copiedfiles.append(filename)
                    localname[oldname] = filename
                i += 1

            # Collect information about copied files
            for filename in copiedfiles:
                copiedfilename = self._find_copy_source(
                    depotname[filename], change, localname
                )
                if copiedfilename:
                    copies[filename] = copiedfilename
                else:
                    ui.warn(
                        _(b"cannot find source for copied file: %s@%s\n")
                        % (filename, change)
                    )

            changeset[change] = c
            files_map[change] = files
            copies_map[change] = copies
            lastid = change

        if lastid and len(changeset) > 0:
            heads = [lastid]

        return {
            b'changeset': changeset,
            b'files': files_map,
            b'copies': copies_map,
            b'heads': heads,
            b'depotname': depotname,
        }

    @util.propertycache
    def _parse_once(self):
        """Cached result of _parse() for this source's ui and path."""
        return self._parse(self.ui, self.path)

    @util.propertycache
    def copies(self):
        """Mapping of change -> {filename: copy source filename}."""
        return self._parse_once[b'copies']

    @util.propertycache
    def files(self):
        """Mapping of change -> list of (filename, p4 file revision)."""
        return self._parse_once[b'files']

    @util.propertycache
    def changeset(self):
        """Mapping of change -> commit object built by _construct_commit."""
        return self._parse_once[b'changeset']

    @util.propertycache
    def heads(self):
        """List with the last processed change, or an empty list."""
        return self._parse_once[b'heads']

    @util.propertycache
    def depotname(self):
        """Mapping of local filename -> Perforce depot path."""
        return self._parse_once[b'depotname']

    def getheads(self):
        """Return the list of head changes computed by _parse()."""
        return self.heads

    def getfile(self, name, rev):
        """Return ``(contents, mode)`` for file ``name`` at p4 revision ``rev``.

        ``mode`` is b"", b"x" (executable) or b"l" (symlink).
        ``(None, None)`` is returned when the revision is a purge/delete or
        when p4 reported no recognizable file type. If p4 reports an error,
        the command is run once more; a second consecutive error is raised
        as an IOError.
        """
        cmd = b'p4 -G print %s' % procutil.shellquote(
            b"%s#%s" % (self.depotname[name], rev)
        )

        lasterror = None
        while True:
            stdout = procutil.popen(cmd, mode=b'rb')

            mode = None
            contents = []
            keywords = None

            for d in loaditer(stdout):
                code = d[b"code"]
                data = d.get(b"data")

                if code == b"error":
                    # if this is the first time error happened
                    # re-attempt getting the file
                    if not lasterror:
                        lasterror = IOError(d[b"generic"], data)
                        # this will exit inner-most for-loop
                        break
                    else:
                        raise lasterror

                elif code == b"stat":
                    action = d.get(b"action")
                    if action in [b"purge", b"delete", b"move/delete"]:
                        return None, None
                    p4type = self.re_type.match(d[b"type"])
                    if p4type:
                        mode = b""
                        # modifiers may appear before and/or after the
                        # base type; look at both
                        flags = (p4type.group(1) or b"") + (
                            p4type.group(3) or b""
                        )
                        if b"x" in flags:
                            mode = b"x"
                        if p4type.group(2) == b"symlink":
                            mode = b"l"
                        if b"ko" in flags:
                            keywords = self.re_keywords_old
                        elif b"k" in flags:
                            keywords = self.re_keywords

                elif code == b"text" or code == b"binary":
                    contents.append(data)

                # any non-error record clears a previously seen error
                lasterror = None

            if not lasterror:
                break

        if mode is None:
            return None, None

        contents = b''.join(contents)

        if keywords:
            # collapse expanded keywords back to their "$Keyword$" form
            contents = keywords.sub(b"$\\1$", contents)
        if mode == b"l" and contents.endswith(b"\n"):
            # drop the trailing newline from the symlink target
            contents = contents[:-1]

        return contents, mode

    def getchanges(self, rev, full):
        """Return ``(files, copies, set())`` for change ``rev``.

        Raises error.Abort when ``full`` is requested, which this source
        does not support.
        """
        if full:
            raise error.Abort(_(b"convert from p4 does not support --full"))
        return self.files[rev], self.copies[rev], set()

    def _construct_commit(self, obj, parents=None):
        """
        Constructs a common.commit object from an unmarshalled
        `p4 describe` output
        """
        desc = self.recode(obj.get(b"desc", b""))
        date = (int(obj[b"time"]), 0)  # timezone not set
        if parents is None:
            parents = []

        return common.commit(
            author=self.recode(obj[b"user"]),
            date=dateutil.datestr(date, b'%Y-%m-%d %H:%M:%S %1%2'),
            parents=parents,
            desc=desc,
            branch=None,
            rev=obj[b'change'],
            extra={b"p4": obj[b'change'], b"convert_revision": obj[b'change']},
        )

    def _fetch_revision(self, rev):
        """Return an output of `p4 describe` including author, commit date as
        a dictionary."""
        cmd = b"p4 -G describe -s %s" % rev
        stdout = procutil.popen(cmd, mode=b'rb')
        return marshal.load(stdout)

    def getcommit(self, rev):
        """Return the commit object for change ``rev``.

        Changes collected by _parse() are served from ``self.changeset``;
        changes only known through the revmap are fetched from p4 on
        demand and returned without parents. Raises error.Abort for
        anything else.
        """
        if rev in self.changeset:
            return self.changeset[rev]
        elif rev in self.revmap:
            d = self._fetch_revision(rev)
            return self._construct_commit(d, parents=None)
        raise error.Abort(
            _(b"cannot find %s in the revmap or parsed changesets") % rev
        )

    def gettags(self):
        """Return an empty dict: this source reports no tags."""
        return {}

    def getchangedfiles(self, rev, i):
        """Return the sorted filenames touched by change ``rev``.

        ``i`` is accepted but not used by this source.
        """
        return sorted(x[0] for x in self.files[rev])