hgext/convert/p4.py
author Augie Fackler <raf@durin42.com>
Fri, 17 Oct 2014 13:52:10 -0400
changeset 23031 3c0983cc279e
parent 22300 35ab037de989
child 24395 216fa1ba9993
permissions -rw-r--r--
i18n: cache the result of every gettext call In looking at profiler output for 'hg log' on mozilla-central, I noticed we spent a _huge_ amount of time in gettext relative to what it's doing. Caching provides a roughly 15% performance improvement even on repositories as small as hg. == hg repo on linux == Before: % cumulative self time seconds seconds name 5.05 0.19 0.19 i18n.py:62:gettext 4.84 0.18 0.18 revlog.py:88:decompress 2.95 0.17 0.11 changelog.py:201:node 2.32 0.09 0.09 ui.py:577:write 2.11 0.08 0.08 i18n.py:72:gettext 2.11 0.08 0.08 obsolete.py:196:_fm0readmarkers 1.89 0.07 0.07 obsolete.py:569:_load 1.68 0.63 0.06 localrepo.py:29:__get__ real 0m4.026s user 0m3.993s sys 0m0.034s After: % cumulative self time seconds seconds name 8.05 0.26 0.26 revlog.py:88:decompress 2.68 0.22 0.09 color.py:395:write 2.20 0.07 0.07 obsolete.py:196:_fm0readmarkers 1.95 0.06 0.06 obsolete.py:174:_fm0readmarkers 1.95 0.06 0.06 ui.py:577:write 1.95 0.06 0.06 util.py:1228:datestr 1.71 0.06 0.06 utf_8.py:16:decode 1.71 0.06 0.06 revlog.py:273:__len__ real 0m3.519s user 0m3.447s sys 0m0.073s == mozilla-central repo on linux == Before: % cumulative self time seconds seconds name 7.72 2.35 2.35 revlog.py:88:decompress 4.46 1.36 1.36 i18n.py:62:gettext 2.22 0.67 0.67 i18n.py:72:gettext 2.19 1.14 0.67 changelog.py:201:node 2.16 0.66 0.66 ui.py:577:write 1.96 0.60 0.60 utf_8.py:16:decode 1.93 1.97 0.59 color.py:395:write 1.85 0.81 0.56 changelog.py:136:tip real 0m30.822s user 0m30.660s sys 0m0.149s After: % cumulative self time seconds seconds name 9.82 2.49 2.49 revlog.py:88:decompress 2.67 1.31 0.68 localrepo.py:29:__get__ 2.57 0.65 0.65 utf_8.py:16:decode 2.48 1.01 0.63 changelog.py:201:node 2.10 0.82 0.53 changelog.py:136:tip 2.01 0.51 0.51 ui.py:577:write 1.91 0.49 0.49 util.py:1232:datestr 1.85 1.65 0.47 color.py:395:write real 0m25.619s user 0m25.446s sys 0m0.166s == cpython repo on os x = Before: % cumulative self time seconds seconds name 5.05 1.35 1.35 cmdutil.py:982:_show 4.59 1.22 1.22 revlog.py:274:__len__ 3.98 1.06 1.06 i18n.py:62:gettext 3.91 1.04 1.04 revlog.py:1016:revision 3.68 0.98 0.98 revlog.py:337:parents 3.45 0.92 0.92 revlog.py:88:decompress 2.91 0.78 0.78 revlog.py:309:rev 2.62 0.70 0.70 revlog.py:1033:revision real 0m30.414s user 0m28.145s sys 0m0.541s After: % cumulative self time seconds seconds name 7.98 1.66 1.66 cmdutil.py:982:_show 6.83 1.42 1.42 changelog.py:46:decodeextra 5.18 1.08 1.08 revlog.py:274:__len__ 3.94 0.82 0.82 revlog.py:1016:revision 3.41 0.71 0.71 revlog.py:309:rev 3.32 0.69 0.69 revlog.py:88:decompress 2.99 0.63 0.62 revlog.py:1033:revision 2.69 0.56 0.56 revlog.py:341:start real 0m22.811s user 0m21.883s sys 0m0.397s

# Perforce source for convert extension.
#
# Copyright 2009, Frank Kingswood <frank@kingswood-consulting.co.uk>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from mercurial import util
from mercurial.i18n import _

from common import commit, converter_source, checktool, NoRepo
import marshal
import re

def loaditer(f):
    "Yield the dictionary objects generated by p4"
    try:
        while True:
            d = marshal.load(f)
            if not d:
                break
            yield d
    except EOFError:
        pass

class p4_source(converter_source):
    def __init__(self, ui, path, rev=None):
        super(p4_source, self).__init__(ui, path, rev=rev)

        if "/" in path and not path.startswith('//'):
            raise NoRepo(_('%s does not look like a P4 repository') % path)

        checktool('p4', abort=False)

        self.p4changes = {}
        self.heads = {}
        self.changeset = {}
        self.files = {}
        self.tags = {}
        self.lastbranch = {}
        self.parent = {}
        self.encoding = "latin_1"
        self.depotname = {}           # mapping from local name to depot name
        self.re_type = re.compile(
            "([a-z]+)?(text|binary|symlink|apple|resource|unicode|utf\d+)"
            "(\+\w+)?$")
        self.re_keywords = re.compile(
            r"\$(Id|Header|Date|DateTime|Change|File|Revision|Author)"
            r":[^$\n]*\$")
        self.re_keywords_old = re.compile("\$(Id|Header):[^$\n]*\$")

        self._parse(ui, path)

    def _parse_view(self, path):
        "Read changes affecting the path"
        cmd = 'p4 -G changes -s submitted %s' % util.shellquote(path)
        stdout = util.popen(cmd, mode='rb')
        for d in loaditer(stdout):
            c = d.get("change", None)
            if c:
                self.p4changes[c] = True

    def _parse(self, ui, path):
        "Prepare list of P4 filenames and revisions to import"
        ui.status(_('reading p4 views\n'))

        # read client spec or view
        if "/" in path:
            self._parse_view(path)
            if path.startswith("//") and path.endswith("/..."):
                views = {path[:-3]:""}
            else:
                views = {"//": ""}
        else:
            cmd = 'p4 -G client -o %s' % util.shellquote(path)
            clientspec = marshal.load(util.popen(cmd, mode='rb'))

            views = {}
            for client in clientspec:
                if client.startswith("View"):
                    sview, cview = clientspec[client].split()
                    self._parse_view(sview)
                    if sview.endswith("...") and cview.endswith("..."):
                        sview = sview[:-3]
                        cview = cview[:-3]
                    cview = cview[2:]
                    cview = cview[cview.find("/") + 1:]
                    views[sview] = cview

        # list of changes that affect our source files
        self.p4changes = self.p4changes.keys()
        self.p4changes.sort(key=int)

        # list with depot pathnames, longest first
        vieworder = views.keys()
        vieworder.sort(key=len, reverse=True)

        # handle revision limiting
        startrev = self.ui.config('convert', 'p4.startrev', default=0)
        self.p4changes = [x for x in self.p4changes
                          if ((not startrev or int(x) >= int(startrev)) and
                              (not self.rev or int(x) <= int(self.rev)))]

        # now read the full changelists to get the list of file revisions
        ui.status(_('collecting p4 changelists\n'))
        lastid = None
        for change in self.p4changes:
            cmd = "p4 -G describe -s %s" % change
            stdout = util.popen(cmd, mode='rb')
            d = marshal.load(stdout)
            desc = self.recode(d.get("desc", ""))
            shortdesc = desc.split("\n", 1)[0]
            t = '%s %s' % (d["change"], repr(shortdesc)[1:-1])
            ui.status(util.ellipsis(t, 80) + '\n')

            if lastid:
                parents = [lastid]
            else:
                parents = []

            date = (int(d["time"]), 0)     # timezone not set
            c = commit(author=self.recode(d["user"]),
                       date=util.datestr(date, '%Y-%m-%d %H:%M:%S %1%2'),
                       parents=parents, desc=desc, branch='',
                       extra={"p4": change})

            files = []
            i = 0
            while ("depotFile%d" % i) in d and ("rev%d" % i) in d:
                oldname = d["depotFile%d" % i]
                filename = None
                for v in vieworder:
                    if oldname.startswith(v):
                        filename = views[v] + oldname[len(v):]
                        break
                if filename:
                    files.append((filename, d["rev%d" % i]))
                    self.depotname[filename] = oldname
                i += 1
            self.changeset[change] = c
            self.files[change] = files
            lastid = change

        if lastid:
            self.heads = [lastid]

    def getheads(self):
        return self.heads

    def getfile(self, name, rev):
        cmd = 'p4 -G print %s' \
            % util.shellquote("%s#%s" % (self.depotname[name], rev))
        stdout = util.popen(cmd, mode='rb')

        mode = None
        contents = ""
        keywords = None

        for d in loaditer(stdout):
            code = d["code"]
            data = d.get("data")

            if code == "error":
                raise IOError(d["generic"], data)

            elif code == "stat":
                if d.get("action") == "purge":
                    return None, None
                p4type = self.re_type.match(d["type"])
                if p4type:
                    mode = ""
                    flags = (p4type.group(1) or "") + (p4type.group(3) or "")
                    if "x" in flags:
                        mode = "x"
                    if p4type.group(2) == "symlink":
                        mode = "l"
                    if "ko" in flags:
                        keywords = self.re_keywords_old
                    elif "k" in flags:
                        keywords = self.re_keywords

            elif code == "text" or code == "binary":
                contents += data

        if mode is None:
            return None, None

        if keywords:
            contents = keywords.sub("$\\1$", contents)
        if mode == "l" and contents.endswith("\n"):
            contents = contents[:-1]

        return contents, mode

    def getchanges(self, rev, full):
        if full:
            raise util.Abort(_("convert from p4 do not support --full"))
        return self.files[rev], {}

    def getcommit(self, rev):
        return self.changeset[rev]

    def gettags(self):
        return self.tags

    def getchangedfiles(self, rev, i):
        return sorted([x[0] for x in self.files[rev]])