view hgext/convert/convcmd.py @ 16719:e7bf09acd410

localrepo: add branchtip() method for faster single-branch lookups

For the PyPy repo with 744 branches and 843 branch heads, this brings
hg log -r default over NFS from:

CallCount Recursive Total(ms) Inline(ms) module:lineno(function)
3249 0 1.3222 1.3222 <open>
3244 0 0.6211 0.6211 <method 'close' of 'file' objects>
3243 0 0.0800 0.0800 <method 'read' of 'file' objects>
3241 0 0.0660 0.0660 <method 'seek' of 'file' objects>
3905 0 0.0476 0.0476 <zlib.decompress>
3281 0 2.6756 0.0472 mercurial.changelog:182(read)
+3281 0 2.5256 0.0453 +mercurial.revlog:881(revision)
+3276 0 0.0389 0.0196 +mercurial.changelog:28(decodeextra)
+6562 0 0.0123 0.0123 +<method 'split' of 'str' objects>
+6562 0 0.0408 0.0073 +mercurial.encoding:61(tolocal)
+3281 0 0.0054 0.0054 +<method 'index' of 'str' objects>
3241 0 2.2464 0.0456 mercurial.revlog:818(_loadchunk)
+3241 0 0.6205 0.6205 +<method 'close' of 'file' objects>
+3241 0 0.0765 0.0765 +<method 'read' of 'file' objects>
+3241 0 0.0660 0.0660 +<method 'seek' of 'file' objects>
+3241 0 1.4209 0.0135 +mercurial.store:374(__call__)
+3241 0 0.0122 0.0107 +mercurial.revlog:810(_addchunk)
3281 0 2.5256 0.0453 mercurial.revlog:881(revision)
+3280 0 0.0175 0.0175 +mercurial.revlog:305(rev)
+3281 0 2.2819 0.0119 +mercurial.revlog:847(_chunkraw)
+3281 0 0.0603 0.0083 +mercurial.revlog:945(_checkhash)
+3281 0 0.0051 0.0051 +mercurial.revlog:349(flags)
+3281 0 0.0040 0.0040 +<mercurial.mpatch.patches>
13682 0 0.0479 0.0248 <method 'decode' of 'str' objects>
+7418 0 0.0228 0.0076 +encodings.utf_8:15(decode)
+1 0 0.0003 0.0000 +encodings:71(search_function)
3248 0 1.3995 0.0246 mercurial.scmutil:218(__call__)
+3248 0 1.3222 1.3222 +<open>
+3248 0 0.0235 0.0184 +os.path:80(split)
+3248 0 0.0084 0.0068 +mercurial.scmutil:92(__call__)
Time: real 2.750 secs (user 0.680+0.000 sys 0.360+0.000)

down to:

CallCount Recursive Total(ms) Inline(ms) module:lineno(function)
55 31 0.0197 0.0163 <__import__>
+1 0 0.0006 0.0002 +mercurial.context:8(<module>)
+1 0 0.0042 0.0001 +mercurial.revlog:12(<module>)
+1 0 0.0002 0.0001 +mercurial.match:8(<module>)
+1 0 0.0003 0.0001 +mercurial.dirstate:7(<module>)
+1 0 0.0057 0.0001 +mercurial.changelog:8(<module>)
1 0 0.0117 0.0032 mercurial.localrepo:525(_readbranchcache)
+844 0 0.0015 0.0015 +<binascii.unhexlify>
+845 0 0.0010 0.0010 +<method 'split' of 'str' objects>
+843 0 0.0045 0.0009 +mercurial.encoding:61(tolocal)
+843 0 0.0004 0.0004 +<method 'setdefault' of 'dict' objects>
+1 0 0.0003 0.0003 +<method 'close' of 'file' objects>
3 0 0.0029 0.0029 <method 'read' of 'file' objects>
9 0 0.0018 0.0018 <open>
990 0 0.0017 0.0017 <binascii.unhexlify>
53 0 0.0016 0.0016 mercurial.demandimport:43(__init__)
862 0 0.0015 0.0015 <_codecs.utf_8_decode>
862 0 0.0037 0.0014 <method 'decode' of 'str' objects>
+862 0 0.0023 0.0008 +encodings.utf_8:15(decode)
981 0 0.0011 0.0011 <method 'split' of 'str' objects>
861 0 0.0046 0.0009 mercurial.encoding:61(tolocal)
+861 0 0.0037 0.0014 +<method 'decode' of 'str' objects>
862 0 0.0023 0.0008 encodings.utf_8:15(decode)
+862 0 0.0015 0.0015 +<_codecs.utf_8_decode>
4 0 0.0008 0.0008 <method 'close' of 'file' objects>
179 154 0.0202 0.0004 mercurial.demandimport:83(__getattribute__)
+36 11 0.0199 0.0003 +mercurial.demandimport:55(_load)
+72 0 0.0001 0.0001 +mercurial.demandimport:83(__getattribute__)
+36 0 0.0000 0.0000 +<getattr>
1 0 0.0015 0.0004 mercurial.tags:148(_readtagcache)
Time: real 0.060 secs (user 0.030+0.000 sys 0.010+0.000)
author Brodie Rao <brodie@sf.io>
date Sun, 13 May 2012 14:04:04 +0200
parents f366d4c2ff34
children eaf6a6d7f015

# convcmd - convert extension commands definition
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from common import NoRepo, MissingTool, SKIPREV, mapfile
from cvs import convert_cvs
from darcs import darcs_source
from git import convert_git
from hg import mercurial_source, mercurial_sink
from subversion import svn_source, svn_sink
from monotone import monotone_source
from gnuarch import gnuarch_source
from bzr import bzr_source
from p4 import p4_source
import filemap, common

import os, shutil
from mercurial import hg, util, encoding
from mercurial.i18n import _

orig_encoding = 'ascii'

def recode(s):
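    # convert() below temporarily forces encoding.encoding to UTF-8 while a
    # conversion runs; recode() maps strings back to the user's original
    # locale encoding (saved in orig_encoding) so that status output is
    # readable in the caller's terminal.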
    if isinstance(s, unicode):
        return s.encode(orig_encoding, 'replace')
    else:
        return s.decode('utf-8').encode(orig_encoding, 'replace')

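# Each entry is (name, converter, default sort mode); the sort mode is used
# when no explicit --branchsort/--datesort/--sourcesort option is given.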
source_converters = [
    ('cvs', convert_cvs, 'branchsort'),
    ('git', convert_git, 'branchsort'),
    ('svn', svn_source, 'branchsort'),
    ('hg', mercurial_source, 'sourcesort'),
    ('darcs', darcs_source, 'branchsort'),
    ('mtn', monotone_source, 'branchsort'),
    ('gnuarch', gnuarch_source, 'branchsort'),
    ('bzr', bzr_source, 'branchsort'),
    ('p4', p4_source, 'branchsort'),
    ]

sink_converters = [
    ('hg', mercurial_sink),
    ('svn', svn_sink),
    ]

def convertsource(ui, path, type, rev):
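    # Probe each registered source converter in turn (or only the requested
    # type); probing failures (NoRepo, MissingTool) are collected and only
    # reported if no converter accepts the path.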
    exceptions = []
    if type and type not in [s[0] for s in source_converters]:
        raise util.Abort(_('%s: invalid source repository type') % type)
    for name, source, sortmode in source_converters:
        try:
            if not type or name == type:
                return source(ui, path, rev), sortmode
        except (NoRepo, MissingTool), inst:
            exceptions.append(inst)
    if not ui.quiet:
        for inst in exceptions:
            ui.write("%s\n" % inst)
    raise util.Abort(_('%s: missing or unsupported repository') % path)

def convertsink(ui, path, type):
    if type and type not in [s[0] for s in sink_converters]:
        raise util.Abort(_('%s: invalid destination repository type') % type)
    for name, sink in sink_converters:
        try:
            if not type or name == type:
                return sink(ui, path)
        except NoRepo, inst:
            ui.note(_("convert: %s\n") % inst)
        except MissingTool, inst:
            raise util.Abort('%s\n' % inst)
    raise util.Abort(_('%s: unknown repository type') % path)

class progresssource(object):
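    # Thin wrapper around a source that reports 'getting files' progress as
    # the sink pulls file contents through getfile().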
    def __init__(self, ui, source, filecount):
        self.ui = ui
        self.source = source
        self.filecount = filecount
        self.retrieved = 0

    def getfile(self, file, rev):
        self.retrieved += 1
        self.ui.progress(_('getting files'), self.retrieved,
                         item=file, total=self.filecount)
        return self.source.getfile(file, rev)

    def lookuprev(self, rev):
        return self.source.lookuprev(rev)

    def close(self):
        self.ui.progress(_('getting files'), None)

class converter(object):
    def __init__(self, ui, source, dest, revmapfile, opts):

        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.authors = {}
        self.authorfile = None

        # Record converted revisions persistently: maps source revision
        # ID to target revision ID (both strings).  (This is how
        # incremental conversions work.)
        self.map = mapfile(ui, revmapfile)

        # Read the destination's author map first, if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authormap'):
            self.readauthormap(opts.get('authormap'))
            self.authorfile = self.dest.authorfile()

        self.splicemap = common.parsesplicemap(opts.get('splicemap'))
        self.branchmap = mapfile(ui, opts.get('branchmap'))

    def walktree(self, heads):
        '''Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset.'''
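        # Breadth-first walk from the source heads, stopping at revisions
        # that are already converted (present in self.map); the resulting
        # {revision: [parents]} mapping feeds toposort() below.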
        visit = heads
        known = set()
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map:
                continue
            known.add(n)
            self.ui.progress(_('scanning'), len(known), unit=_('revisions'))
            commit = self.cachecommit(n)
            parents[n] = []
            for p in commit.parents:
                parents[n].append(p)
                visit.append(p)
        self.ui.progress(_('scanning'), None)

        return parents

    def mergesplicemap(self, parents, splicemap):
        """A splicemap redefines child/parent relationships. Check the
        map contains valid revision identifiers and merge the new
        links in the source graph.
        """
        for c in splicemap:
            if c not in parents:
                if not self.dest.hascommit(self.map.get(c, c)):
                    # Could be in source but not converted during this run
                    self.ui.warn(_('splice map revision %s is not being '
                                   'converted, ignoring\n') % c)
                continue
            pc = []
            for p in splicemap[c]:
                # We do not have to wait for nodes already in dest.
                if self.dest.hascommit(self.map.get(p, p)):
                    continue
                # Parent is not in dest and not being converted, not good
                if p not in parents:
                    raise util.Abort(_('unknown splice map parent: %s') % p)
                pc.append(p)
            parents[c] = pc

    def toposort(self, parents, sortmode):
        '''Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.'''

        def mapchildren(parents):
            """Return a (children, roots) tuple where 'children' maps parent
            revision identifiers to children ones, and 'roots' is the list of
            revisions without parents. 'parents' must be a mapping of revision
            identifier to its parents ones.
            """
            visit = parents.keys()
            seen = set()
            children = {}
            roots = []

            while visit:
                n = visit.pop(0)
                if n in seen:
                    continue
                seen.add(n)
                # Ensure that nodes without parents are present in the
                # 'children' mapping.
                children.setdefault(n, [])
                hasparent = False
                for p in parents[n]:
                    if p not in self.map:
                        visit.append(p)
                        hasparent = True
                    children.setdefault(p, []).append(n)
                if not hasparent:
                    roots.append(n)

            return children, roots

        # Sort functions are supposed to take a list of revisions which
        # can be converted immediately and pick one

        def makebranchsorter():
            """If the previously converted revision has a child in the
            eligible revisions list, pick it. Return the list head
            otherwise. Branch sort attempts to minimize branch
            switching, which is harmful for Mercurial backend
            compression.
            """
            prev = [None]
            def picknext(nodes):
                next = nodes[0]
                for n in nodes:
                    if prev[0] in parents[n]:
                        next = n
                        break
                prev[0] = next
                return next
            return picknext

        def makesourcesorter():
            """Source specific sort."""
            keyfn = lambda n: self.commitcache[n].sortkey
            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]
            return picknext

        def makedatesorter():
            """Sort revisions by date."""
            dates = {}
            def getdate(n):
                if n not in dates:
                    dates[n] = util.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]

            return picknext

        if sortmode == 'branchsort':
            picknext = makebranchsorter()
        elif sortmode == 'datesort':
            picknext = makedatesorter()
        elif sortmode == 'sourcesort':
            picknext = makesourcesorter()
        else:
            raise util.Abort(_('unknown sort mode: %s') % sortmode)

        children, actives = mapchildren(parents)

        s = []
        pendings = {}
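        # Kahn-style topological sort: 'actives' holds revisions whose
        # unconverted parents have all been emitted, and pendings[c] tracks
        # the parents of c still waiting; once it empties, c becomes eligible.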
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise util.Abort(_('cycle detected between %s and %s')
                                       % (recode(c), recode(n)))
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise util.Abort(_("not all revisions were sorted"))

        return s

    def writeauthormap(self):
        authorfile = self.authorfile
        if authorfile:
            self.ui.status(_('Writing author map file %s\n') % authorfile)
            ofile = open(authorfile, 'w+')
            for author in self.authors:
                ofile.write("%s=%s\n" % (author, self.authors[author]))
            ofile.close()

    def readauthormap(self, authorfile):
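        # The author map file contains one 'source author=destination author'
        # entry per line; blank lines and lines starting with '#' are skipped.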
        afile = open(authorfile, 'r')
        for line in afile:

            line = line.strip()
            if not line or line.startswith('#'):
                continue

            try:
                srcauthor, dstauthor = line.split('=', 1)
            except ValueError:
                msg = _('Ignoring bad line in author map file %s: %s\n')
                self.ui.warn(msg % (authorfile, line.rstrip()))
                continue

            srcauthor = srcauthor.strip()
            dstauthor = dstauthor.strip()
            if self.authors.get(srcauthor) in (None, dstauthor):
                msg = _('mapping author %s to %s\n')
                self.ui.debug(msg % (srcauthor, dstauthor))
                self.authors[srcauthor] = dstauthor
                continue

            m = _('overriding mapping for author %s, was %s, will be %s\n')
            self.ui.status(m % (srcauthor, self.authors[srcauthor], dstauthor))

        afile.close()

    def cachecommit(self, rev):
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        commit.branch = self.branchmap.get(commit.branch, commit.branch)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        commit = self.commitcache[rev]

        changes = self.source.getchanges(rev)
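        # A plain string result means this revision produces no new changeset:
        # it is either skipped outright (SKIPREV) or collapsed onto another
        # revision that has already been converted (e.g. by a filemap).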
        if isinstance(changes, basestring):
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies = changes
        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append((self.map[prev],
                                  self.commitcache[prev].branch))
        self.dest.setbranch(commit.branch, pbranches)
        try:
            parents = self.splicemap[rev]
            self.ui.status(_('spliced in %s as parents of %s\n') %
                           (parents, rev))
            parents = [self.map.get(p, p) for p in parents]
        except KeyError:
            parents = [b[0] for b in pbranches]
        source = progresssource(self.ui, self.source, len(files))
        newnode = self.dest.putcommit(files, copies, parents, commit,
                                      source, self.map)
        source.close()
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def convert(self, sortmode):
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status(_("scanning source...\n"))
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.mergesplicemap(parents, self.splicemap)
            self.ui.status(_("sorting...\n"))
            t = self.toposort(parents, sortmode)
            num = len(t)
            c = None

            self.ui.status(_("converting...\n"))
            for i, c in enumerate(t):
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                # Convert the log message to the local encoding without using
                # tolocal(), because convert() forces encoding.encoding to
                # 'utf-8' while the conversion runs.
                self.ui.status("%d %s\n" % (num, recode(desc)))
                self.ui.note(_("source: %s\n") % recode(c))
                self.ui.progress(_('converting'), i, unit=_('revisions'),
                                 total=len(t))
                self.copy(c)
            self.ui.progress(_('converting'), None)

            tags = self.source.gettags()
            ctags = {}
            for k in tags:
                v = tags[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    ctags[k] = self.map[v]

            if c and ctags:
                nrev, tagsparent = self.dest.puttags(ctags)
                if nrev and tagsparent:
                    # write another hash correspondence to override the previous
                    # one so we don't end up with extra tag heads
                    tagsparents = [e for e in self.map.iteritems()
                                   if e[1] == tagsparent]
                    if tagsparents:
                        self.map[tagsparents[0][0]] = nrev

            bookmarks = self.source.getbookmarks()
            cbookmarks = {}
            for k in bookmarks:
                v = bookmarks[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    cbookmarks[k] = self.map[v]

            if c and cbookmarks:
                self.dest.putbookmarks(cbookmarks)

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        try:
            self.dest.after()
        finally:
            self.source.after()
        self.map.close()

def convert(ui, src, dest=None, revmapfile=None, **opts):
    global orig_encoding
    orig_encoding = encoding.encoding
    encoding.encoding = 'UTF-8'

    # support --authors as an alias for --authormap
    if not opts.get('authormap'):
        opts['authormap'] = opts.get('authors')

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status(_("assuming destination %s\n") % dest)

    destc = convertsink(ui, dest, opts.get('dest_type'))

    try:
        srcc, defaultsort = convertsource(ui, src, opts.get('source_type'),
                                          opts.get('rev'))
    except Exception:
        for path in destc.created:
            shutil.rmtree(path, True)
        raise

    sortmodes = ('branchsort', 'datesort', 'sourcesort')
    sortmode = [m for m in sortmodes if opts.get(m)]
    if len(sortmode) > 1:
        raise util.Abort(_('more than one sort mode specified'))
    sortmode = sortmode and sortmode[0] or defaultsort
    if sortmode == 'sourcesort' and not srcc.hasnativeorder():
        raise util.Abort(_('--sourcesort is not supported by this data source'))

    fmap = opts.get('filemap')
    if fmap:
        srcc = filemap.filemap_source(ui, srcc, fmap)
        destc.setfilemapmode(True)

    if not revmapfile:
        try:
            revmapfile = destc.revmapfile()
        except Exception:
            revmapfile = os.path.join(destc, "map")

    c = converter(ui, srcc, destc, revmapfile, opts)
    c.convert(sortmode)