hgext/convert/__init__.py
author Bryan O'Sullivan <bos@serpentine.com>
Mon, 06 Aug 2007 14:59:00 -0700
changeset 5133 745cffe59ca8
parent 5127 39b6eaee6fd7
child 5134 c14968344d19
permissions -rw-r--r--
convert: use '.' as destination name if renaming subdir into root

# convert.py Foreign SCM converter
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

from common import NoRepo, converter_source, converter_sink, decodeargs
from cvs import convert_cvs
from git import convert_git
from hg import mercurial_source, mercurial_sink
from subversion import convert_svn

import os, shlex, shutil, sys
from mercurial import hg, ui, util, commands
from mercurial.i18n import _

# Append both command names defined in this file to commands.norepo.
# NOTE(review): presumably this marks them as runnable without a local
# repository -- confirm against mercurial.commands.
commands.norepo += " convert debug-svn-log"

# Converter classes probed, in this order, by convertsource() and
# convertsink().  The first class that exposes the required method and
# whose constructor does not raise NoRepo is used.
converters = [convert_cvs, convert_git, convert_svn, mercurial_source,
              mercurial_sink]

def convertsource(ui, path, **opts):
    '''Return a source converter for the repository at path.

    Every class in 'converters' is tried in order.  A class is skipped
    when it has no 'getcommit' attribute or when probing/instantiating it
    raises AttributeError or NoRepo.  util.Abort is raised when no class
    accepts the path.'''
    for candidate in converters:
        try:
            source = candidate.getcommit and candidate(ui, path, **opts)
        except (AttributeError, NoRepo):
            # not a source converter, or not a repository of this type
            continue
        return source
    raise util.Abort('%s: unknown repository type' % path)

def convertsink(ui, path):
    '''Return a sink converter for the existing directory at path.

    util.Abort is raised when path is not a directory.  Otherwise every
    class in 'converters' is tried in order; a class is skipped when it
    has no 'putcommit' attribute or when probing/instantiating it raises
    AttributeError or NoRepo.  util.Abort is raised when no class accepts
    the path.'''
    if os.path.isdir(path):
        for candidate in converters:
            try:
                sink = candidate.putcommit and candidate(ui, path)
            except (AttributeError, NoRepo):
                # not a sink converter, or not a repository of this type
                continue
            return sink
        raise util.Abort('%s: unknown repository type' % path)
    raise util.Abort("%s: not a directory" % path)

class convert(object):
    '''Copy the changesets of a source converter into a sink converter.

    The object keeps a source ID -> destination ID map (self.map) that is
    loaded from and appended to the revision map file, and an author map
    (self.authors) used to rewrite commit authors before they are copied.
    '''

    def __init__(self, ui, source, dest, revmapfile, filemapper, opts):
        '''Load the existing revision map and author maps, if any.

        ui         -- object providing status/debug/warn
        source     -- source converter
        dest       -- sink converter
        revmapfile -- path of the revision map file (may not exist yet)
        filemapper -- callable mapping a source file name to a
                      destination name, or to a false value to drop it
        opts       -- option dictionary ('authors' and 'datesort' are
                      read by this class)
        '''
        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.revmapfile = revmapfile
        self.revmapfilefd = None
        self.authors = {}
        self.authorfile = None
        self.mapfile = filemapper

        self.map = {}
        try:
            origrevmapfile = open(self.revmapfile, 'r')
            try:
                for l in origrevmapfile:
                    # split() already discards the line terminator.
                    # Slicing the last character off by hand would
                    # truncate the destination ID of a final line that
                    # has no trailing newline.
                    sv, dv = l.split()
                    self.map[sv] = dv
            finally:
                origrevmapfile.close()
        except IOError:
            # the revision map cannot be read (e.g. it does not exist
            # yet): start with an empty map
            pass

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authors'):
            self.readauthormap(opts.get('authors'))
            self.authorfile = self.dest.authorfile()

    def walktree(self, heads):
        '''Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset.

        Every visited commit is also stored in self.commitcache.'''
        # work on a copy so the caller's list is not consumed
        visit = list(heads)
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map: continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            cp = self.commitcache[n].parents
            parents[n] = []
            for p in cp:
                parents[n].append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        '''Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.

        parents is the mapping returned by walktree().  When the
        'datesort' option is set, the result is re-sorted by (depth,
        commit date, id).'''
        visit = parents.keys()
        seen = {}
        children = {}

        # first pass: invert the parent mapping into a child mapping
        while visit:
            n = visit.pop(0)
            if n in seen: continue
            seen[n] = 1
            # Ensure that nodes without parents are present in the 'children'
            # mapping.
            children.setdefault(n, [])
            for p in parents[n]:
                if not p in self.map:
                    visit.append(p)
                children.setdefault(p, []).append(n)

        # second pass: emit a node once all its unconverted parents have
        # been emitted, requeueing it otherwise
        s = []
        removed = {}
        visit = children.keys()
        while visit:
            n = visit.pop(0)
            if n in removed: continue
            dep = 0
            if n in parents:
                for p in parents[n]:
                    if p in self.map: continue
                    if p not in removed:
                        # we're still dependent
                        visit.append(n)
                        dep = 1
                        break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                if n not in self.map:
                    s.append(n)
                if n in children:
                    for c in children[n]:
                        visit.insert(0, c)

        if self.opts.get('datesort'):
            # depth[n] is one more than the largest depth among n's
            # unconverted parents; s is already in topological order, so
            # those depths are known when n is reached
            depth = {}
            for n in s:
                depth[n] = 0
                pl = [p for p in self.commitcache[n].parents
                      if p not in self.map]
                if pl:
                    depth[n] = max([depth[p] for p in pl]) + 1

            s = [(depth[n], self.commitcache[n].date, n) for n in s]
            s.sort()
            s = [e[2] for e in s]

        return s

    def mapentry(self, src, dst):
        '''Record that source revision src became destination revision dst,
        both in memory and in the revision map file.

        The map file is opened for appending on first use and flushed
        after every entry.  Raises util.Abort if it cannot be opened.'''
        if self.revmapfilefd is None:
            try:
                self.revmapfilefd = open(self.revmapfile, "a")
            except IOError:
                # Read errno/strerror from the exception object instead
                # of tuple-unpacking it in the except clause, which
                # raises ValueError when the IOError does not carry
                # exactly two arguments.
                inst = sys.exc_info()[1]
                raise util.Abort(
                    "Could not open map file %s: %s, %s\n"
                    % (self.revmapfile, inst.errno, inst.strerror))
        self.map[src] = dst
        self.revmapfilefd.write("%s %s\n" % (src, dst))
        self.revmapfilefd.flush()

    def writeauthormap(self):
        '''Write self.authors to self.authorfile, one "src=dst" line per
        author.  Does nothing when no author file is set.'''
        authorfile = self.authorfile
        if authorfile:
            self.ui.status('Writing author map file %s\n' % authorfile)
            ofile = open(authorfile, 'w+')
            try:
                for author in self.authors:
                    ofile.write("%s=%s\n" % (author, self.authors[author]))
            finally:
                ofile.close()

    def readauthormap(self, authorfile):
        '''Merge the "srcauthor=dstauthor" lines of authorfile into
        self.authors.

        An entry that differs from an existing mapping replaces it and is
        reported through ui.status.  Lines without '=' are reported
        through ui.warn and skipped.'''
        afile = open(authorfile, 'r')
        try:
            for line in afile:
                # Split on the first '=' only, so that a destination
                # author containing '=' is kept whole.
                fields = line.split('=', 1)
                if len(fields) != 2:
                    self.ui.warn(
                        'Ignoring bad line in author file map %s: %s\n'
                        % (authorfile, line))
                    continue
                srcauthor = fields[0].strip()
                dstauthor = fields[1].strip()
                if (srcauthor in self.authors
                    and dstauthor != self.authors[srcauthor]):
                    self.ui.status(
                        'Overriding mapping for author %s, was %s, will be %s\n'
                        % (srcauthor, self.authors[srcauthor], dstauthor))
                else:
                    self.ui.debug('Mapping author %s to %s\n'
                                  % (srcauthor, dstauthor))
                # Store the mapping in both cases; the override announced
                # above must actually take effect.
                self.authors[srcauthor] = dstauthor
        finally:
            afile.close()

    def copy(self, rev):
        '''Copy the cached commit rev from the source to the destination
        and record the resulting destination ID in the revision map.'''
        commit = self.commitcache[rev]
        do_copies = hasattr(self.dest, 'copyfile')
        filenames = []

        files, copies = self.source.getchanges(rev)
        for f, v in files:
            newf = self.mapfile(f)
            if not newf:
                # dropped by the file mapper
                continue
            filenames.append(newf)
            try:
                data = self.source.getfile(f, v)
            except IOError:
                # the source cannot provide the file in this revision:
                # delete it on the destination side
                self.dest.delfile(newf)
            else:
                e = self.source.getmode(f, v)
                self.dest.putfile(newf, e, data)
                if do_copies:
                    if f in copies:
                        copyf = self.mapfile(copies[f])
                        if copyf:
                            # Merely marks that a copy happened.
                            self.dest.copyfile(copyf, newf)

        parents = [self.map[r] for r in commit.parents]
        newnode = self.dest.putcommit(filenames, parents, commit)
        self.mapentry(rev, newnode)

    def convert(self):
        '''Run the conversion: scan the source, order the unconverted
        changesets, copy them one by one, then transfer tags and write
        the author map.  cleanup() is always called at the end.'''
        try:
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status("scanning source...\n")
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.ui.status("sorting...\n")
            t = self.toposort(parents)
            num = len(t)
            # stays None when nothing was converted in this run
            c = None

            self.ui.status("converting...\n")
            for c in t:
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                author = self.commitcache[c].author
                author = self.authors.get(author, author)
                self.commitcache[c].author = author
                self.ui.status("%d %s\n" % (num, desc))
                self.copy(c)

            # keep only the tags whose target revision has been converted
            tags = self.source.gettags()
            ctags = {}
            for k in tags:
                v = tags[k]
                if v in self.map:
                    ctags[k] = self.map[v]

            if c and ctags:
                nrev = self.dest.puttags(ctags)
                # write another hash correspondence to override the previous
                # one so we don't end up with extra tag heads
                if nrev:
                    self.mapentry(c, nrev)

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        '''Call the destination's after() hook and close the revision map
        file if it was opened.'''
        try:
            self.dest.after()
        finally:
            # close the map file even when the after() hook fails
            if self.revmapfilefd:
                self.revmapfilefd.close()

def rpairs(name):
    '''Generate (prefix, suffix) splits of a '/'-separated name.

    The first pair is (name, ''); each following pair cuts at the next
    '/' to the left, e.g. 'a/b/c' gives ('a/b/c', ''), ('a/b', 'c'),
    ('a', 'b/c').  The separator itself belongs to neither half.'''
    cut = len(name)
    while cut >= 0:
        head, tail = name[:cut], name[cut + 1:]
        yield head, tail
        # rfind returns -1 once no separator is left, which ends the loop
        cut = name.rfind('/', 0, cut)

class filemapper(object):
    '''Map and filter filenames when importing.
    A name can be mapped to itself, a new name, or None (omit from new
    repository).'''

    def __init__(self, ui, path=None):
        '''Create an empty mapper and, if path is given, load the filemap
        file at path.  Raises util.Abort if that file contains errors.'''
        self.ui = ui
        self.include = {}
        self.exclude = {}
        self.rename = {}
        if path:
            if self.parse(path):
                raise util.Abort(_('errors in filemap'))

    def parse(self, path):
        '''Read the directives of the filemap file at path into the
        include, exclude and rename tables.

        Recognized directives are 'include NAME', 'exclude NAME',
        'rename SRC DEST' and 'source FILE' (which parses another file).
        Problems are reported through ui.warn; the number of problems
        found is returned.'''
        errs = 0
        def check(name, mapping, listname):
            # report a name that is already present in a conflicting list
            if name in mapping:
                self.ui.warn(_('%s:%d: %r already in %s list\n') %
                             (lex.infile, lex.lineno, name, listname))
                return 1
            return 0
        fp = open(path)
        try:
            lex = shlex.shlex(fp, path, True)
            # let file names containing punctuation form a single token
            lex.wordchars += '!@#$%^&*()-=+[]{}|;:,./<>?'
            cmd = lex.get_token()
            while cmd:
                if cmd == 'include':
                    name = lex.get_token()
                    errs += check(name, self.exclude, 'exclude')
                    self.include[name] = name
                elif cmd == 'exclude':
                    name = lex.get_token()
                    errs += check(name, self.include, 'include')
                    errs += check(name, self.rename, 'rename')
                    self.exclude[name] = name
                elif cmd == 'rename':
                    src = lex.get_token()
                    dest = lex.get_token()
                    errs += check(src, self.exclude, 'exclude')
                    self.rename[src] = dest
                elif cmd == 'source':
                    errs += self.parse(lex.get_token())
                else:
                    self.ui.warn(_('%s:%d: unknown directive %r\n') %
                                 (lex.infile, lex.lineno, cmd))
                    errs += 1
                cmd = lex.get_token()
        finally:
            # do not leak the file handle, even if parsing fails
            fp.close()
        return errs

    def lookup(self, name, mapping):
        '''Find the longest '/'-delimited prefix of name that is a key of
        mapping.

        Returns (mapping[prefix], prefix, suffix), or ('', name, '') when
        no prefix matches.'''
        for pre, suf in rpairs(name):
            try:
                return mapping[pre], pre, suf
            except KeyError:
                pass
        return '', name, ''

    def __call__(self, name):
        '''Return the destination name for source file name, or None if
        the file must be omitted.'''
        # with no include list, every name is included
        if self.include:
            inc = self.lookup(name, self.include)[0]
        else:
            inc = name
        if self.exclude:
            exc = self.lookup(name, self.exclude)[0]
        else:
            exc = ''
        if not inc or exc:
            return None
        newpre, pre, suf = self.lookup(name, self.rename)
        if newpre:
            # a destination of '.' moves the renamed directory's contents
            # to the root: keep only the part below the matched prefix
            if newpre == '.':
                return suf
            if suf:
                return newpre + '/' + suf
            return newpre
        return name

def _convert(ui, src, dest=None, revmapfile=None, **opts):
    """Convert a foreign SCM repository to a Mercurial one.

    Accepted source formats:
    - GIT
    - CVS
    - SVN

    Accepted destination formats:
    - Mercurial

    If no revision is given, all revisions will be converted. Otherwise,
    convert will only import up to the named revision (given in a format
    understood by the source).

    If no destination directory name is specified, it defaults to the
    basename of the source with '-hg' appended.  If the destination
    repository doesn't exist, it will be created.

    If <revmapfile> isn't given, it will be put in a default location
    (<dest>/.hg/shamap by default).  The <revmapfile> is a simple text
    file that maps each source commit ID to the destination ID for
    that revision, like so:
    <source ID> <destination ID>

    If the file doesn't exist, it's automatically created.  It's updated
    on each commit copied, so convert-repo can be interrupted and can
    be run repeatedly to copy new commits.

    The [username mapping] file is a simple text file that maps each source
    commit author to a destination commit author. It is handy for source SCMs
    that use unix logins to identify authors (eg: CVS). One line per author
    mapping and the line format is:
    srcauthor=whatever string you want
    """

    util._encoding = 'UTF-8'

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status("assuming destination %s\n" % dest)

    # Try to be smart and initialize things when required
    created = False
    if os.path.isdir(dest):
        if len(os.listdir(dest)) > 0:
            # a non-empty directory must already be a Mercurial repository
            try:
                hg.repository(ui, dest)
                ui.status("destination %s is a Mercurial repository\n" % dest)
            except hg.RepoError:
                raise util.Abort(
                    "destination directory %s is not empty.\n"
                    "Please specify an empty directory to be initialized\n"
                    "or an already initialized mercurial repository"
                    % dest)
        else:
            ui.status("initializing destination %s repository\n" % dest)
            hg.repository(ui, dest, create=True)
            created = True
    elif os.path.exists(dest):
        raise util.Abort("destination %s exists and is not a directory" % dest)
    else:
        ui.status("initializing destination %s repository\n" % dest)
        hg.repository(ui, dest, create=True)
        created = True

    destc = convertsink(ui, dest)

    try:
        srcc = convertsource(ui, src, rev=opts.get('rev'))
    except Exception:
        # do not leave behind a repository that this call created
        if created:
            shutil.rmtree(dest, True)
        raise

    if not revmapfile:
        try:
            revmapfile = destc.revmapfile()
        except Exception:
            # The sink cannot name a default location: fall back to a
            # "map" file inside the destination directory.  The path to
            # join is 'dest'; 'destc' is the sink object, not a path.
            revmapfile = os.path.join(dest, "map")

    # opts.get() so that callers which do not pass a 'filemap' option get
    # an empty file mapper instead of a KeyError
    c = convert(ui, srcc, destc, revmapfile,
                filemapper(ui, opts.get('filemap')), opts)
    c.convert()

def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # apply util.set_binary to stdin first, then stdout
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    # the arguments for the log fetch arrive encoded on stdin
    payload = sys.stdin.read()
    args = decodeargs(payload)
    # NOTE(review): 'subversion' is not imported by name in this file;
    # presumably it resolves because importing the sibling submodule binds
    # it in the package namespace -- confirm.
    subversion.get_log_child(sys.stdout, *args)

# Command table: maps each command name to a tuple of
# (function, option list, synopsis string).
# NOTE(review): each option entry looks like (short flag, long name,
# default value, help text) -- confirm against Mercurial's command table
# conventions.  The long names 'authors', 'filemap', 'rev' and 'datesort'
# are the keys read from 'opts' elsewhere in this file.
cmdtable = {
    "convert":
        (_convert,
         [('A', 'authors', '', 'username mapping filename'),
          ('', 'filemap', '', 'remap file names using contents of file'),
          ('r', 'rev', '', 'import up to target revision REV'),
          ('', 'datesort', None, 'try to sort changesets by date')],
         'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
    # takes no options; its input is read from stdin by debugsvnlog()
    "debug-svn-log":
        (debugsvnlog,
         [],
         'hg debug-svn-log'),
}