convert: add bzr source
author Marek Kubica <marek@xivilization.net>
Fri, 26 Sep 2008 20:33:47 +0200
changeset 7053 209ef5f3534c
parent 7052 0ca4f42daed7
child 7054 e837f2294643
convert: add bzr source
hgext/convert/__init__.py
hgext/convert/bzr.py
hgext/convert/convcmd.py
tests/bzr-definitions
tests/hghave
tests/test-convert-bzr
tests/test-convert-bzr-directories
tests/test-convert-bzr-directories.out
tests/test-convert-bzr-ghosts
tests/test-convert-bzr-ghosts.out
tests/test-convert-bzr-merges
tests/test-convert-bzr-merges.out
tests/test-convert-bzr-treeroot
tests/test-convert-bzr-treeroot.out
tests/test-convert-bzr.out
tests/test-convert.out
--- a/hgext/convert/__init__.py	Sat Sep 27 10:11:31 2008 +0200
+++ b/hgext/convert/__init__.py	Fri Sep 26 20:33:47 2008 +0200
@@ -23,6 +23,7 @@
     - Subversion [svn]
     - Monotone [mtn]
     - GNU Arch [gnuarch]
+    - Bazaar [bzr]
 
     Accepted destination formats [identifiers]:
     - Mercurial [hg]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/convert/bzr.py	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,216 @@
+# bzr support for the convert extension
+# This module handles 'bzr', formerly known as Bazaar-NG; it cannot
+# access old 'baz' (GNU Arch) repositories, but those were never widely used
+
+import os
+from mercurial import demandimport
+# these do not work with demandimport, blacklist
+demandimport.ignore.extend([
+        'bzrlib.transactions',
+        'bzrlib.urlutils',
+    ])
+
+from mercurial.i18n import _
+from mercurial import util
+from common import NoRepo, commit, converter_source
+
+try:
+    # bazaar imports
+    from bzrlib import branch, revision, errors
+    from bzrlib.revisionspec import RevisionSpec
+except ImportError:
+    pass
+
+class bzr_source(converter_source):
+    """Reads Bazaar repositories by using the Bazaar Python libraries"""
+
+    def __init__(self, ui, path, rev=None):
+        super(bzr_source, self).__init__(ui, path, rev=rev)
+
+        try:
+            # 'branch' is only bound if the module-level bzrlib import worked
+            branch
+        except NameError:
+            raise NoRepo('Bazaar modules could not be loaded')
+
+        if not os.path.exists(os.path.join(path, '.bzr')):
+            raise NoRepo('%s does not look like a Bazaar repo' % path)
+
+        path = os.path.abspath(path)
+        self.branch = branch.Branch.open(path)
+        self.sourcerepo = self.branch.repository
+        self._parentids = {}  # revision id -> parent ids, filled by getcommit
+
+    def before(self):
+        """Before the conversion begins, acquire a read lock
+        for all the operations that might need it. Fortunately
+        read locks don't block other reads or writes to the
+        repository, so this shouldn't have any impact on the usage of
+        the source repository.
+
+        The alternative would be locking on every operation that
+        needs locks (there are currently two: getting the file and
+        getting the parent map) and releasing immediately after,
+        but that alternative can take up to 40% longer."""
+        self.sourcerepo.lock_read()
+
+    def after(self):
+        self.sourcerepo.unlock()  # release the read lock taken in before()
+
+    def getheads(self):
+        if not self.rev:  # no revision requested: use the branch's last one
+            return [self.branch.last_revision()]
+        try:
+            r = RevisionSpec.from_string(self.rev)
+            info = r.in_history(self.branch)
+        except errors.BzrError:
+            raise util.Abort(_('%s is not a valid revision in current branch')
+                             % self.rev)
+        return [info.rev_id]  # always a one-element list
+
+    def getfile(self, name, rev):
+        revtree = self.sourcerepo.revision_tree(rev)
+        fileid = revtree.path2id(name)
+        if fileid is None:
+            # no file id for this path in rev: report the file as missing
+            raise IOError(_('%s is not available in %s anymore') %
+                    (name, rev))
+        sio = revtree.get_file(fileid)
+        return sio.read()  # whole content; sio is not closed explicitly
+
+    def getmode(self, name, rev):
+        return self._modecache[(name, rev)]  # filled by _gettreechanges()
+
+    def getchanges(self, version):
+        # reset the mode cache and remember the tree of this revision
+        self._modecache = {}
+        self._revtree = self.sourcerepo.revision_tree(version)
+        # parent ids were cached by getcommit(); pop() drops the entry
+        parentids = self._parentids.pop(version)
+        # only diff against first parent id
+        prevtree = self.sourcerepo.revision_tree(parentids[0])
+        return self._gettreechanges(self._revtree, prevtree)
+
+    def getcommit(self, version):
+        rev = self.sourcerepo.get_revision(version)
+        # cache the parent ids for getchanges()/getchangedfiles()
+        if not rev.parent_ids:
+            parents = []
+            self._parentids[version] = (revision.NULL_REVISION,)
+        else:
+            parents = self._filterghosts(rev.parent_ids)
+            self._parentids[version] = parents
+
+        return commit(parents=parents,
+                # hg wants seconds west of UTC; bzr stores seconds east of UTC
+                date='%d %d' % (rev.timestamp, -rev.timezone),
+                author=self.recode(rev.committer),
+                # bzr returns bytestrings or unicode, depending on the content
+                desc=self.recode(rev.message),
+                rev=version)
+
+    def gettags(self):
+        if not self.branch.supports_tags():
+            return {}
+        tagdict = self.branch.tags.get_tag_dict()
+        bytetags = {}
+        for name, rev in tagdict.iteritems():
+            bytetags[self.recode(name)] = rev  # encode unicode tag names
+        return bytetags
+
+    def getchangedfiles(self, rev, i):
+        self._modecache = {}
+        curtree = self.sourcerepo.revision_tree(rev)
+        # NOTE(review): getchanges() pops this same cache entry too
+        parentids = self._parentids.pop(rev)
+        if i is not None:
+            parentid = parentids[i]
+        else:
+            # no parent index given: compare against the null revision
+            parentid = revision.NULL_REVISION
+
+        prevtree = self.sourcerepo.revision_tree(parentid)
+        changes = [e[0] for e in self._gettreechanges(curtree, prevtree)[0]]
+        return changes  # changed paths only, revision ids dropped
+
+    def _gettreechanges(self, current, origin):
+        revid = current._revision_id
+        changes = []
+        renames = {}
+        for (fileid, paths, changed_content, versioned, parent, name,
+            kind, executable) in current.iter_changes(origin):
+
+            if paths[0] == u'' or paths[1] == u'':
+                # ignore changes to tree root
+                continue
+
+            # bazaar tracks directories, mercurial does not, so
+            # we have to rename the directory contents
+            if kind[1] == 'directory':
+                if None not in paths and paths[0] != paths[1]:
+                    # neither an add nor a delete - a move
+                    # rename all directory contents manually
+                    subdir = origin.inventory.path2id(paths[0])
+                    # get all child-entries of the directory
+                    for name, entry in origin.inventory.iter_entries(subdir):
+                        # hg does not track directory renames
+                        if entry.kind == 'directory':
+                            continue
+                        frompath = self.recode(paths[0] + '/' + name)
+                        topath = self.recode(paths[1] + '/' + name)
+                        # register the files as changed
+                        changes.append((frompath, revid))
+                        changes.append((topath, revid))
+                        # add to mode cache
+                        mode = ((entry.executable and 'x')
+                                or (entry.kind == 'symlink' and 's') or '')
+                        self._modecache[(topath, revid)] = mode
+                        # register the change as move
+                        renames[topath] = frompath
+
+                # no further changes, go to the next change
+                continue
+
+            # we got unicode paths, need to convert them
+            path, topath = [self.recode(part) for part in paths]
+
+            if topath is None:
+                # file deleted
+                changes.append((path, revid))
+                continue
+
+            # renamed: remember where the file came from
+            if path and path != topath:
+                renames[topath] = path
+
+            # mode cache; NOTE(review): confirm hg's symlink flag is really 's'
+            kind, executable = [e[1] for e in (kind, executable)]
+            mode = ((executable and 'x') or (kind == 'symlink' and 's')
+                    or '')
+            self._modecache[(topath, revid)] = mode
+            changes.append((topath, revid))
+
+        return changes, renames  # [(path, revid), ...], {newpath: oldpath}
+
+    def _filterghosts(self, ids):
+        """Filters out ghost revisions which hg does not support, see
+        <http://bazaar-vcs.org/GhostRevision>
+        """
+        parentmap = self.sourcerepo.get_parent_map(ids)
+        parents = tuple(parent for parent in ids if parent in parentmap)
+        return parents  # tuple, same order as ids
+
+    def recode(self, s, encoding=None):
+        """This version of recode encodes unicode to a bytestring, using
+        the given encoding, else self.encoding, else the UTF-8 codec.
+        Types other than unicode are returned unchanged; this is
+        intentional, e.g. None is not going to be encoded but is instead
+        just passed through.
+        """
+        if not encoding:
+            encoding = self.encoding or 'utf-8'
+
+        if isinstance(s, unicode):
+            return s.encode(encoding)
+        else:
+            # leave it alone
+            return s
--- a/hgext/convert/convcmd.py	Sat Sep 27 10:11:31 2008 +0200
+++ b/hgext/convert/convcmd.py	Fri Sep 26 20:33:47 2008 +0200
@@ -13,6 +13,7 @@
 from subversion import debugsvnlog, svn_source, svn_sink
 from monotone import monotone_source
 from gnuarch import gnuarch_source
+from bzr import bzr_source
 import filemap
 
 import os, shutil
@@ -35,6 +36,7 @@
     ('darcs', darcs_source),
     ('mtn', monotone_source),
     ('gnuarch', gnuarch_source),
+    ('bzr', bzr_source),
     ]
 
 sink_converters = [
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/bzr-definitions	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,18 @@
+# this file holds the definitions that are used in various bzr tests
+
+"$TESTDIR/hghave" bzr || exit 80
+
+echo '[extensions]' >> $HGRCPATH
+echo 'convert = ' >> $HGRCPATH
+echo 'hgext.graphlog = ' >> $HGRCPATH
+
+glog()  # graph log, one line per changeset: rev, first desc line, files
+{
+    hg glog --template '#rev# "#desc|firstline#" files: #files#\n' "$@"
+}
+
+manifest()  # usage: manifest REPO REV
+{
+    echo "% manifest of $2"
+    hg -R "$1" manifest -v -r "$2"
+}
--- a/tests/hghave	Sat Sep 27 10:11:31 2008 +0200
+++ b/tests/hghave	Fri Sep 26 20:33:47 2008 +0200
@@ -24,6 +24,9 @@
 def has_baz():
     return matchoutput('baz --version 2>&1', r'baz Bazaar version')
 
+def has_bzr():
+    return matchoutput('bzr --version 2>&1', r'Bazaar \(bzr\)')
+
 def has_cvs():
     re = r'Concurrent Versions System.*?server'
     return matchoutput('cvs --version 2>&1', re)
@@ -146,6 +149,7 @@
 
 checks = {
     "baz": (has_baz, "GNU Arch baz client"),
+    "bzr": (has_bzr, "Canonical's Bazaar client"),
     "cvs": (has_cvs, "cvs client/server"),
     "cvsps": (has_cvsps, "cvsps utility"),
     "darcs": (has_darcs, "darcs client"),
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-bzr	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,81 @@
+#!/bin/sh
+
+. "$TESTDIR/bzr-definitions"
+
+echo % create and rename on the same file in the same step
+mkdir test-createandrename
+cd test-createandrename
+bzr init -q source
+cd source
+echo a > a
+bzr add -q a
+bzr commit -q -m 'Initial add: a'
+bzr mv a b
+echo a2 >> a
+bzr add -q a
+bzr commit -q -m 'rename a into b, create a'
+cd ..
+hg convert source source-hg
+glog -R source-hg
+echo "% test --rev option"
+hg convert -r 1 source source-1-hg
+glog -R source-1-hg
+cd ..
+
+echo % merge
+mkdir test-merge
+cd test-merge
+
+cat > helper.py <<EOF
+import sys
+from bzrlib import workingtree
+wt = workingtree.WorkingTree.open('.')
+
+message, stamp = sys.argv[1:]
+wt.commit(message, timestamp=int(stamp))
+EOF
+
+bzr init -q source
+cd source
+echo content > a
+echo content2 > b
+bzr add -q a b
+bzr commit -q -m 'Initial add'
+cd ..
+bzr branch -q source source-improve
+cd source
+echo more >> a
+python ../helper.py 'Editing a' 100
+cd ../source-improve
+echo content3 >> b
+python ../helper.py 'Editing b' 200
+cd ../source
+bzr merge -q ../source-improve
+bzr commit -q -m 'Merged improve branch'
+cd ..
+hg convert --datesort source source-hg
+glog -R source-hg
+cd ..
+
+echo % symlinks and executable files
+mkdir test-symlinks
+cd test-symlinks
+bzr init -q source
+cd source
+touch program
+chmod +x program
+ln -s program altname
+bzr add -q altname program
+bzr commit -q -m 'Initial setup'
+touch newprog
+chmod +x newprog
+rm altname
+ln -s newprog altname
+chmod -x program
+bzr add -q newprog
+bzr commit -q -m 'Symlink changed, x bits changed'
+cd ..
+hg convert source source-hg
+manifest source-hg 0
+manifest source-hg tip
+cd ..
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-bzr-directories	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,93 @@
+#!/bin/sh
+
+. "$TESTDIR/bzr-definitions"
+
+echo % empty directory
+mkdir test-empty
+cd test-empty
+bzr init -q source
+cd source
+echo content > a
+bzr add -q a
+bzr commit -q -m 'Initial add'
+mkdir empty
+bzr add -q empty
+bzr commit -q -m 'Empty directory added'
+echo content > empty/something
+bzr add -q empty/something
+bzr commit -q -m 'Added file into directory'
+cd ..
+hg convert source source-hg
+manifest source-hg 1
+manifest source-hg tip
+cd ..
+
+echo % directory renames
+mkdir test-dir-rename
+cd test-dir-rename
+bzr init -q source
+cd source
+mkdir tpyo
+echo content > tpyo/something
+bzr add -q tpyo
+bzr commit -q -m 'Added directory'
+bzr mv tpyo typo
+bzr commit -q -m 'Oops, typo'
+cd ..
+hg convert source source-hg
+manifest source-hg 0
+manifest source-hg tip
+cd ..
+
+echo % nested directory renames
+mkdir test-nested-dir-rename
+cd test-nested-dir-rename
+bzr init -q source
+cd source
+mkdir -p firstlevel/secondlevel/thirdlevel
+echo content > firstlevel/secondlevel/file
+echo this_needs_to_be_there_too > firstlevel/secondlevel/thirdlevel/stuff
+bzr add -q firstlevel
+bzr commit -q -m 'Added nested directories'
+bzr mv firstlevel/secondlevel secondlevel
+bzr commit -q -m 'Moved secondlevel one level up'
+cd ..
+hg convert source source-hg
+manifest source-hg tip
+cd ..
+
+echo % directory remove
+mkdir test-dir-remove
+cd test-dir-remove
+bzr init -q source
+cd source
+mkdir src
+echo content > src/sourcecode
+bzr add -q src
+bzr commit -q -m 'Added directory'
+bzr rm -q src
+bzr commit -q -m 'Removed directory'
+cd ..
+hg convert source source-hg
+manifest source-hg 0
+manifest source-hg tip
+cd ..
+
+echo % directory replace
+mkdir test-dir-replace
+cd test-dir-replace
+bzr init -q source
+cd source
+mkdir first second
+echo content > first/file
+echo morecontent > first/dummy
+echo othercontent > second/something
+bzr add -q first second
+bzr commit -q -m 'Initial layout'
+bzr mv first/file second/file
+bzr mv first third
+bzr commit -q -m 'Some conflicting moves'
+cd ..
+hg convert source source-hg
+manifest source-hg tip
+cd ..
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-bzr-directories.out	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,59 @@
+% empty directory
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+2 Initial add
+1 Empty directory added
+0 Added file into directory
+% manifest of 1
+644   a
+% manifest of tip
+644   a
+644   empty/something
+% directory renames
+tpyo => typo
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+1 Added directory
+0 Oops, typo
+% manifest of 0
+644   tpyo/something
+% manifest of tip
+644   typo/something
+% nested directory renames
+firstlevel/secondlevel => secondlevel
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+1 Added nested directories
+0 Moved secondlevel one level up
+% manifest of tip
+644   secondlevel/file
+644   secondlevel/thirdlevel/stuff
+% directory remove
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+1 Added directory
+0 Removed directory
+% manifest of 0
+644   src/sourcecode
+% manifest of tip
+% directory replace
+first/file => second/file
+first => third
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+1 Initial layout
+0 Some conflicting moves
+% manifest of tip
+644   second/file
+644   second/something
+644   third/dummy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-bzr-ghosts	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "$TESTDIR/bzr-definitions"
+
+cat > ghostcreator.py <<EOF
+import sys
+from bzrlib import workingtree
+wt = workingtree.WorkingTree.open('.')
+
+message, ghostrev = sys.argv[1:]
+wt.set_parent_ids(wt.get_parent_ids() + [ghostrev])
+wt.commit(message)
+EOF
+
+echo % ghost revisions
+mkdir test-ghost-revisions
+cd test-ghost-revisions
+bzr init -q source
+cd source
+echo content > somefile
+bzr add -q somefile
+bzr commit -q -m 'Initial layout setup'
+echo morecontent >> somefile
+python ../../ghostcreator.py 'Commit with ghost revision' ghostrev
+cd ..
+hg convert source source-hg
+glog -R source-hg
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-bzr-ghosts.out	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,11 @@
+% ghost revisions
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+1 Initial layout setup
+0 Commit with ghost revision
+o  1 "Commit with ghost revision" files: somefile
+|
+o  0 "Initial layout setup" files: somefile
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-bzr-merges	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+. "$TESTDIR/bzr-definitions"
+
+echo % test multiple merges at once
+mkdir test-multimerge
+cd test-multimerge
+bzr init -q source
+cd source
+echo content > file
+bzr add -q file
+bzr commit -q -m 'Initial add'
+cd ..
+bzr branch -q source source-branch1
+cd source-branch1
+echo morecontent >> file
+echo evenmorecontent > file-branch1
+bzr add -q file-branch1
+bzr commit -q -m 'Added branch1 file'
+cd ../source
+echo content > file-parent
+bzr add -q file-parent
+bzr commit -q -m 'Added parent file'
+cd ..
+bzr branch -q source source-branch2
+cd source-branch2
+echo somecontent > file-branch2
+bzr add -q file-branch2
+bzr commit -q -m 'Added brach2 file'
+cd ../source
+bzr merge -q ../source-branch1
+bzr merge -q --force ../source-branch2
+bzr commit -q -m 'Merged branches'
+cd ..
+hg convert --datesort source source-hg
+glog -R source-hg
+manifest source-hg tip
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-bzr-merges.out	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,27 @@
+% test multiple merges at once
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+4 Initial add
+3 Added branch1 file
+2 Added parent file
+1 Added brach2 file
+0 Merged branches
+o    5 "(octopus merge fixup)" files:
+|\
+| o    4 "Merged branches" files: file-branch2
+| |\
+o---+  3 "Added brach2 file" files: file-branch2
+ / /
+| o  2 "Added parent file" files: file-parent
+| |
+o |  1 "Added branch1 file" files: file file-branch1
+|/
+o  0 "Initial add" files: file
+
+% manifest of tip
+644   file
+644   file-branch1
+644   file-branch2
+644   file-parent
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-bzr-treeroot	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+. "$TESTDIR/bzr-definitions"
+
+cat > treeset.py <<EOF
+import sys
+from bzrlib import workingtree
+wt = workingtree.WorkingTree.open('.')
+
+message, rootid = sys.argv[1:]
+wt.set_root_id('tree_root-%s' % rootid)
+wt.commit(message)
+EOF
+
+echo % change the id of the tree root
+mkdir test-change-treeroot-id
+cd test-change-treeroot-id
+bzr init -q source
+cd source
+echo content > file
+bzr add -q file
+bzr commit -q -m 'Initial add'
+python ../../treeset.py 'Changed root' new
+cd ..
+hg convert source source-hg
+manifest source-hg tip
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-bzr-treeroot.out	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,9 @@
+% change the id of the tree root
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+1 Initial add
+0 Changed root
+% manifest of tip
+644   file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-bzr.out	Fri Sep 26 20:33:47 2008 +0200
@@ -0,0 +1,51 @@
+% create and rename on the same file in the same step
+a => b
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+1 Initial add: a
+0 rename a into b, create a
+o  1 "rename a into b, create a" files: a b
+|
+o  0 "Initial add: a" files: a
+
+% test --rev option
+initializing destination source-1-hg repository
+scanning source...
+sorting...
+converting...
+0 Initial add: a
+o  0 "Initial add: a" files: a
+
+% merge
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+3 Initial add
+2 Editing a
+1 Editing b
+0 Merged improve branch
+o    3 "Merged improve branch" files:
+|\
+| o  2 "Editing b" files: b
+| |
+o |  1 "Editing a" files: a
+|/
+o  0 "Initial add" files: a b
+
+% symlinks and executable files
+initializing destination source-hg repository
+scanning source...
+sorting...
+converting...
+1 Initial setup
+0 Symlink changed, x bits changed
+% manifest of 0
+644   altname
+755 * program
+% manifest of tip
+644   altname
+755 * newprog
+644   program
--- a/tests/test-convert.out	Sat Sep 27 10:11:31 2008 +0200
+++ b/tests/test-convert.out	Fri Sep 26 20:33:47 2008 +0200
@@ -10,6 +10,7 @@
     - Subversion [svn]
     - Monotone [mtn]
     - GNU Arch [gnuarch]
+    - Bazaar [bzr]
 
     Accepted destination formats [identifiers]:
     - Mercurial [hg]