changeset 5877:5692bed8230b

Merge with crew
author Matt Mackall <mpm@selenic.com>
date Fri, 18 Jan 2008 10:07:36 -0600
parents 2c565b9598b8 (current diff) fb93c774dfff (diff)
children d39af2eabb8c
files
diffstat 3 files changed, 309 insertions(+), 70 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/convert/subversion.py	Fri Jan 18 09:03:17 2008 -0600
+++ b/hgext/convert/subversion.py	Fri Jan 18 10:07:36 2008 -0600
@@ -89,6 +89,9 @@
                        receiver)
     except SubversionException, (inst, num):
         pickle.dump(num, fp, protocol)
+    except IOError:
+        # Caller may interrupt the iteration
+        pickle.dump(None, fp, protocol)
     else:
         pickle.dump(None, fp, protocol)
     fp.close()
@@ -102,7 +105,53 @@
     args = decodeargs(sys.stdin.read())
     get_log_child(sys.stdout, *args)
 
+class logstream:
+    """Interruptible revision log iterator."""
+    def __init__(self, stdout):
+        self._stdout = stdout
+
+    def __iter__(self):
+        while True:
+            entry = pickle.load(self._stdout)
+            try:
+                orig_paths, revnum, author, date, message = entry
+            except:
+                if entry is None:
+                    break
+                raise SubversionException("child raised exception", entry)
+            yield entry
+
+    def close(self):
+        if self._stdout:
+            self._stdout.close()
+            self._stdout = None
+
+def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
+                strict_node_history=False):
+    args = [url, paths, start, end, limit, discover_changed_paths,
+            strict_node_history]
+    arg = encodeargs(args)
+    hgexe = util.hgexecutable()
+    cmd = '%s debugsvnlog' % util.shellquote(hgexe)
+    stdin, stdout = os.popen2(cmd, 'b')
+    stdin.write(arg)
+    stdin.close()
+    return logstream(stdout)
+
 # SVN conversion code stolen from bzr-svn and tailor
+#
+# Subversion looks like a versioned filesystem, branches structures
+# are defined by conventions and not enforced by the tool. First,
+# we define the potential branches (modules) as "trunk" and "branches"
+# children directories. Revisions are then identified by their
+# module and revision number (and a repository identifier).
+#
+# The revision graph is really a tree (or a forest). By default, a
+# revision parent is the previous revision in the same module. If the
+# module directory is copied/moved from another module then the
+# revision is the module root and its parent the source revision in
+# the parent module. A revision has at most one parent.
+#
 class svn_source(converter_source):
     def __init__(self, ui, url, rev=None):
         super(svn_source, self).__init__(ui, url, rev=rev)
@@ -133,7 +182,6 @@
             self.ctx = self.transport.client
             self.base = svn.ra.get_repos_root(self.ra)
             self.module = self.url[len(self.base):]
-            self.modulemap = {} # revision, module
             self.commits = {}
             self.paths = {}
             self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
@@ -257,45 +305,26 @@
             uuid, module, revnum = self.revsplit(rev)
             self.module = module
             self.reparent(module)
+            # We assume that:
+            # - requests for revisions after "stop" come from the
+            # revision graph backward traversal. Cache all of them
+            # down to stop, they will be used eventually.
+            # - requests for revisions before "stop" come to get
+            # isolated branches parents. Just fetch what is needed.
             stop = self.lastrevs.get(module, 0)
-            self._fetch_revisions(from_revnum=revnum, to_revnum=stop)
+            if revnum < stop:
+                stop = revnum + 1
+            self._fetch_revisions(revnum, stop)
         commit = self.commits[rev]
         # caller caches the result, so free it here to release memory
         del self.commits[rev]
         return commit
 
-    def get_log(self, paths, start, end, limit=0, discover_changed_paths=True,
-                strict_node_history=False):
-
-        def parent(fp):
-            while True:
-                entry = pickle.load(fp)
-                try:
-                    orig_paths, revnum, author, date, message = entry
-                except:
-                    if entry is None:
-                        break
-                    raise SubversionException("child raised exception", entry)
-                yield entry
-
-        args = [self.url, paths, start, end, limit, discover_changed_paths,
-                strict_node_history]
-        arg = encodeargs(args)
-        hgexe = util.hgexecutable()
-        cmd = '%s debugsvnlog' % util.shellquote(hgexe)
-        stdin, stdout = os.popen2(cmd, 'b')
-
-        stdin.write(arg)
-        stdin.close()
-
-        for p in parent(stdout):
-            yield p
-
     def gettags(self):
         tags = {}
         start = self.revnum(self.head)
         try:
-            for entry in self.get_log([self.tags], 0, start):
+            for entry in get_log(self.url, [self.tags], 0, start):
                 orig_paths, revnum, author, date, message = entry
                 for path in orig_paths:
                     if not path.startswith(self.tags+'/'):
@@ -400,13 +429,11 @@
         entries = []
         copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
         copies = {}
-        revnum = self.revnum(rev)
 
-        if revnum in self.modulemap:
-            new_module = self.modulemap[revnum]
-            if new_module != self.module:
-                self.module = new_module
-                self.reparent(self.module)
+        new_module, revnum = self.revsplit(rev)[1:]
+        if new_module != self.module:
+            self.module = new_module
+            self.reparent(self.module)
 
         for path, ent in paths:
             entrypath = get_entry_from_path(path, module=self.module)
@@ -432,12 +459,9 @@
 
                 # if a branch is created but entries are removed in the same
                 # changeset, get the right fromrev
-                if parents:
-                    uuid, old_module, fromrev = self.revsplit(parents[0])
-                else:
-                    fromrev = revnum - 1
-                    # might always need to be revnum - 1 in these 3 lines?
-                    old_module = self.modulemap.get(fromrev, self.module)
+                # parents cannot be empty here, you cannot remove things from
+                # a root revision.
+                uuid, old_module, fromrev = self.revsplit(parents[0])
 
                 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
                 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
@@ -508,6 +532,9 @@
 
                 # If the directory just had a prop change,
                 # then we shouldn't need to look for its children.
+                if ent.action == 'M':
+                    continue
+
                 # Also this could create duplicate entries. Not sure
                 # whether this will matter. Maybe should make entries a set.
                 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
@@ -568,23 +595,23 @@
 
         return (entries, copies)
 
-    def _fetch_revisions(self, from_revnum = 0, to_revnum = 347):
+    def _fetch_revisions(self, from_revnum, to_revnum):
+        if from_revnum < to_revnum:
+            from_revnum, to_revnum = to_revnum, from_revnum
+
         self.child_cset = None
         def parselogentry(orig_paths, revnum, author, date, message):
+            """Return the parsed commit object or None, and True if 
+            the revision is a branch root.
+            """
             self.ui.debug("parsing revision %d (%d changes)\n" %
                           (revnum, len(orig_paths)))
 
-            if revnum in self.modulemap:
-                new_module = self.modulemap[revnum]
-                if new_module != self.module:
-                    self.module = new_module
-                    self.reparent(self.module)
-
             rev = self.revid(revnum)
             # branch log might return entries for a parent we already have
-            if (rev in self.commits or
-                (revnum < self.lastrevs.get(self.module, 0))):
-                return
+
+            if (rev in self.commits or revnum < to_revnum):
+                return None, False
 
             parents = []
             # check whether this revision is the start of a branch
@@ -593,15 +620,12 @@
                 if ent.copyfrom_path:
                     # ent.copyfrom_rev may not be the actual last revision
                     prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
-                    self.modulemap[prev] = ent.copyfrom_path
                     parents = [self.revid(prev, ent.copyfrom_path)]
                     self.ui.note('found parent of branch %s at %d: %s\n' % \
                                      (self.module, prev, ent.copyfrom_path))
                 else:
                     self.ui.debug("No copyfrom path, don't know what to do.\n")
 
-            self.modulemap[revnum] = self.module # track backwards in time
-
             orig_paths = orig_paths.items()
             orig_paths.sort()
             paths = []
@@ -612,8 +636,6 @@
                     continue
                 paths.append((path, ent))
 
-            self.paths[rev] = (paths, parents)
-
             # Example SVN datetime. Includes microseconds.
             # ISO-8601 conformant
             # '2007-01-04T17:35:00.902377Z'
@@ -636,23 +658,50 @@
                           rev=rev.encode('utf-8'))
 
             self.commits[rev] = cset
+            # The parents list is *shared* among self.paths and the
+            # commit object. Both will be updated below.
+            self.paths[rev] = (paths, cset.parents)
             if self.child_cset and not self.child_cset.parents:
-                self.child_cset.parents = [rev]
+                self.child_cset.parents[:] = [rev]
             self.child_cset = cset
+            return cset, len(parents) > 0
 
         self.ui.note('fetching revision log for "%s" from %d to %d\n' %
                      (self.module, from_revnum, to_revnum))
 
         try:
-            for entry in self.get_log([self.module], from_revnum, to_revnum):
-                orig_paths, revnum, author, date, message = entry
-                if self.is_blacklisted(revnum):
-                    self.ui.note('skipping blacklisted revision %d\n' % revnum)
-                    continue
-                if orig_paths is None:
-                    self.ui.debug('revision %d has no entries\n' % revnum)
-                    continue
-                parselogentry(orig_paths, revnum, author, date, message)
+            firstcset = None
+            stream = get_log(self.url, [self.module], from_revnum, to_revnum)
+            try:
+                for entry in stream:
+                    paths, revnum, author, date, message = entry
+                    if self.is_blacklisted(revnum):
+                        self.ui.note('skipping blacklisted revision %d\n' 
+                                     % revnum)
+                        continue
+                    if paths is None:
+                        self.ui.debug('revision %d has no entries\n' % revnum)
+                        continue
+                    cset, branched = parselogentry(paths, revnum, author, 
+                                                   date, message)
+                    if cset:
+                        firstcset = cset
+                    if branched:
+                        break
+            finally:
+                stream.close()
+
+            if firstcset and not firstcset.parents:
+                # The first revision of the sequence (the last fetched one)
+                # has invalid parents if not a branch root. Find the parent
+                # revision now, if any.
+                try:
+                    firstrevnum = self.revnum(firstcset.rev)
+                    if firstrevnum > 1:
+                        latest = self.latest(self.module, firstrevnum - 1)
+                        firstcset.parents.append(self.revid(latest))
+                except util.Abort:
+                    pass
         except SubversionException, (inst, num):
             if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
                 raise NoSuchRevision(branch=self,
@@ -664,9 +713,9 @@
         # TODO: ra.get_file transmits the whole file instead of diffs.
         mode = ''
         try:
-            revnum = self.revnum(rev)
-            if self.module != self.modulemap[revnum]:
-                self.module = self.modulemap[revnum]
+            new_module, revnum = self.revsplit(rev)[1:]
+            if self.module != new_module:
+                self.module = new_module
                 self.reparent(self.module)
             info = svn.ra.get_file(self.ra, file, revnum, io)
             if isinstance(info, list):
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-svn-branches	Fri Jan 18 10:07:36 2008 -0600
@@ -0,0 +1,89 @@
+#!/bin/sh
+
+"$TESTDIR/hghave" svn svn-bindings || exit 80
+
+fix_path()
+{
+    tr '\\' /
+}
+
+echo "[extensions]" >> $HGRCPATH
+echo "convert = " >> $HGRCPATH
+echo "hgext.graphlog =" >> $HGRCPATH
+
+svnadmin create svn-repo
+
+svnpath=`pwd | fix_path`
+# SVN wants all paths to start with a slash. Unfortunately,
+# Windows ones don't. Handle that.
+expr $svnpath : "\/" > /dev/null
+if [ $? -ne 0 ]; then
+    svnpath='/'$svnpath
+fi
+
+echo % initial svn import
+mkdir projA
+cd projA
+mkdir trunk
+mkdir branches
+mkdir tags
+cd ..
+
+svnurl=file://$svnpath/svn-repo/projA
+svn import -m "init projA" projA $svnurl | fix_path
+
+echo % update svn repository
+svn co $svnurl A | fix_path
+cd A
+echo hello > trunk/letter.txt
+echo hey > trunk/letter2.txt
+echo ho > trunk/letter3.txt
+svn add trunk/letter.txt trunk/letter2.txt trunk/letter3.txt
+svn ci -m hello
+
+echo % branch to old letters
+svn copy trunk branches/old
+svn rm branches/old/letter3.txt
+svn ci -m "branch trunk, remove letter3"
+svn up
+
+echo % update trunk
+echo "what can I say ?" >> trunk/letter.txt
+svn ci -m "change letter"
+
+echo % update old branch
+echo "what's up ?" >> branches/old/letter2.txt
+svn ci -m "change letter2"
+
+echo % create a cross-branch revision
+svn move -m "move letter2" trunk/letter2.txt \
+    branches/old/letter3.txt
+echo "I am fine" >> branches/old/letter3.txt
+svn ci -m "move and update letter3.txt"
+
+echo % update old branch again
+echo "bye" >> branches/old/letter2.txt
+svn ci -m "change letter2 again"
+
+echo % update trunk again
+echo "how are you ?" >> trunk/letter.txt
+svn ci -m "last change to letter"
+cd ..
+
+echo % convert trunk and branches
+hg convert --datesort $svnurl A-hg
+
+echo % branch again from a converted revision
+cd A
+svn copy -r 1 $svnurl/trunk branches/old2
+svn ci -m "branch trunk@1 into old2"
+cd ..
+
+echo % convert again
+hg convert --datesort $svnurl A-hg
+
+cd A-hg
+hg glog --template '#rev# #desc|firstline# files: #files#\n'
+hg branches | sed 's/:.*/:/'
+hg tags -q
+cd ..
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-svn-branches.out	Fri Jan 18 10:07:36 2008 -0600
@@ -0,0 +1,101 @@
+% initial svn import
+Adding         projA/trunk
+Adding         projA/branches
+Adding         projA/tags
+
+Committed revision 1.
+% update svn repository
+A    A/trunk
+A    A/branches
+A    A/tags
+Checked out revision 1.
+A         trunk/letter.txt
+A         trunk/letter2.txt
+A         trunk/letter3.txt
+Adding         trunk/letter.txt
+Adding         trunk/letter2.txt
+Adding         trunk/letter3.txt
+Transmitting file data ...
+Committed revision 2.
+% branch to old letters
+A         branches/old
+D         branches/old/letter3.txt
+Adding         branches/old
+Adding         branches/old/letter.txt
+Adding         branches/old/letter2.txt
+Deleting       branches/old/letter3.txt
+
+Committed revision 3.
+At revision 3.
+% update trunk
+Sending        trunk/letter.txt
+Transmitting file data .
+Committed revision 4.
+% update old branch
+Sending        branches/old/letter2.txt
+Transmitting file data .
+Committed revision 5.
+% create a cross-branch revision
+A         branches/old/letter3.txt
+D         trunk/letter2.txt
+Adding         branches/old/letter3.txt
+Deleting       trunk/letter2.txt
+Transmitting file data .
+Committed revision 6.
+% update old branch again
+Sending        branches/old/letter2.txt
+Transmitting file data .
+Committed revision 7.
+% update trunk again
+Sending        trunk/letter.txt
+Transmitting file data .
+Committed revision 8.
+% convert trunk and branches
+initializing destination A-hg repository
+scanning source...
+sorting...
+converting...
+8 init projA
+7 hello
+6 branch trunk, remove letter3
+5 change letter
+4 change letter2
+3 move and update letter3.txt
+2 move and update letter3.txt
+1 change letter2 again
+0 last change to letter
+% branch again from a converted revision
+Checked out revision 1.
+A         branches/old2
+Adding         branches/old2
+
+Committed revision 9.
+% convert again
+scanning source...
+sorting...
+converting...
+0 branch trunk@1 into old2
+o  9 branch trunk@1 into old2 files:
+|
+| o  8 last change to letter files: letter.txt
+| |
+| | o  7 change letter2 again files: letter2.txt
+| | |
+| o |  6 move and update letter3.txt files: letter2.txt
+| | |
+| | o  5 move and update letter3.txt files: letter3.txt
+| | |
+| | o  4 change letter2 files: letter2.txt
+| | |
+| o |  3 change letter files: letter.txt
+| | |
++---o  2 branch trunk, remove letter3 files: letter.txt letter.txt letter2.txt letter2.txt
+| |
+| o  1 hello files: letter.txt letter2.txt letter3.txt
+|/
+o  0 init projA files:
+
+old2                           9:
+default                        8:
+old                            7:
+tip