convert: fix cross-branches subversion revisions handling
authorPatrick Mezard <pmezard@gmail.com>
Thu, 17 Jan 2008 23:46:56 +0100
changeset 5872 7d4149cccc5d
parent 5871 c1244685425a
child 5873 ecb4eb0cbff2
convert: fix cross-branches subversion revisions handling The "one revision belongs to one branch" assumptions is relaxed. Branch revisions are parsed down to the first branch copy encountered, older history is skipped. It means the conversion is still not satisfying when dealing with branches overwriting themselves. This issue already existed in the previous version.
hgext/convert/subversion.py
--- a/hgext/convert/subversion.py	Tue Jan 01 23:57:20 2008 +0100
+++ b/hgext/convert/subversion.py	Thu Jan 17 23:46:56 2008 +0100
@@ -133,7 +133,6 @@
             self.ctx = self.transport.client
             self.base = svn.ra.get_repos_root(self.ra)
             self.module = self.url[len(self.base):]
-            self.modulemap = {} # revision, module
             self.commits = {}
             self.paths = {}
             self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
@@ -400,13 +399,11 @@
         entries = []
         copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
         copies = {}
-        revnum = self.revnum(rev)
 
-        if revnum in self.modulemap:
-            new_module = self.modulemap[revnum]
-            if new_module != self.module:
-                self.module = new_module
-                self.reparent(self.module)
+        new_module, revnum = self.revsplit(rev)[1:]
+        if new_module != self.module:
+            self.module = new_module
+            self.reparent(self.module)
 
         for path, ent in paths:
             entrypath = get_entry_from_path(path, module=self.module)
@@ -432,12 +429,9 @@
 
                 # if a branch is created but entries are removed in the same
                 # changeset, get the right fromrev
-                if parents:
-                    uuid, old_module, fromrev = self.revsplit(parents[0])
-                else:
-                    fromrev = revnum - 1
-                    # might always need to be revnum - 1 in these 3 lines?
-                    old_module = self.modulemap.get(fromrev, self.module)
+                # parents cannot be empty here, you cannot remove things from
+                # a root revision.
+                uuid, old_module, fromrev = self.revsplit(parents[0])
 
                 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
                 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
@@ -577,20 +571,17 @@
 
         self.child_cset = None
         def parselogentry(orig_paths, revnum, author, date, message):
+            """Return the parsed commit object or None, and True if 
+            the revision is a branch root.
+            """
             self.ui.debug("parsing revision %d (%d changes)\n" %
                           (revnum, len(orig_paths)))
 
-            if revnum in self.modulemap:
-                new_module = self.modulemap[revnum]
-                if new_module != self.module:
-                    self.module = new_module
-                    self.reparent(self.module)
-
             rev = self.revid(revnum)
             # branch log might return entries for a parent we already have
 
             if (rev in self.commits or revnum < to_revnum):
-                return None
+                return None, False
 
             parents = []
             # check whether this revision is the start of a branch
@@ -599,15 +590,12 @@
                 if ent.copyfrom_path:
                     # ent.copyfrom_rev may not be the actual last revision
                     prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
-                    self.modulemap[prev] = ent.copyfrom_path
                     parents = [self.revid(prev, ent.copyfrom_path)]
                     self.ui.note('found parent of branch %s at %d: %s\n' % \
                                      (self.module, prev, ent.copyfrom_path))
                 else:
                     self.ui.debug("No copyfrom path, don't know what to do.\n")
 
-            self.modulemap[revnum] = self.module # track backwards in time
-
             orig_paths = orig_paths.items()
             orig_paths.sort()
             paths = []
@@ -618,8 +606,6 @@
                     continue
                 paths.append((path, ent))
 
-            self.paths[rev] = (paths, parents)
-
             # Example SVN datetime. Includes microseconds.
             # ISO-8601 conformant
             # '2007-01-04T17:35:00.902377Z'
@@ -642,17 +628,25 @@
                           rev=rev.encode('utf-8'))
 
             self.commits[rev] = cset
+            # The parents list is *shared* among self.paths and the
+            # commit object. Both will be updated below.
+            self.paths[rev] = (paths, cset.parents)
             if self.child_cset and not self.child_cset.parents:
-                self.child_cset.parents = [rev]
+                self.child_cset.parents[:] = [rev]
             self.child_cset = cset
-            return cset
+            return cset, len(parents) > 0
 
         self.ui.note('fetching revision log for "%s" from %d to %d\n' %
                      (self.module, from_revnum, to_revnum))
 
         try:
             firstcset = None
+            branched = False
             for entry in self.get_log([self.module], from_revnum, to_revnum):
+                if branched:
+                    # The iterator must be exhausted for the child process
+                    # to terminate cleanly.
+                    continue
                 paths, revnum, author, date, message = entry
                 if self.is_blacklisted(revnum):
                     self.ui.note('skipping blacklisted revision %d\n' % revnum)
@@ -660,7 +654,8 @@
                 if paths is None:
                     self.ui.debug('revision %d has no entries\n' % revnum)
                     continue
-                cset = parselogentry(paths, revnum, author, date, message)
+                cset, branched = parselogentry(paths, revnum, author, 
+                                               date, message)
                 if cset:
                     firstcset = cset
 
@@ -686,9 +681,9 @@
         # TODO: ra.get_file transmits the whole file instead of diffs.
         mode = ''
         try:
-            revnum = self.revnum(rev)
-            if self.module != self.modulemap[revnum]:
-                self.module = self.modulemap[revnum]
+            new_module, revnum = self.revsplit(rev)[1:]
+            if self.module != new_module:
+                self.module = new_module
                 self.reparent(self.module)
             info = svn.ra.get_file(self.ra, file, revnum, io)
             if isinstance(info, list):