cvsps: recognize and eliminate CVS' synthetic "file added" revisions.
--- a/hgext/convert/cvsps.py Mon Mar 09 21:00:37 2009 -0500
+++ b/hgext/convert/cvsps.py Wed Mar 18 09:15:38 2009 -0400
@@ -33,6 +33,7 @@
.rcs - name of file as returned from CVS
.revision - revision number as tuple
.tags - list of tags on the file
+ .synthetic - is this a synthetic "file ... added on ..." revision?
'''
def __init__(self, **entries):
self.__dict__.update(entries)
@@ -107,6 +108,8 @@
re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
re_70 = re.compile('branches: (.+);$')
+ file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch')
+
prefix = '' # leading path to strip of what we get from CVS
if directory is None:
@@ -279,7 +282,8 @@
assert match, _('expected revision number')
e = logentry(rcs=scache(rcs), file=scache(filename),
revision=tuple([int(x) for x in match.group(1).split('.')]),
- branches=[], parent=None)
+ branches=[], parent=None,
+ synthetic=False)
state = 6
elif state == 6:
@@ -338,6 +342,22 @@
else:
e.comment.append(line)
+ # When a file is added on a branch B1, CVS creates a synthetic
+ # dead trunk revision 1.1 so that the branch has a root.
+ # Likewise, if you merge such a file to a later branch B2 (one
+ # that already existed when the file was added on B1), CVS
+ # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
+ # these revisions now, but mark them synthetic so
+ # createchangeset() can take care of them.
+ if (store and
+ e.dead and
+ e.revision[-1] == 1 and # 1.1 or 1.1.x.1
+ len(e.comment) == 1 and
+ file_added_re.match(e.comment[0])):
+ ui.debug(_('found synthetic rev in %s: %r\n')
+ % (e.rcs, e.comment[0]))
+ e.synthetic = True
+
if store:
# clean up the results and save in the log.
store = False
@@ -399,6 +419,7 @@
.entries - list of logentry objects in this changeset
.parents - list of one or two parent changesets
.tags - list of tags on this changeset
+ .synthetic - is this only a synthetic "file ... added on branch ..." rev?
'''
def __init__(self, **entries):
self.__dict__.update(entries)
@@ -438,6 +459,19 @@
files[e.file] = True
c.date = e.date # changeset date is date of latest commit in it
+ # Mark synthetic changesets
+
+ for c in changesets:
+ # Synthetic revisions always get their own changeset, because
+ # the log message includes the filename. E.g. if you add file3
+ # and file4 on a branch, you get four log entries and three
+ # changesets:
+ # "file file3 was added on branch ..." (synthetic, 1 entry)
+ # "file file4 was added on branch ..." (synthetic, 1 entry)
+ # "Add file3 and file4 to fix ..." (real, 2 entries)
+ # Hence the check for 1 entry here.
+ c.synthetic = (len(c.entries) == 1 and c.entries[0].synthetic)
+
# Sort files in each changeset
for c in changesets:
@@ -546,7 +580,20 @@
c.parents = []
if p is not None:
- c.parents.append(changesets[p])
+ p = changesets[p]
+
+ # Ensure no changeset has a synthetic changeset as a parent.
+ while p.synthetic:
+ assert len(p.parents) <= 1, \
+ _('synthetic changeset cannot have multiple parents')
+ if p.parents:
+ p = p.parents[0]
+ else:
+ p = None
+ break
+
+ if p is not None:
+ c.parents.append(p)
if mergefrom:
m = mergefrom.search(c.comment)
@@ -582,6 +629,15 @@
branches[c.branch] = i
i += 1
+ # Drop synthetic changesets (safe now that we have ensured no other
+ # changesets can have them as parents).
+ i = 0
+ while i < len(changesets):
+ if changesets[i].synthetic:
+ del changesets[i]
+ else:
+ i += 1
+
# Number changesets
for i, c in enumerate(changesets):
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-cvs-synthetic Wed Mar 18 09:15:38 2009 -0400
@@ -0,0 +1,65 @@
+#!/bin/sh
+
+# This feature requires use of builtin cvsps!
+"$TESTDIR/hghave" cvs || exit 80
+
+# XXX lots of duplication with other test-convert-cvs* scripts
+
+set -e
+
+echo "[extensions]" >> $HGRCPATH
+echo "convert = " >> $HGRCPATH
+echo "[convert]" >> $HGRCPATH
+echo "cvsps=builtin" >> $HGRCPATH
+
+echo % create cvs repository with one project
+mkdir cvsrepo
+cd cvsrepo
+export CVSROOT=`pwd`
+export CVS_OPTIONS=-f
+cd ..
+
+filter='sed "s:$CVSROOT:*REPO*:g"'
+cvscall()
+{
+ cvs -f "$@" | eval $filter
+}
+
+cvscall -q -d "$CVSROOT" init
+mkdir cvsrepo/proj
+
+cvscall co proj
+
+echo % create file1 on the trunk
+cd proj
+touch file1
+cvscall add file1
+cvscall ci -m"add file1 on trunk" file1
+
+echo % create two branches
+cvscall tag -b v1_0
+cvscall tag -b v1_1
+
+echo % create file2 on branch v1_0
+cvs up -rv1_0
+touch file2
+cvscall add file2
+cvscall ci -m"add file2 on branch v1_0" file2
+
+echo % create file3, file4 on branch v1_1
+cvs up -rv1_1
+touch file3
+touch file4
+cvscall add file3 file4
+cvscall ci -m"add file3, file4 on branch v1_1" file3 file4
+
+echo % merge file2 from v1_0 to v1_1
+cvscall up -jv1_0
+cvscall ci -m"merge file2 from v1_0 to v1_1"
+
+echo % convert to hg
+cd ..
+hg convert proj proj.hg | eval $filter
+
+echo % hg log output
+hg -R proj.hg log --template "{rev} {desc}\n"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-convert-cvs-synthetic.out Wed Mar 18 09:15:38 2009 -0400
@@ -0,0 +1,72 @@
+% create cvs repository with one project
+cvs checkout: Updating proj
+% create file1 on the trunk
+cvs add: scheduling file `file1' for addition
+cvs add: use 'cvs commit' to add this file permanently
+RCS file: *REPO*/proj/file1,v
+done
+Checking in file1;
+*REPO*/proj/file1,v <-- file1
+initial revision: 1.1
+done
+% create two branches
+cvs tag: Tagging .
+T file1
+cvs tag: Tagging .
+T file1
+% create file2 on branch v1_0
+cvs update: Updating .
+cvs add: scheduling file `file2' for addition on branch `v1_0'
+cvs add: use 'cvs commit' to add this file permanently
+RCS file: *REPO*/proj/Attic/file2,v
+done
+Checking in file2;
+*REPO*/proj/Attic/file2,v <-- file2
+new revision: 1.1.2.1; previous revision: 1.1
+done
+% create file3, file4 on branch v1_1
+cvs update: Updating .
+cvs update: file2 is no longer in the repository
+cvs add: scheduling file `file3' for addition on branch `v1_1'
+cvs add: scheduling file `file4' for addition on branch `v1_1'
+cvs add: use 'cvs commit' to add these files permanently
+RCS file: *REPO*/proj/Attic/file3,v
+done
+Checking in file3;
+*REPO*/proj/Attic/file3,v <-- file3
+new revision: 1.1.2.1; previous revision: 1.1
+done
+RCS file: *REPO*/proj/Attic/file4,v
+done
+Checking in file4;
+*REPO*/proj/Attic/file4,v <-- file4
+new revision: 1.1.2.1; previous revision: 1.1
+done
+% merge file2 from v1_0 to v1_1
+cvs update: Updating .
+U file2
+cvs commit: Examining .
+Checking in file2;
+*REPO*/proj/Attic/file2,v <-- file2
+new revision: 1.1.4.2; previous revision: 1.1.4.1
+done
+% convert to hg
+initializing destination proj.hg repository
+using builtin cvsps
+collecting CVS rlog
+9 log entries
+creating changesets
+4 changeset entries
+connecting to *REPO*
+scanning source...
+sorting...
+converting...
+3 add file1 on trunk
+2 add file2 on branch v1_0
+1 add file3, file4 on branch v1_1
+0 merge file2 from v1_0 to v1_1
+% hg log output
+3 merge file2 from v1_0 to v1_1
+2 add file3, file4 on branch v1_1
+1 add file2 on branch v1_0
+0 add file1 on trunk