# HG changeset patch # User Matt Mackall # Date 1200672456 21600 # Node ID 5692bed8230bf1bf4a41a91f8f6d478b7a2fb55d # Parent 2c565b9598b8780cfb1ee10a6226e8f5d0743f4f# Parent fb93c774dffff3601835102fdb57417428307cba Merge with crew diff -r 2c565b9598b8 -r 5692bed8230b hgext/convert/subversion.py --- a/hgext/convert/subversion.py Fri Jan 18 09:03:17 2008 -0600 +++ b/hgext/convert/subversion.py Fri Jan 18 10:07:36 2008 -0600 @@ -89,6 +89,9 @@ receiver) except SubversionException, (inst, num): pickle.dump(num, fp, protocol) + except IOError: + # Caller may interrupt the iteration + pickle.dump(None, fp, protocol) else: pickle.dump(None, fp, protocol) fp.close() @@ -102,7 +105,53 @@ args = decodeargs(sys.stdin.read()) get_log_child(sys.stdout, *args) +class logstream: + """Interruptible revision log iterator.""" + def __init__(self, stdout): + self._stdout = stdout + + def __iter__(self): + while True: + entry = pickle.load(self._stdout) + try: + orig_paths, revnum, author, date, message = entry + except: + if entry is None: + break + raise SubversionException("child raised exception", entry) + yield entry + + def close(self): + if self._stdout: + self._stdout.close() + self._stdout = None + +def get_log(url, paths, start, end, limit=0, discover_changed_paths=True, + strict_node_history=False): + args = [url, paths, start, end, limit, discover_changed_paths, + strict_node_history] + arg = encodeargs(args) + hgexe = util.hgexecutable() + cmd = '%s debugsvnlog' % util.shellquote(hgexe) + stdin, stdout = os.popen2(cmd, 'b') + stdin.write(arg) + stdin.close() + return logstream(stdout) + # SVN conversion code stolen from bzr-svn and tailor +# +# Subversion looks like a versioned filesystem, branches structures +# are defined by conventions and not enforced by the tool. First, +# we define the potential branches (modules) as "trunk" and "branches" +# children directories. Revisions are then identified by their +# module and revision number (and a repository identifier). +# +# The revision graph is really a tree (or a forest). By default, a +# revision parent is the previous revision in the same module. If the +# module directory is copied/moved from another module then the +# revision is the module root and its parent the source revision in +# the parent module. A revision has at most one parent. +# class svn_source(converter_source): def __init__(self, ui, url, rev=None): super(svn_source, self).__init__(ui, url, rev=rev) @@ -133,7 +182,6 @@ self.ctx = self.transport.client self.base = svn.ra.get_repos_root(self.ra) self.module = self.url[len(self.base):] - self.modulemap = {} # revision, module self.commits = {} self.paths = {} self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding) @@ -257,45 +305,26 @@ uuid, module, revnum = self.revsplit(rev) self.module = module self.reparent(module) + # We assume that: + # - requests for revisions after "stop" come from the + # revision graph backward traversal. Cache all of them + # down to stop, they will be used eventually. + # - requests for revisions before "stop" come to get + # isolated branches parents. Just fetch what is needed. stop = self.lastrevs.get(module, 0) - self._fetch_revisions(from_revnum=revnum, to_revnum=stop) + if revnum < stop: + stop = revnum + 1 + self._fetch_revisions(revnum, stop) commit = self.commits[rev] # caller caches the result, so free it here to release memory del self.commits[rev] return commit - def get_log(self, paths, start, end, limit=0, discover_changed_paths=True, - strict_node_history=False): - - def parent(fp): - while True: - entry = pickle.load(fp) - try: - orig_paths, revnum, author, date, message = entry - except: - if entry is None: - break - raise SubversionException("child raised exception", entry) - yield entry - - args = [self.url, paths, start, end, limit, discover_changed_paths, - strict_node_history] - arg = encodeargs(args) - hgexe = util.hgexecutable() - cmd = '%s debugsvnlog' % util.shellquote(hgexe) - stdin, stdout = os.popen2(cmd, 'b') - - stdin.write(arg) - stdin.close() - - for p in parent(stdout): - yield p - def gettags(self): tags = {} start = self.revnum(self.head) try: - for entry in self.get_log([self.tags], 0, start): + for entry in get_log(self.url, [self.tags], 0, start): orig_paths, revnum, author, date, message = entry for path in orig_paths: if not path.startswith(self.tags+'/'): @@ -400,13 +429,11 @@ entries = [] copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions. copies = {} - revnum = self.revnum(rev) - if revnum in self.modulemap: - new_module = self.modulemap[revnum] - if new_module != self.module: - self.module = new_module - self.reparent(self.module) + new_module, revnum = self.revsplit(rev)[1:] + if new_module != self.module: + self.module = new_module + self.reparent(self.module) for path, ent in paths: entrypath = get_entry_from_path(path, module=self.module) @@ -432,12 +459,9 @@ # if a branch is created but entries are removed in the same # changeset, get the right fromrev - if parents: - uuid, old_module, fromrev = self.revsplit(parents[0]) - else: - fromrev = revnum - 1 - # might always need to be revnum - 1 in these 3 lines? - old_module = self.modulemap.get(fromrev, self.module) + # parents cannot be empty here, you cannot remove things from + # a root revision. + uuid, old_module, fromrev = self.revsplit(parents[0]) basepath = old_module + "/" + get_entry_from_path(path, module=self.module) entrypath = old_module + "/" + get_entry_from_path(path, module=self.module) @@ -508,6 +532,9 @@ # If the directory just had a prop change, # then we shouldn't need to look for its children. + if ent.action == 'M': + continue + # Also this could create duplicate entries. Not sure # whether this will matter. Maybe should make entries a set. # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev @@ -568,23 +595,23 @@ return (entries, copies) - def _fetch_revisions(self, from_revnum = 0, to_revnum = 347): + def _fetch_revisions(self, from_revnum, to_revnum): + if from_revnum < to_revnum: + from_revnum, to_revnum = to_revnum, from_revnum + self.child_cset = None def parselogentry(orig_paths, revnum, author, date, message): + """Return the parsed commit object or None, and True if + the revision is a branch root. + """ self.ui.debug("parsing revision %d (%d changes)\n" % (revnum, len(orig_paths))) - if revnum in self.modulemap: - new_module = self.modulemap[revnum] - if new_module != self.module: - self.module = new_module - self.reparent(self.module) - rev = self.revid(revnum) # branch log might return entries for a parent we already have - if (rev in self.commits or - (revnum < self.lastrevs.get(self.module, 0))): - return + + if (rev in self.commits or revnum < to_revnum): + return None, False parents = [] # check whether this revision is the start of a branch @@ -593,15 +620,12 @@ if ent.copyfrom_path: # ent.copyfrom_rev may not be the actual last revision prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev) - self.modulemap[prev] = ent.copyfrom_path parents = [self.revid(prev, ent.copyfrom_path)] self.ui.note('found parent of branch %s at %d: %s\n' % \ (self.module, prev, ent.copyfrom_path)) else: self.ui.debug("No copyfrom path, don't know what to do.\n") - self.modulemap[revnum] = self.module # track backwards in time - orig_paths = orig_paths.items() orig_paths.sort() paths = [] @@ -612,8 +636,6 @@ continue paths.append((path, ent)) - self.paths[rev] = (paths, parents) - # Example SVN datetime. Includes microseconds. # ISO-8601 conformant # '2007-01-04T17:35:00.902377Z' @@ -636,23 +658,50 @@ rev=rev.encode('utf-8')) self.commits[rev] = cset + # The parents list is *shared* among self.paths and the + # commit object. Both will be updated below. + self.paths[rev] = (paths, cset.parents) if self.child_cset and not self.child_cset.parents: - self.child_cset.parents = [rev] + self.child_cset.parents[:] = [rev] self.child_cset = cset + return cset, len(parents) > 0 self.ui.note('fetching revision log for "%s" from %d to %d\n' % (self.module, from_revnum, to_revnum)) try: - for entry in self.get_log([self.module], from_revnum, to_revnum): - orig_paths, revnum, author, date, message = entry - if self.is_blacklisted(revnum): - self.ui.note('skipping blacklisted revision %d\n' % revnum) - continue - if orig_paths is None: - self.ui.debug('revision %d has no entries\n' % revnum) - continue - parselogentry(orig_paths, revnum, author, date, message) + firstcset = None + stream = get_log(self.url, [self.module], from_revnum, to_revnum) + try: + for entry in stream: + paths, revnum, author, date, message = entry + if self.is_blacklisted(revnum): + self.ui.note('skipping blacklisted revision %d\n' + % revnum) + continue + if paths is None: + self.ui.debug('revision %d has no entries\n' % revnum) + continue + cset, branched = parselogentry(paths, revnum, author, + date, message) + if cset: + firstcset = cset + if branched: + break + finally: + stream.close() + + if firstcset and not firstcset.parents: + # The first revision of the sequence (the last fetched one) + # has invalid parents if not a branch root. Find the parent + # revision now, if any. + try: + firstrevnum = self.revnum(firstcset.rev) + if firstrevnum > 1: + latest = self.latest(self.module, firstrevnum - 1) + firstcset.parents.append(self.revid(latest)) + except util.Abort: + pass except SubversionException, (inst, num): if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION: raise NoSuchRevision(branch=self, @@ -664,9 +713,9 @@ # TODO: ra.get_file transmits the whole file instead of diffs. mode = '' try: - revnum = self.revnum(rev) - if self.module != self.modulemap[revnum]: - self.module = self.modulemap[revnum] + new_module, revnum = self.revsplit(rev)[1:] + if self.module != new_module: + self.module = new_module self.reparent(self.module) info = svn.ra.get_file(self.ra, file, revnum, io) if isinstance(info, list): diff -r 2c565b9598b8 -r 5692bed8230b tests/test-convert-svn-branches --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-convert-svn-branches Fri Jan 18 10:07:36 2008 -0600 @@ -0,0 +1,89 @@ +#!/bin/sh + +"$TESTDIR/hghave" svn svn-bindings || exit 80 + +fix_path() +{ + tr '\\' / +} + +echo "[extensions]" >> $HGRCPATH +echo "convert = " >> $HGRCPATH +echo "hgext.graphlog =" >> $HGRCPATH + +svnadmin create svn-repo + +svnpath=`pwd | fix_path` +# SVN wants all paths to start with a slash. Unfortunately, +# Windows ones don't. Handle that. +expr $svnpath : "\/" > /dev/null +if [ $? -ne 0 ]; then + svnpath='/'$svnpath +fi + +echo % initial svn import +mkdir projA +cd projA +mkdir trunk +mkdir branches +mkdir tags +cd .. + +svnurl=file://$svnpath/svn-repo/projA +svn import -m "init projA" projA $svnurl | fix_path + +echo % update svn repository +svn co $svnurl A | fix_path +cd A +echo hello > trunk/letter.txt +echo hey > trunk/letter2.txt +echo ho > trunk/letter3.txt +svn add trunk/letter.txt trunk/letter2.txt trunk/letter3.txt +svn ci -m hello + +echo % branch to old letters +svn copy trunk branches/old +svn rm branches/old/letter3.txt +svn ci -m "branch trunk, remove letter3" +svn up + +echo % update trunk +echo "what can I say ?" >> trunk/letter.txt +svn ci -m "change letter" + +echo % update old branch +echo "what's up ?" >> branches/old/letter2.txt +svn ci -m "change letter2" + +echo % create a cross-branch revision +svn move -m "move letter2" trunk/letter2.txt \ + branches/old/letter3.txt +echo "I am fine" >> branches/old/letter3.txt +svn ci -m "move and update letter3.txt" + +echo % update old branch again +echo "bye" >> branches/old/letter2.txt +svn ci -m "change letter2 again" + +echo % update trunk again +echo "how are you ?" >> trunk/letter.txt +svn ci -m "last change to letter" +cd .. + +echo % convert trunk and branches +hg convert --datesort $svnurl A-hg + +echo % branch again from a converted revision +cd A +svn copy -r 1 $svnurl/trunk branches/old2 +svn ci -m "branch trunk@1 into old2" +cd .. + +echo % convert again +hg convert --datesort $svnurl A-hg + +cd A-hg +hg glog --template '#rev# #desc|firstline# files: #files#\n' +hg branches | sed 's/:.*/:/' +hg tags -q +cd .. diff -r 2c565b9598b8 -r 5692bed8230b tests/test-convert-svn-branches.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-convert-svn-branches.out Fri Jan 18 10:07:36 2008 -0600 @@ -0,0 +1,101 @@ +% initial svn import +Adding projA/trunk +Adding projA/branches +Adding projA/tags + +Committed revision 1. +% update svn repository +A A/trunk +A A/branches +A A/tags +Checked out revision 1. +A trunk/letter.txt +A trunk/letter2.txt +A trunk/letter3.txt +Adding trunk/letter.txt +Adding trunk/letter2.txt +Adding trunk/letter3.txt +Transmitting file data ... +Committed revision 2. +% branch to old letters +A branches/old +D branches/old/letter3.txt +Adding branches/old +Adding branches/old/letter.txt +Adding branches/old/letter2.txt +Deleting branches/old/letter3.txt + +Committed revision 3. +At revision 3. +% update trunk +Sending trunk/letter.txt +Transmitting file data . +Committed revision 4. +% update old branch +Sending branches/old/letter2.txt +Transmitting file data . +Committed revision 5. +% create a cross-branch revision +A branches/old/letter3.txt +D trunk/letter2.txt +Adding branches/old/letter3.txt +Deleting trunk/letter2.txt +Transmitting file data . +Committed revision 6. +% update old branch again +Sending branches/old/letter2.txt +Transmitting file data . +Committed revision 7. +% update trunk again +Sending trunk/letter.txt +Transmitting file data . +Committed revision 8. +% convert trunk and branches +initializing destination A-hg repository +scanning source... +sorting... +converting... +8 init projA +7 hello +6 branch trunk, remove letter3 +5 change letter +4 change letter2 +3 move and update letter3.txt +2 move and update letter3.txt +1 change letter2 again +0 last change to letter +% branch again from a converted revision +Checked out revision 1. +A branches/old2 +Adding branches/old2 + +Committed revision 9. +% convert again +scanning source... +sorting... +converting... +0 branch trunk@1 into old2 +o 9 branch trunk@1 into old2 files: +| +| o 8 last change to letter files: letter.txt +| | +| | o 7 change letter2 again files: letter2.txt +| | | +| o | 6 move and update letter3.txt files: letter2.txt +| | | +| | o 5 move and update letter3.txt files: letter3.txt +| | | +| | o 4 change letter2 files: letter2.txt +| | | +| o | 3 change letter files: letter.txt +| | | ++---o 2 branch trunk, remove letter3 files: letter.txt letter.txt letter2.txt letter2.txt +| | +| o 1 hello files: letter.txt letter2.txt letter3.txt +|/ +o 0 init projA files: + +old2 9: +default 8: +old 7: +tip