convert: look up copies in getchanges instead of getcommit
svn: defer path expansion until getchanges, reducing both latency and memory
usage when converting incrementally.
--- a/hgext/convert/__init__.py Sun Aug 05 11:30:52 2007 -0700
+++ b/hgext/convert/__init__.py Sun Aug 05 12:03:27 2007 -0700
@@ -193,7 +193,8 @@
do_copies = hasattr(self.dest, 'copyfile')
filenames = []
- for f, v in self.source.getchanges(rev):
+ files, copies = self.source.getchanges(rev)
+ for f, v in files:
newf = self.mapfile(f)
if not newf:
continue
@@ -206,8 +207,8 @@
e = self.source.getmode(f, v)
self.dest.putfile(newf, e, data)
if do_copies:
- if f in commit.copies:
- copyf = self.mapfile(commit.copies[f])
+ if f in copies:
+ copyf = self.mapfile(copies[f])
if copyf:
# Merely marks that a copy happened.
self.dest.copyfile(copyf, newf)
--- a/hgext/convert/common.py Sun Aug 05 11:30:52 2007 -0700
+++ b/hgext/convert/common.py Sun Aug 05 12:03:27 2007 -0700
@@ -3,15 +3,13 @@
class NoRepo(Exception): pass
class commit(object):
- def __init__(self, author, date, desc, parents, branch=None, rev=None,
- copies={}):
+ def __init__(self, author, date, desc, parents, branch=None, rev=None):
self.author = author
self.date = date
self.desc = desc
self.parents = parents
self.branch = branch
self.rev = rev
- self.copies = copies
class converter_source(object):
"""Conversion source interface"""
@@ -42,10 +40,12 @@
raise NotImplementedError()
def getchanges(self, version):
- """Return sorted list of (filename, id) tuples for all files changed in rev.
+ """Returns a tuple of (files, copies)
+ Files is a sorted list of (filename, id) tuples for all files changed
+ in version, where id is the source revision id of the file.
- id just tells us which revision to return in getfile(), e.g. in
- git it's an object hash."""
+ copies is a dictionary of dest: source
+ """
raise NotImplementedError()
def getcommit(self, version):
--- a/hgext/convert/cvs.py Sun Aug 05 11:30:52 2007 -0700
+++ b/hgext/convert/cvs.py Sun Aug 05 12:03:27 2007 -0700
@@ -250,7 +250,7 @@
files = self.files[rev]
cl = files.items()
cl.sort()
- return cl
+ return (cl, {})
def getcommit(self, rev):
return self.changeset[rev]
--- a/hgext/convert/git.py Sun Aug 05 11:30:52 2007 -0700
+++ b/hgext/convert/git.py Sun Aug 05 12:03:27 2007 -0700
@@ -48,7 +48,7 @@
s = (m[1] == "120000")
self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
changes.append((f, h))
- return changes
+ return (changes, {})
def getcommit(self, version):
c = self.catfile(version, "commit") # read the commit hash
--- a/hgext/convert/hg.py Sun Aug 05 11:30:52 2007 -0700
+++ b/hgext/convert/hg.py Sun Aug 05 12:03:27 2007 -0700
@@ -151,7 +151,7 @@
m, a, r = self.repo.status(ctx.parents()[0].node(), ctx.node())[:3]
changes = [(name, rev) for name in m + a + r]
changes.sort()
- return changes
+ return (changes, self.getcopies(ctx))
def getcopies(self, ctx):
added = self.repo.status(ctx.parents()[0].node(), ctx.node())[1]
@@ -168,7 +168,7 @@
parents = [hex(p.node()) for p in ctx.parents() if p.node() != nullid]
return commit(author=ctx.user(), date=util.datestr(ctx.date()),
desc=ctx.description(), parents=parents,
- branch=ctx.branch(), copies=self.getcopies(ctx))
+ branch=ctx.branch())
def gettags(self):
tags = [t for t in self.repo.tagslist() if t[0] != 'tip']
--- a/hgext/convert/subversion.py Sun Aug 05 11:30:52 2007 -0700
+++ b/hgext/convert/subversion.py Sun Aug 05 12:03:27 2007 -0700
@@ -98,7 +98,7 @@
self.module = self.url[len(self.base):]
self.modulemap = {} # revision, module
self.commits = {}
- self.files = {}
+ self.paths = {}
self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
except SubversionException, e:
raise NoRepo("couldn't open SVN repo %s" % self.url)
@@ -173,12 +173,14 @@
def getchanges(self, rev):
self.modecache = {}
- files = self.files[rev]
- cl = files
- cl.sort()
+ (paths, parents) = self.paths[rev]
+ files, copies = self.expandpaths(rev, paths, parents)
+ files.sort()
+ files = zip(files, [rev] * len(files))
+
# caller caches the result, so free it here to release memory
- del self.files[rev]
- return cl
+ del self.paths[rev]
+ return (files, copies)
def getcommit(self, rev):
if rev not in self.commits:
@@ -350,8 +352,14 @@
copies = {}
revnum = self.revnum(rev)
+ if revnum in self.modulemap:
+ new_module = self.modulemap[revnum]
+ if new_module != self.module:
+ self.module = new_module
+ self.reparent(self.module)
+
for path, ent in paths:
- # self.ui.write("path %s\n" % path)
+ self.ui.write("path %s\n" % path)
entrypath = get_entry_from_path(path, module=self.module)
entry = entrypath.decode(self.encoding)
@@ -554,12 +562,7 @@
continue
paths.append((path, ent))
- entries, copies = self.expandpaths(rev, paths, parents)
- # a list of (filename, id) where id lets us retrieve the file.
- # eg in git, id is the object hash. for svn it'll be the
- self.files[rev] = zip(entries, [rev] * len(entries))
- if not entries:
- return
+ self.paths[rev] = (paths, parents)
# Example SVN datetime. Includes microseconds.
# ISO-8601 conformant
@@ -579,7 +582,6 @@
date=util.datestr(date),
desc=log,
parents=parents,
- copies=copies,
branch=branch,
rev=rev.encode('utf-8'))