# HG changeset patch # User Brendan Cully # Date 1186340607 25200 # Node ID ef338e34a9060749ae7cd5c99dd22433eec6ae5f # Parent 514c06098e9cc967587821f95c09a49e0238086f convert: look up copies in getchanges instead of getcommit svn: defer path expansion until getchanges to reduce latency, as well as memory usage when converting incrementally. diff -r 514c06098e9c -r ef338e34a906 hgext/convert/__init__.py --- a/hgext/convert/__init__.py Sun Aug 05 11:30:52 2007 -0700 +++ b/hgext/convert/__init__.py Sun Aug 05 12:03:27 2007 -0700 @@ -193,7 +193,8 @@ do_copies = hasattr(self.dest, 'copyfile') filenames = [] - for f, v in self.source.getchanges(rev): + files, copies = self.source.getchanges(rev) + for f, v in files: newf = self.mapfile(f) if not newf: continue @@ -206,8 +207,8 @@ e = self.source.getmode(f, v) self.dest.putfile(newf, e, data) if do_copies: - if f in commit.copies: - copyf = self.mapfile(commit.copies[f]) + if f in copies: + copyf = self.mapfile(copies[f]) if copyf: # Merely marks that a copy happened. self.dest.copyfile(copyf, newf) diff -r 514c06098e9c -r ef338e34a906 hgext/convert/common.py --- a/hgext/convert/common.py Sun Aug 05 11:30:52 2007 -0700 +++ b/hgext/convert/common.py Sun Aug 05 12:03:27 2007 -0700 @@ -3,15 +3,13 @@ class NoRepo(Exception): pass class commit(object): - def __init__(self, author, date, desc, parents, branch=None, rev=None, - copies={}): + def __init__(self, author, date, desc, parents, branch=None, rev=None): self.author = author self.date = date self.desc = desc self.parents = parents self.branch = branch self.rev = rev - self.copies = copies class converter_source(object): """Conversion source interface""" @@ -42,10 +40,12 @@ raise NotImplementedError() def getchanges(self, version): - """Return sorted list of (filename, id) tuples for all files changed in rev. + """Returns a tuple of (files, copies) + Files is a sorted list of (filename, id) tuples for all files changed + in version, where id is the source revision id of the file. - id just tells us which revision to return in getfile(), e.g. in - git it's an object hash.""" + copies is a dictionary of dest: source + """ raise NotImplementedError() def getcommit(self, version): diff -r 514c06098e9c -r ef338e34a906 hgext/convert/cvs.py --- a/hgext/convert/cvs.py Sun Aug 05 11:30:52 2007 -0700 +++ b/hgext/convert/cvs.py Sun Aug 05 12:03:27 2007 -0700 @@ -250,7 +250,7 @@ files = self.files[rev] cl = files.items() cl.sort() - return cl + return (cl, {}) def getcommit(self, rev): return self.changeset[rev] diff -r 514c06098e9c -r ef338e34a906 hgext/convert/git.py --- a/hgext/convert/git.py Sun Aug 05 11:30:52 2007 -0700 +++ b/hgext/convert/git.py Sun Aug 05 12:03:27 2007 -0700 @@ -48,7 +48,7 @@ s = (m[1] == "120000") self.modecache[(f, h)] = (p and "x") or (s and "l") or "" changes.append((f, h)) - return changes + return (changes, {}) def getcommit(self, version): c = self.catfile(version, "commit") # read the commit hash diff -r 514c06098e9c -r ef338e34a906 hgext/convert/hg.py --- a/hgext/convert/hg.py Sun Aug 05 11:30:52 2007 -0700 +++ b/hgext/convert/hg.py Sun Aug 05 12:03:27 2007 -0700 @@ -151,7 +151,7 @@ m, a, r = self.repo.status(ctx.parents()[0].node(), ctx.node())[:3] changes = [(name, rev) for name in m + a + r] changes.sort() - return changes + return (changes, self.getcopies(ctx)) def getcopies(self, ctx): added = self.repo.status(ctx.parents()[0].node(), ctx.node())[1] @@ -168,7 +168,7 @@ parents = [hex(p.node()) for p in ctx.parents() if p.node() != nullid] return commit(author=ctx.user(), date=util.datestr(ctx.date()), desc=ctx.description(), parents=parents, - branch=ctx.branch(), copies=self.getcopies(ctx)) + branch=ctx.branch()) def gettags(self): tags = [t for t in self.repo.tagslist() if t[0] != 'tip'] diff -r 514c06098e9c -r ef338e34a906 hgext/convert/subversion.py --- a/hgext/convert/subversion.py Sun Aug 05 11:30:52 2007 -0700 +++ b/hgext/convert/subversion.py Sun Aug 05 12:03:27 2007 -0700 @@ -98,7 +98,7 @@ self.module = self.url[len(self.base):] self.modulemap = {} # revision, module self.commits = {} - self.files = {} + self.paths = {} self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding) except SubversionException, e: raise NoRepo("couldn't open SVN repo %s" % self.url) @@ -173,12 +173,14 @@ def getchanges(self, rev): self.modecache = {} - files = self.files[rev] - cl = files - cl.sort() + (paths, parents) = self.paths[rev] + files, copies = self.expandpaths(rev, paths, parents) + files.sort() + files = zip(files, [rev] * len(files)) + # caller caches the result, so free it here to release memory - del self.files[rev] - return cl + del self.paths[rev] + return (files, copies) def getcommit(self, rev): if rev not in self.commits: @@ -350,8 +352,14 @@ copies = {} revnum = self.revnum(rev) + if revnum in self.modulemap: + new_module = self.modulemap[revnum] + if new_module != self.module: + self.module = new_module + self.reparent(self.module) + for path, ent in paths: - # self.ui.write("path %s\n" % path) + self.ui.write("path %s\n" % path) entrypath = get_entry_from_path(path, module=self.module) entry = entrypath.decode(self.encoding) @@ -554,12 +562,7 @@ continue paths.append((path, ent)) - entries, copies = self.expandpaths(rev, paths, parents) - # a list of (filename, id) where id lets us retrieve the file. - # eg in git, id is the object hash. for svn it'll be the - self.files[rev] = zip(entries, [rev] * len(entries)) - if not entries: - return + self.paths[rev] = (paths, parents) # Example SVN datetime. Includes microseconds. # ISO-8601 conformant @@ -579,7 +582,6 @@ date=util.datestr(date), desc=log, parents=parents, - copies=copies, branch=branch, rev=rev.encode('utf-8'))