convert: add filename filtering and renaming support
author Bryan O'Sullivan <bos@serpentine.com>
Thu, 26 Jul 2007 13:34:36 -0700
changeset 5016 4ebc8693ce72
parent 5015 cb100605a516
child 5017 06329efa722d
convert: add filename filtering and renaming support
hgext/convert/__init__.py
hgext/convert/hg.py
--- a/hgext/convert/__init__.py	Thu Jul 26 13:34:36 2007 -0700
+++ b/hgext/convert/__init__.py	Thu Jul 26 13:34:36 2007 -0700
@@ -11,8 +11,9 @@
 from hg import mercurial_source, mercurial_sink
 from subversion import convert_svn
 
-import os, shutil
+import os, shlex, shutil
 from mercurial import hg, ui, util, commands
+from mercurial.i18n import _
 
 commands.norepo += " convert"
 
@@ -42,7 +43,7 @@
     raise util.Abort('%s: unknown repository type' % path)
 
 class convert(object):
-    def __init__(self, ui, source, dest, revmapfile, opts):
+    def __init__(self, ui, source, dest, revmapfile, filemapper, opts):
 
         self.source = source
         self.dest = dest
@@ -53,6 +54,7 @@
         self.revmapfilefd = None
         self.authors = {}
         self.authorfile = None
+        self.mapfile = filemapper
 
         self.map = {}
         try:
@@ -191,28 +193,31 @@
         afile.close()
 
     def copy(self, rev):
-        c = self.commitcache[rev]
-        files = self.source.getchanges(rev)
+        commit = self.commitcache[rev]
+        do_copies = hasattr(self.dest, 'copyfile')  # record copies only if the sink defines copyfile
+        filenames = []  # mapped names, handed to putcommit below
 
-        do_copies = hasattr(self.dest, 'copyfile')
-
-        for f, v in files:
+        for f, v in self.source.getchanges(rev):
+            newf = self.mapfile(f)  # name to use in the destination; falsy means drop the file
+            if not newf:
+                continue
+            filenames.append(newf)
             try:
                 data = self.source.getfile(f, v)
             except IOError, inst:
-                self.dest.delfile(f)
+                self.dest.delfile(newf)  # getfile raised IOError: delete under the mapped name
             else:
                 e = self.source.getmode(f, v)
-                self.dest.putfile(f, e, data)
+                self.dest.putfile(newf, e, data)  # source is read as f, sink is written as newf
                 if do_copies:
-                    if f in c.copies:
-                        # Merely marks that a copy happened.
-                        self.dest.copyfile(c.copies[f], f)
+                    if f in commit.copies:
+                        copyf = self.mapfile(commit.copies[f])  # the copy source goes through the same mapping
+                        if copyf:  # no copy is recorded when the copy source is filtered out
+                            # Merely marks that a copy happened.
+                            self.dest.copyfile(copyf, newf)
 
-
-        r = [self.map[v] for v in c.parents]
-        f = [f for f, v in files]
-        newnode = self.dest.putcommit(f, r, c)
+        parents = [self.map[r] for r in commit.parents]  # translate source parent ids through self.map
+        newnode = self.dest.putcommit(filenames, parents, commit)  # only names that survived the mapping
         self.mapentry(rev, newnode)
 
     def convert(self):
@@ -262,6 +267,87 @@
         if self.revmapfilefd:
             self.revmapfilefd.close()
 
+def rpairs(name):  # generator: split name into (prefix, suffix) pairs at each '/'
+    e = len(name)  # first pair is (name, ''): the whole name as prefix
+    while e != -1:  # rfind returns -1 once no '/' remains to the left of e
+        yield name[:e], name[e+1:]  # the '/' at index e belongs to neither half
+        e = name.rfind('/', 0, e)  # step to the next '/' leftwards, so prefixes get shorter
+
+class filemapper(object):
+    '''Map and filter filenames when importing.
+    A name can be mapped to itself, a new name, or None (omit from new
+    repository).'''
+
+    def __init__(self, ui, path=None):  # path: optional filemap file; parsed only when truthy
+        self.ui = ui  # parse() uses this to emit warnings
+        self.include = {}  # name -> name, filled by 'include' directives
+        self.exclude = {}  # name -> name, filled by 'exclude' directives
+        self.rename = {}  # source name -> destination name, filled by 'rename' directives
+        if path:
+            if self.parse(path):  # parse() returns the number of errors it found
+                raise util.Abort(_('errors in filemap'))
+
+    def parse(self, path):  # read directives from the file at path; returns the error count
+        errs = 0
+        def check(name, mapping, listname):  # warn and return 1 if name is already in mapping, else 0
+            if name in mapping:
+                self.ui.warn(_('%s:%d: %r already in %s list\n') %
+                             (lex.infile, lex.lineno, name, listname))
+                return 1
+            return 0
+        lex = shlex.shlex(open(path), path, True)  # third argument enables shlex POSIX mode; NOTE(review): the file object is never explicitly closed
+        lex.wordchars += '!@#$%^&*()-=+[]{}|;:,./<>?'  # let these punctuation characters be part of a token
+        cmd = lex.get_token()
+        while cmd:  # stops on a falsy token; presumably end of input, confirm against shlex docs
+            if cmd == 'include':
+                name = lex.get_token()
+                errs += check(name, self.exclude, 'exclude')  # an included name must not already be excluded
+                self.include[name] = name  # recorded even if check() reported a conflict
+            elif cmd == 'exclude':
+                name = lex.get_token()
+                errs += check(name, self.include, 'include')  # an excluded name must not already be included
+                errs += check(name, self.rename, 'rename')  # ... nor already be a rename source
+                self.exclude[name] = name  # recorded even if check() reported a conflict
+            elif cmd == 'rename':
+                src = lex.get_token()
+                dest = lex.get_token()
+                errs += check(src, self.exclude, 'exclude')  # a rename source must not already be excluded
+                self.rename[src] = dest
+            elif cmd == 'source':
+                errs += self.parse(lex.get_token())  # recursively parse another filemap and add its error count
+            else:
+                self.ui.warn(_('%s:%d: unknown directive %r\n') %
+                             (lex.infile, lex.lineno, cmd))
+                errs += 1
+            cmd = lex.get_token()
+        return errs
+
+    def lookup(self, name, mapping):  # returns (mapped value, matched prefix, remaining suffix)
+        for pre, suf in rpairs(name):  # rpairs yields the longest prefix first
+            try:
+                return mapping[pre], pre, suf
+            except KeyError, err:  # this prefix is not in mapping; try the next shorter one
+                pass
+        return '', name, ''  # nothing matched: the empty mapped value means "not found"
+        
+    def __call__(self, name):  # returns the mapped name, or None to omit the file
+        if self.include:
+            inc = self.lookup(name, self.include)[0]
+        else:
+            inc = name  # no include list: the name counts as included
+        if self.exclude:
+            exc = self.lookup(name, self.exclude)[0]
+        else:
+            exc = ''  # no exclude list: nothing is excluded
+        if not inc or exc:  # parsed as (not inc) or exc: omitted when not included or when excluded
+            return None
+        newpre, pre, suf = self.lookup(name, self.rename)
+        if newpre:
+            if suf:  # only a leading path prefix matched; re-attach the remainder
+                return newpre + '/' + suf
+            return newpre  # the whole name matched a rename entry
+        return name  # no rename applies
+
 def _convert(ui, src, dest=None, revmapfile=None, **opts):
     """Convert a foreign SCM repository to a Mercurial one.
 
@@ -343,13 +429,16 @@
         except:
             revmapfile = os.path.join(destc, "map")
 
-    c = convert(ui, srcc, destc, revmapfile, opts)
+
+    c = convert(ui, srcc, destc, revmapfile, filemapper(ui, opts['filemap']),
+                opts)
     c.convert()
 
 cmdtable = {
     "convert":
         (_convert,
          [('A', 'authors', '', 'username mapping filename'),
+          ('', 'filemap', '', 'remap file names using contents of file'),
           ('r', 'rev', '', 'import up to target revision REV'),
           ('', 'datesort', None, 'try to sort changesets by date')],
          'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
--- a/hgext/convert/hg.py	Thu Jul 26 13:34:36 2007 -0700
+++ b/hgext/convert/hg.py	Thu Jul 26 13:34:36 2007 -0700
@@ -59,7 +59,10 @@
             pass
 
     def putcommit(self, files, parents, commit):
-        seen = {}
+        if not files:
+            return hex(self.repo.changelog.tip())
+
+        seen = {hex(nullid): 1}
         pl = []
         for p in parents:
             if p not in seen: