diff hgext/convert/git.py @ 30660:1f21a6835604

convert: add config option to copy extra keys from Git commits Git commit objects support storing arbitrary key-value metadata. While there is no user-facing mechanism in Git to record these values, some tools do record data here. Currently, `hg convert` only handles the "author," "committer," and "parent" keys in Git commit objects. All other keys are ignored. This means that any custom keys are lost when converting Git repos to Mercurial. This patch implements support for copying a whitelist of extra keys from Git commit objects to the "extras" dict of the destination. As the added tests demonstate, this allows extra metadata to be preserved during the conversion process. This patch stops short of converting all metadata to "extras." We could potentially implement this via `convert.git.extrakeys=*` or similar. But copying everything by default is a bit dangerous because if Git adds new keys to commit objects, we could find ourselves copying things that shouldn't be copied! This patch also assumes the source key is the same as the destination key. We could implement support for prefixing the output key to distinguish it as coming from Git. But until this feature is needed, I'm inclined to hold off implementing it.
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 22 Dec 2016 23:28:11 -0700
parents ea3540e66fd8
children ced0d686ecb3
line wrap: on
line diff
--- a/hgext/convert/git.py	Thu Dec 22 09:26:47 2016 -0800
+++ b/hgext/convert/git.py	Thu Dec 22 23:28:11 2016 -0700
@@ -31,6 +31,18 @@
     def hgsubstate(self):
         return "%s %s" % (self.node, self.path)
 
+# Keys in extra fields that should not be copied if the user requests.
+bannedextrakeys = set([
+    # Git commit object built-ins.
+    'tree',
+    'parent',
+    'author',
+    'committer',
+    # Mercurial built-ins.
+    'branch',
+    'close',
+])
+
 class convert_git(common.converter_source, common.commandline):
     # Windows does not support GIT_DIR= construct while other systems
     # cannot remove environment variable. Just assume none have
@@ -92,6 +104,12 @@
 
         self.catfilepipe = self.gitpipe('cat-file', '--batch')
 
+        self.copyextrakeys = self.ui.configlist('convert', 'git.extrakeys')
+        banned = set(self.copyextrakeys) & bannedextrakeys
+        if banned:
+            raise error.Abort(_('copying of extra key is forbidden: %s') %
+                              _(', ').join(sorted(banned)))
+
     def after(self):
         for f in self.catfilepipe:
             f.close()
@@ -279,6 +297,7 @@
         l = c[:end].splitlines()
         parents = []
         author = committer = None
+        extra = {}
         for e in l[1:]:
             n, v = e.split(" ", 1)
             if n == "author":
@@ -295,6 +314,8 @@
                 committer = self.recode(committer)
             if n == "parent":
                 parents.append(v)
+            if n in self.copyextrakeys:
+                extra[n] = v
 
         if committer and committer != author:
             message += "\ncommitter: %s\n" % committer
@@ -304,7 +325,8 @@
 
         c = common.commit(parents=parents, date=date, author=author,
                           desc=message,
-                          rev=version)
+                          rev=version,
+                          extra=extra)
         return c
 
     def numcommits(self):