convert: add config option to copy extra keys from Git commits
authorGregory Szorc <gregory.szorc@gmail.com>
Thu, 22 Dec 2016 23:28:11 -0700
changeset 30663 1f21a6835604
parent 30662 1404146157d9
child 30664 ced0d686ecb3
convert: add config option to copy extra keys from Git commits Git commit objects support storing arbitrary key-value metadata. While there is no user-facing mechanism in Git to record these values, some tools do record data here. Currently, `hg convert` only handles the "author," "committer," and "parent" keys in Git commit objects. All other keys are ignored. This means that any custom keys are lost when converting Git repos to Mercurial. This patch implements support for copying a whitelist of extra keys from Git commit objects to the "extras" dict of the destination. As the added tests demonstate, this allows extra metadata to be preserved during the conversion process. This patch stops short of converting all metadata to "extras." We could potentially implement this via `convert.git.extrakeys=*` or similar. But copying everything by default is a bit dangerous because if Git adds new keys to commit objects, we could find ourselves copying things that shouldn't be copied! This patch also assumes the source key is the same as the destination key. We could implement support for prefixing the output key to distinguish it as coming from Git. But until this feature is needed, I'm inclined to hold off implementing it.
hgext/convert/__init__.py
hgext/convert/git.py
tests/test-convert-git.t
tests/test-convert.t
--- a/hgext/convert/__init__.py	Thu Dec 22 09:26:47 2016 -0800
+++ b/hgext/convert/__init__.py	Thu Dec 22 23:28:11 2016 -0700
@@ -327,6 +327,12 @@
         ``convert.git.similarity`` is greater than 0. The default is
         ``400``.
 
+    :convert.git.extrakeys: list of extra keys from commit metadata to copy to
+        the destination. Some Git repositories store extra metadata in commits.
+        By default, this non-default metadata will be lost during conversion.
+        Setting this config option can retain that metadata. Some built-in
+        keys such as ``parent`` and ``branch`` are not allowed to be copied.
+
     :convert.git.remoteprefix: remote refs are converted as bookmarks with
         ``convert.git.remoteprefix`` as a prefix followed by a /. The default
         is 'remote'.
--- a/hgext/convert/git.py	Thu Dec 22 09:26:47 2016 -0800
+++ b/hgext/convert/git.py	Thu Dec 22 23:28:11 2016 -0700
@@ -31,6 +31,18 @@
     def hgsubstate(self):
         return "%s %s" % (self.node, self.path)
 
+# Keys in extra fields that should not be copied if the user requests.
+bannedextrakeys = set([
+    # Git commit object built-ins.
+    'tree',
+    'parent',
+    'author',
+    'committer',
+    # Mercurial built-ins.
+    'branch',
+    'close',
+])
+
 class convert_git(common.converter_source, common.commandline):
     # Windows does not support GIT_DIR= construct while other systems
     # cannot remove environment variable. Just assume none have
@@ -92,6 +104,12 @@
 
         self.catfilepipe = self.gitpipe('cat-file', '--batch')
 
+        self.copyextrakeys = self.ui.configlist('convert', 'git.extrakeys')
+        banned = set(self.copyextrakeys) & bannedextrakeys
+        if banned:
+            raise error.Abort(_('copying of extra key is forbidden: %s') %
+                              _(', ').join(sorted(banned)))
+
     def after(self):
         for f in self.catfilepipe:
             f.close()
@@ -279,6 +297,7 @@
         l = c[:end].splitlines()
         parents = []
         author = committer = None
+        extra = {}
         for e in l[1:]:
             n, v = e.split(" ", 1)
             if n == "author":
@@ -295,6 +314,8 @@
                 committer = self.recode(committer)
             if n == "parent":
                 parents.append(v)
+            if n in self.copyextrakeys:
+                extra[n] = v
 
         if committer and committer != author:
             message += "\ncommitter: %s\n" % committer
@@ -304,7 +325,8 @@
 
         c = common.commit(parents=parents, date=date, author=author,
                           desc=message,
-                          rev=version)
+                          rev=version,
+                          extra=extra)
         return c
 
     def numcommits(self):
--- a/tests/test-convert-git.t	Thu Dec 22 09:26:47 2016 -0800
+++ b/tests/test-convert-git.t	Thu Dec 22 23:28:11 2016 -0700
@@ -804,3 +804,127 @@
 
 #endif
 
+Conversion of extra commit metadata to extras works
+
+  $ git init gitextras >/dev/null 2>/dev/null
+  $ cd gitextras
+  $ touch foo
+  $ git add foo
+  $ commit -m initial
+  $ echo 1 > foo
+  $ tree=`git write-tree`
+
+Git doesn't provider a user-facing API to write extra metadata into the
+commit, so create the commit object by hand
+
+  $ git hash-object -t commit -w --stdin << EOF
+  > tree ${tree}
+  > parent ba6b1344e977ece9e00958dbbf17f1f09384b2c1
+  > author test <test@example.com> 1000000000 +0000
+  > committer test <test@example.com> 1000000000 +0000
+  > extra-1 extra-1
+  > extra-2 extra-2 with space
+  > convert_revision 0000aaaabbbbccccddddeeee
+  > 
+  > message with extras
+  > EOF
+  8123727c8361a4117d1a2d80e0c4e7d70c757f18
+
+  $ git reset --hard 8123727c8361a4117d1a2d80e0c4e7d70c757f18 > /dev/null
+
+  $ cd ..
+
+convert will not retain custom metadata keys by default
+
+  $ hg convert gitextras hgextras1
+  initializing destination hgextras1 repository
+  scanning source...
+  sorting...
+  converting...
+  1 initial
+  0 message with extras
+  updating bookmarks
+
+  $ hg -R hgextras1 log --debug -r 1
+  changeset:   1:e13a39880f68479127b2a80fa0b448cc8524aa09
+  bookmark:    master
+  tag:         tip
+  phase:       draft
+  parent:      0:dcb68977c55cd02cbd13b901df65c4b6e7b9c4b9
+  parent:      -1:0000000000000000000000000000000000000000
+  manifest:    0:6a3df4de388f3c4f8e28f4f9a814299a3cbb5f50
+  user:        test <test@example.com>
+  date:        Sun Sep 09 01:46:40 2001 +0000
+  extra:       branch=default
+  extra:       convert_revision=8123727c8361a4117d1a2d80e0c4e7d70c757f18
+  description:
+  message with extras
+  
+  
+
+Attempting to convert a banned extra is disallowed
+
+  $ hg convert --config convert.git.extrakeys=tree,parent gitextras hgextras-banned
+  initializing destination hgextras-banned repository
+  abort: copying of extra key is forbidden: parent, tree
+  [255]
+
+Converting a specific extra works
+
+  $ hg convert --config convert.git.extrakeys=extra-1 gitextras hgextras2
+  initializing destination hgextras2 repository
+  scanning source...
+  sorting...
+  converting...
+  1 initial
+  0 message with extras
+  updating bookmarks
+
+  $ hg -R hgextras2 log --debug -r 1
+  changeset:   1:d40fb205d58597e6ecfd55b16f198be5bf436391
+  bookmark:    master
+  tag:         tip
+  phase:       draft
+  parent:      0:dcb68977c55cd02cbd13b901df65c4b6e7b9c4b9
+  parent:      -1:0000000000000000000000000000000000000000
+  manifest:    0:6a3df4de388f3c4f8e28f4f9a814299a3cbb5f50
+  user:        test <test@example.com>
+  date:        Sun Sep 09 01:46:40 2001 +0000
+  extra:       branch=default
+  extra:       convert_revision=8123727c8361a4117d1a2d80e0c4e7d70c757f18
+  extra:       extra-1=extra-1
+  description:
+  message with extras
+  
+  
+
+Converting multiple extras works
+
+  $ hg convert --config convert.git.extrakeys=extra-1,extra-2 gitextras hgextras3
+  initializing destination hgextras3 repository
+  scanning source...
+  sorting...
+  converting...
+  1 initial
+  0 message with extras
+  updating bookmarks
+
+  $ hg -R hgextras3 log --debug -r 1
+  changeset:   1:0105af33379e7b6491501fd34141b7af700fe125
+  bookmark:    master
+  tag:         tip
+  phase:       draft
+  parent:      0:dcb68977c55cd02cbd13b901df65c4b6e7b9c4b9
+  parent:      -1:0000000000000000000000000000000000000000
+  manifest:    0:6a3df4de388f3c4f8e28f4f9a814299a3cbb5f50
+  user:        test <test@example.com>
+  date:        Sun Sep 09 01:46:40 2001 +0000
+  extra:       branch=default
+  extra:       convert_revision=8123727c8361a4117d1a2d80e0c4e7d70c757f18
+  extra:       extra-1=extra-1
+  extra:       extra-2=extra-2 with space
+  description:
+  message with extras
+  
+  
+
--- a/tests/test-convert.t	Thu Dec 22 09:26:47 2016 -0800
+++ b/tests/test-convert.t	Thu Dec 22 23:28:11 2016 -0700
@@ -268,6 +268,13 @@
                     computation on large projects. The option is only relevant
                     if "convert.git.similarity" is greater than 0. The default
                     is "400".
+      convert.git.extrakeys
+                    list of extra keys from commit metadata to copy to the
+                    destination. Some Git repositories store extra metadata in
+                    commits. By default, this non-default metadata will be lost
+                    during conversion. Setting this config option can retain
+                    that metadata. Some built-in keys such as "parent" and
+                    "branch" are not allowed to be copied.
       convert.git.remoteprefix
                     remote refs are converted as bookmarks with
                     "convert.git.remoteprefix" as a prefix followed by a /. The