Mercurial > hg
changeset 22470:8e0c4df28eec
convert: add support to detect git renames and copies
Git is unusual among VCSes in that it doesn't record copies and renames,
instead detecting them on the fly. Since Mercurial expects copies and
renames to be recorded, it can be valuable to preserve this history while
converting a Git repository to Mercurial. This patch adds a new convert option,
'convert.git.similarity', which sets how similar files must be, as a
percentage from 0 (disabled, the default) to 100 (identical), to be
treated as renames or copies.
author | Siddharth Agarwal <sid0@fb.com> |
---|---|
date | Fri, 12 Sep 2014 11:23:26 -0700 |
parents | 15bc0431476b |
children | cc5f94db672b |
files | hgext/convert/__init__.py hgext/convert/git.py tests/test-convert-git.t tests/test-convert.t |
diffstat | 4 files changed, 112 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/convert/__init__.py Thu Sep 11 23:57:49 2014 -0700 +++ b/hgext/convert/__init__.py Fri Sep 12 11:23:26 2014 -0700 @@ -291,6 +291,15 @@ leading 'refs/heads' stripped. Git submodules are converted to Git subrepos in Mercurial. + The following options can be set with ``--config``: + + :convert.git.similarity: specify how similar files modified in a + commit must be to be imported as renames or copies, as a + percentage between ``0`` (disabled) and ``100`` (files must be + identical). For example, ``90`` means that a delete/add pair will + be imported as a rename if more than 90% of the file hasn't + changed. The default is ``0``. + Perforce Source ###############
--- a/hgext/convert/git.py Thu Sep 11 23:57:49 2014 -0700 +++ b/hgext/convert/git.py Fri Sep 12 11:23:26 2014 -0700 @@ -94,6 +94,17 @@ if not os.path.exists(path + "/objects"): raise NoRepo(_("%s does not look like a Git repository") % path) + try: + similarity = int(ui.config('convert', 'git.similarity') or 0) + except ValueError: + raise util.Abort('convert.git.similarity must be a number') + if similarity < 0 or similarity > 100: + raise util.Abort(_('similarity must be between 0 and 100')) + if similarity > 0: + self.simopt = '--find-copies=%d%%' % similarity + else: + self.simopt = '' + checktool('git', 'git') self.path = path @@ -184,8 +195,10 @@ if full: raise util.Abort(_("convert from git do not support --full")) self.modecache = {} - fh = self.gitopen("git diff-tree -z --root -m -r %s" % version) + fh = self.gitopen("git diff-tree -z --root -m -r %s %s" % ( + self.simopt, version)) changes = [] + copies = {} seen = set() entry = None subexists = [False] @@ -194,15 +207,16 @@ lcount = len(difftree) i = 0 - def add(entry, f): + def add(entry, f, isdest): seen.add(f) h = entry[3] p = (entry[1] == "100755") s = (entry[1] == "120000") + renamesource = (not isdest and entry[4][0] == 'R') if f == '.gitmodules': subexists[0] = True - if entry[4] == 'D': + if entry[4] == 'D' or renamesource: subdeleted[0] = True changes.append(('.hgsub', hex(nullid))) else: @@ -210,6 +224,8 @@ elif entry[1] == '160000' or entry[0] == ':160000': subexists[0] = True else: + if renamesource: + h = hex(nullid) self.modecache[(f, h)] = (p and "x") or (s and "l") or "" changes.append((f, h)) @@ -223,7 +239,19 @@ continue f = l if f not in seen: - add(entry, f) + add(entry, f, False) + # A file can be copied multiple times, or modified and copied + # simultaneously. So f can be repeated even if fdest isn't. 
+ if entry[4][0] in 'RC': + # rename or copy: next line is the destination + fdest = difftree[i] + i += 1 + if fdest not in seen: + add(entry, fdest, True) + # .gitmodules isn't imported at all, so it being copied to + # and fro doesn't really make sense + if f != '.gitmodules' and fdest != '.gitmodules': + copies[fdest] = f entry = None if fh.close(): raise util.Abort(_('cannot read changes in %s') % version) @@ -234,7 +262,7 @@ else: self.retrievegitmodules(version) changes.append(('.hgsubstate', '')) - return (changes, {}) + return (changes, copies) def getcommit(self, version): c = self.catfile(version, "commit") # read the commit hash
--- a/tests/test-convert-git.t Thu Sep 11 23:57:49 2014 -0700 +++ b/tests/test-convert-git.t Fri Sep 12 11:23:26 2014 -0700 @@ -241,8 +241,45 @@ 9277c9cc8dd4576fc01a17939b4351e5ada93466 644 foo 88dfeab657e8cf2cef3dec67b914f49791ae76b1 644 quux +test importing git renames and copies + + $ cd git-repo2 + $ git mv foo foo-renamed +since bar is not touched in this commit, this copy will not be detected + $ cp bar bar-copied + $ cp baz baz-copied + $ cp baz baz-copied2 + $ echo baz2 >> baz + $ git add bar-copied baz-copied baz-copied2 + $ commit -a -m 'rename and copy' + $ cd .. + +input validation + $ hg convert --config convert.git.similarity=foo --datesort git-repo2 fullrepo + abort: convert.git.similarity must be a number + [255] + $ hg convert --config convert.git.similarity=-1 --datesort git-repo2 fullrepo + abort: similarity must be between 0 and 100 + [255] + $ hg convert --config convert.git.similarity=101 --datesort git-repo2 fullrepo + abort: similarity must be between 0 and 100 + [255] + + $ hg -q convert --config convert.git.similarity=100 --datesort git-repo2 fullrepo + $ hg -R fullrepo status -C --change master + M baz + A bar-copied + A baz-copied + baz + A baz-copied2 + baz + A foo-renamed + foo + R foo + test binary conversion (issue1359) + $ count=19 $ mkdir git-repo3 $ cd git-repo3 $ git init-db >/dev/null 2>/dev/null @@ -398,6 +435,29 @@ $ cd ../.. +make sure rename detection doesn't break removing and adding gitmodules + + $ cd git-repo6 + $ git mv .gitmodules .gitmodules-renamed + $ commit -a -m 'rename .gitmodules' + $ git mv .gitmodules-renamed .gitmodules + $ commit -a -m 'rename .gitmodules back' + $ cd .. 
+ + $ hg --config convert.git.similarity=100 convert -q git-repo6 git-repo6-hg + $ hg -R git-repo6-hg log -r 'tip^' -T "{desc|firstline}\n" + rename .gitmodules + $ hg -R git-repo6-hg status -C --change 'tip^' + A .gitmodules-renamed + R .hgsub + R .hgsubstate + $ hg -R git-repo6-hg log -r tip -T "{desc|firstline}\n" + rename .gitmodules back + $ hg -R git-repo6-hg status -C --change tip + A .hgsub + A .hgsubstate + R .gitmodules-renamed + convert the revision removing '.gitmodules' itself (and related submodules)
--- a/tests/test-convert.t Thu Sep 11 23:57:49 2014 -0700 +++ b/tests/test-convert.t Fri Sep 12 11:23:26 2014 -0700 @@ -244,6 +244,16 @@ converted to bookmarks with the same name, with the leading 'refs/heads' stripped. Git submodules are converted to Git subrepos in Mercurial. + The following options can be set with "--config": + + convert.git.similarity + specify how similar files modified in a commit must be to be + imported as renames or copies, as a percentage between "0" + (disabled) and "100" (files must be identical). For example, + "90" means that a delete/add pair will be imported as a + rename if more than 90% of the file hasn't changed. The + default is "0". + Perforce Source ###############