convert: drastically speed up git conversions
We would formerly exec git cat-file once for every commit, plus once for
every tree and file we wanted to read. This switches to using git
cat-file's batch mode, which is much, much, much faster.
Using this new code, converting the git git repo to hg ran in 106
minutes on my machine. Using the stock mercurial, it required 1239
minutes. I believe this to be typical of the speedups we will see
from this patch.
--- a/hgext/convert/git.py Sat May 03 19:11:51 2014 +0430
+++ b/hgext/convert/git.py Tue May 27 21:12:24 2014 -0700
@@ -46,6 +46,18 @@
del os.environ['GIT_DIR']
else:
os.environ['GIT_DIR'] = prevgitdir
+
+ def gitpipe(self, s):
+ prevgitdir = os.environ.get('GIT_DIR')
+ os.environ['GIT_DIR'] = self.path
+ try:
+ return util.popen3(s)
+ finally:
+ if prevgitdir is None:
+ del os.environ['GIT_DIR']
+ else:
+ os.environ['GIT_DIR'] = prevgitdir
+
else:
def gitopen(self, s, err=None):
if err == subprocess.PIPE:
@@ -56,6 +68,9 @@
else:
return util.popen('GIT_DIR=%s %s' % (self.path, s), 'rb')
+ def gitpipe(self, s):
+ return util.popen3('GIT_DIR=%s %s' % (self.path, s))
+
def popen_with_stderr(self, s):
p = subprocess.Popen(s, shell=True, bufsize=-1,
close_fds=util.closefds,
@@ -84,6 +99,12 @@
self.path = path
self.submodules = []
+ self.catfilepipe = self.gitpipe('git cat-file --batch')
+
+ def after(self):
+ for f in self.catfilepipe:
+ f.close()
+
def getheads(self):
if not self.rev:
heads, ret = self.gitread('git rev-parse --branches --remotes')
@@ -98,9 +119,17 @@
def catfile(self, rev, type):
if rev == hex(nullid):
raise IOError
- data, ret = self.gitread("git cat-file %s %s" % (type, rev))
- if ret:
+ self.catfilepipe[0].write(rev+'\n')
+ self.catfilepipe[0].flush()
+ info = self.catfilepipe[1].readline().split()
+ if info[1] != type:
raise util.Abort(_('cannot read %r object at %s') % (type, rev))
+ size = int(info[2])
+ data = self.catfilepipe[1].read(size)
+ if len(data) < size:
+ raise util.Abort(_('cannot read %r object at %s: unexpected size') % (type, rev))
+ # consume the newline that follows the object contents in the batch output
+ self.catfilepipe[1].read(1)
return data
def getfile(self, name, rev):