# HG changeset patch # User David Schleimer # Date 1401250344 25200 # Node ID a204fd9b5ba94d7ccf543a99b8a023f2a90043ef # Parent 664c49420d48208be5a01d966c7442ae2c02802d convert: drastically speed up git conversions We would formerly exec git cat-file once for every commit, plus once for every tree and file we wanted to read. This switches to using git cat-file's batch mode, which is much, much, much faster. Using this new code, converting the git git repo to hg ran in 106 minutes on my machine. Using the stock mercurial, it required 1239 minutes. I believe this to be typical of the speedups we will see from this patch. diff -r 664c49420d48 -r a204fd9b5ba9 hgext/convert/git.py --- a/hgext/convert/git.py Sat May 03 19:11:51 2014 +0430 +++ b/hgext/convert/git.py Tue May 27 21:12:24 2014 -0700 @@ -46,6 +46,18 @@ del os.environ['GIT_DIR'] else: os.environ['GIT_DIR'] = prevgitdir + + def gitpipe(self, s): + prevgitdir = os.environ.get('GIT_DIR') + os.environ['GIT_DIR'] = self.path + try: + return util.popen3(s) + finally: + if prevgitdir is None: + del os.environ['GIT_DIR'] + else: + os.environ['GIT_DIR'] = prevgitdir + else: def gitopen(self, s, err=None): if err == subprocess.PIPE: @@ -56,6 +68,9 @@ else: return util.popen('GIT_DIR=%s %s' % (self.path, s), 'rb') + def gitpipe(self, s): + return util.popen3('GIT_DIR=%s %s' % (self.path, s)) + def popen_with_stderr(self, s): p = subprocess.Popen(s, shell=True, bufsize=-1, close_fds=util.closefds, @@ -84,6 +99,12 @@ self.path = path self.submodules = [] + self.catfilepipe = self.gitpipe('git cat-file --batch') + + def after(self): + for f in self.catfilepipe: + f.close() + def getheads(self): if not self.rev: heads, ret = self.gitread('git rev-parse --branches --remotes') @@ -98,9 +119,17 @@ def catfile(self, rev, type): if rev == hex(nullid): raise IOError - data, ret = self.gitread("git cat-file %s %s" % (type, rev)) - if ret: + self.catfilepipe[0].write(rev+'\n') + self.catfilepipe[0].flush() + info = 
self.catfilepipe[1].readline().split() + if info[1] != type: raise util.Abort(_('cannot read %r object at %s') % (type, rev)) + size = int(info[2]) + data = self.catfilepipe[1].read(size) + if len(data) < size: + raise util.Abort(_('cannot read %r object at %s: unexpected size') % (type, rev)) + # read the trailing newline + self.catfilepipe[1].read(1) return data def getfile(self, name, rev):