comparison hgext/convert/git.py @ 21630:a204fd9b5ba9

convert: drastically speed up git conversions We would formerly exec git cat-file once for every commit, plus once for every tree and file we wanted to read. This switches to using git cat-file's batch mode, which is much, much, much faster. Using this new code, converting the git git repo to hg ran in 106 minutes on my machine. Using the stock mercurial, it required 1239 minutes. I believe this to be typical of the speedups we will see from this patch.
author David Schleimer <dschleimer@fb.com>
date Tue, 27 May 2014 21:12:24 -0700
parents e8203629371b
children cf599f8a2da8
comparison
equal deleted inserted replaced
21629:664c49420d48 21630:a204fd9b5ba9
44 finally: 44 finally:
45 if prevgitdir is None: 45 if prevgitdir is None:
46 del os.environ['GIT_DIR'] 46 del os.environ['GIT_DIR']
47 else: 47 else:
48 os.environ['GIT_DIR'] = prevgitdir 48 os.environ['GIT_DIR'] = prevgitdir
49
50 def gitpipe(self, s):
51 prevgitdir = os.environ.get('GIT_DIR')
52 os.environ['GIT_DIR'] = self.path
53 try:
54 return util.popen3(s)
55 finally:
56 if prevgitdir is None:
57 del os.environ['GIT_DIR']
58 else:
59 os.environ['GIT_DIR'] = prevgitdir
60
49 else: 61 else:
50 def gitopen(self, s, err=None): 62 def gitopen(self, s, err=None):
51 if err == subprocess.PIPE: 63 if err == subprocess.PIPE:
52 (sin, so, se) = util.popen3('GIT_DIR=%s %s' % (self.path, s)) 64 (sin, so, se) = util.popen3('GIT_DIR=%s %s' % (self.path, s))
53 return so 65 return so
54 elif err == subprocess.STDOUT: 66 elif err == subprocess.STDOUT:
55 return self.popen_with_stderr(s) 67 return self.popen_with_stderr(s)
56 else: 68 else:
57 return util.popen('GIT_DIR=%s %s' % (self.path, s), 'rb') 69 return util.popen('GIT_DIR=%s %s' % (self.path, s), 'rb')
58 70
71 def gitpipe(self, s):
72 return util.popen3('GIT_DIR=%s %s' % (self.path, s))
73
59 def popen_with_stderr(self, s): 74 def popen_with_stderr(self, s):
60 p = subprocess.Popen(s, shell=True, bufsize=-1, 75 p = subprocess.Popen(s, shell=True, bufsize=-1,
61 close_fds=util.closefds, 76 close_fds=util.closefds,
62 stdin=subprocess.PIPE, 77 stdin=subprocess.PIPE,
63 stdout=subprocess.PIPE, 78 stdout=subprocess.PIPE,
81 96
82 checktool('git', 'git') 97 checktool('git', 'git')
83 98
84 self.path = path 99 self.path = path
85 self.submodules = [] 100 self.submodules = []
101
102 self.catfilepipe = self.gitpipe('git cat-file --batch')
103
104 def after(self):
105 for f in self.catfilepipe:
106 f.close()
86 107
87 def getheads(self): 108 def getheads(self):
88 if not self.rev: 109 if not self.rev:
89 heads, ret = self.gitread('git rev-parse --branches --remotes') 110 heads, ret = self.gitread('git rev-parse --branches --remotes')
90 heads = heads.splitlines() 111 heads = heads.splitlines()
96 return heads 117 return heads
97 118
98 def catfile(self, rev, type): 119 def catfile(self, rev, type):
99 if rev == hex(nullid): 120 if rev == hex(nullid):
100 raise IOError 121 raise IOError
101 data, ret = self.gitread("git cat-file %s %s" % (type, rev)) 122 self.catfilepipe[0].write(rev+'\n')
102 if ret: 123 self.catfilepipe[0].flush()
124 info = self.catfilepipe[1].readline().split()
125 if info[1] != type:
103 raise util.Abort(_('cannot read %r object at %s') % (type, rev)) 126 raise util.Abort(_('cannot read %r object at %s') % (type, rev))
127 size = int(info[2])
128 data = self.catfilepipe[1].read(size)
129 if len(data) < size:
130 raise util.Abort(_('cannot read %r object at %s: %s') % (type, rev))
131 # read the trailing newline
132 self.catfilepipe[1].read(1)
104 return data 133 return data
105 134
106 def getfile(self, name, rev): 135 def getfile(self, name, rev):
107 if name == '.hgsub': 136 if name == '.hgsub':
108 data = '\n'.join([m.hgsub() for m in self.submoditer()]) 137 data = '\n'.join([m.hgsub() for m in self.submoditer()])