comparison hgext/convert/hg.py @ 24395:216fa1ba9993

convert: optimize convert of files that are unmodified from p2 in merges. Conversion of a merge starts with p1 and re-adds the files that were changed in the merge or came unmodified from p2. Files that are unmodified from p1 will thus not be touched and take no time. Files that are unmodified from p2 would be retrieved and rehashed. They would end up getting the same hash as in p2 and end up reusing the filelog entry and look like the p1 case ... but it was slow. Instead, make getchanges also return 'files that are unmodified from p2' so the sink can reuse the existing p2 entry instead of calling getfile. Reuse of filelog entries can make a big difference when files are big and have long revlog chains, so they take time to retrieve and hash, or when using an expensive custom getfile function (think http://mercurial.selenic.com/wiki/ConvertExtension#Customization with a code reformatter). This, in combination with changes to reuse filectx entries in localrepo._filecommit, makes 'unchanged from p2' almost as fast as 'unchanged from p1'. This is so far only implemented for the combination of hg source and hg sink. This is a refactoring/optimization. It is covered by existing tests, which show no changes - which is a good thing.
author Mads Kiilerich <madski@unity3d.com>
date Thu, 19 Mar 2015 17:40:19 +0100
parents 6ddc86eedc3b
children 884ef09cf658
comparison
equal deleted inserted replaced
24394:03163826b4e6 24395:216fa1ba9993
130 if not revid: 130 if not revid:
131 continue 131 continue
132 fp.write('%s %s\n' % (revid, s[1])) 132 fp.write('%s %s\n' % (revid, s[1]))
133 return fp.getvalue() 133 return fp.getvalue()
134 134
135 def putcommit(self, files, copies, parents, commit, source, revmap, full): 135 def putcommit(self, files, copies, parents, commit, source, revmap, full,
136 cleanp2):
136 files = dict(files) 137 files = dict(files)
138
137 def getfilectx(repo, memctx, f): 139 def getfilectx(repo, memctx, f):
140 if p2ctx and f in cleanp2 and f not in copies:
141 self.ui.debug('reusing %s from p2\n' % f)
142 return p2ctx[f]
138 try: 143 try:
139 v = files[f] 144 v = files[f]
140 except KeyError: 145 except KeyError:
141 return None 146 return None
142 data, mode = source.getfile(f, v) 147 data, mode = source.getfile(f, v)
197 extra['convert_revision'] = commit.rev 202 extra['convert_revision'] = commit.rev
198 203
199 while parents: 204 while parents:
200 p1 = p2 205 p1 = p2
201 p2 = parents.pop(0) 206 p2 = parents.pop(0)
207 p2ctx = None
208 if p2 != nullid:
209 p2ctx = self.repo[p2]
202 fileset = set(files) 210 fileset = set(files)
203 if full: 211 if full:
204 fileset.update(self.repo[p1]) 212 fileset.update(self.repo[p1])
205 fileset.update(self.repo[p2]) 213 fileset.update(self.repo[p2])
206 ctx = context.memctx(self.repo, (p1, p2), text, fileset, 214 ctx = context.memctx(self.repo, (p1, p2), text, fileset,
380 files = m + a + r 388 files = m + a + r
381 copyfiles = m + a 389 copyfiles = m + a
382 # getcopies() is also run for roots and before filtering so missing 390 # getcopies() is also run for roots and before filtering so missing
383 # revlogs are detected early 391 # revlogs are detected early
384 copies = self.getcopies(ctx, parents, copyfiles) 392 copies = self.getcopies(ctx, parents, copyfiles)
393 cleanp2 = set()
394 if len(parents) == 2:
395 cleanp2.update(self.repo.status(parents[1].node(), ctx.node(),
396 clean=True).clean)
385 changes = [(f, rev) for f in files if f not in self.ignored] 397 changes = [(f, rev) for f in files if f not in self.ignored]
386 changes.sort() 398 changes.sort()
387 return changes, copies 399 return changes, copies, cleanp2
388 400
389 def getcopies(self, ctx, parents, files): 401 def getcopies(self, ctx, parents, files):
390 copies = {} 402 copies = {}
391 for name in files: 403 for name in files:
392 if name in self.ignored: 404 if name in self.ignored: