Mercurial > hg
view hgext/convert/convcmd.py @ 16719:e7bf09acd410
localrepo: add branchtip() method for faster single-branch lookups
For the PyPy repo with 744 branches and 843 branch heads, this brings
hg log -r default over NFS from:
CallCount Recursive Total(ms) Inline(ms) module:lineno(function)
3249 0 1.3222 1.3222 <open>
3244 0 0.6211 0.6211 <method 'close' of 'file' objects>
3243 0 0.0800 0.0800 <method 'read' of 'file' objects>
3241 0 0.0660 0.0660 <method 'seek' of 'file' objects>
3905 0 0.0476 0.0476 <zlib.decompress>
3281 0 2.6756 0.0472 mercurial.changelog:182(read)
+3281 0 2.5256 0.0453 +mercurial.revlog:881(revision)
+3276 0 0.0389 0.0196 +mercurial.changelog:28(decodeextra)
+6562 0 0.0123 0.0123 +<method 'split' of 'str' objects>
+6562 0 0.0408 0.0073 +mercurial.encoding:61(tolocal)
+3281 0 0.0054 0.0054 +<method 'index' of 'str' objects>
3241 0 2.2464 0.0456 mercurial.revlog:818(_loadchunk)
+3241 0 0.6205 0.6205 +<method 'close' of 'file' objects>
+3241 0 0.0765 0.0765 +<method 'read' of 'file' objects>
+3241 0 0.0660 0.0660 +<method 'seek' of 'file' objects>
+3241 0 1.4209 0.0135 +mercurial.store:374(__call__)
+3241 0 0.0122 0.0107 +mercurial.revlog:810(_addchunk)
3281 0 2.5256 0.0453 mercurial.revlog:881(revision)
+3280 0 0.0175 0.0175 +mercurial.revlog:305(rev)
+3281 0 2.2819 0.0119 +mercurial.revlog:847(_chunkraw)
+3281 0 0.0603 0.0083 +mercurial.revlog:945(_checkhash)
+3281 0 0.0051 0.0051 +mercurial.revlog:349(flags)
+3281 0 0.0040 0.0040 +<mercurial.mpatch.patches>
13682 0 0.0479 0.0248 <method 'decode' of 'str' objects>
+7418 0 0.0228 0.0076 +encodings.utf_8:15(decode)
+1 0 0.0003 0.0000 +encodings:71(search_function)
3248 0 1.3995 0.0246 mercurial.scmutil:218(__call__)
+3248 0 1.3222 1.3222 +<open>
+3248 0 0.0235 0.0184 +os.path:80(split)
+3248 0 0.0084 0.0068 +mercurial.scmutil:92(__call__)
Time: real 2.750 secs (user 0.680+0.000 sys 0.360+0.000)
down to:
CallCount Recursive Total(ms) Inline(ms) module:lineno(function)
55 31 0.0197 0.0163 <__import__>
+1 0 0.0006 0.0002 +mercurial.context:8(<module>)
+1 0 0.0042 0.0001 +mercurial.revlog:12(<module>)
+1 0 0.0002 0.0001 +mercurial.match:8(<module>)
+1 0 0.0003 0.0001 +mercurial.dirstate:7(<module>)
+1 0 0.0057 0.0001 +mercurial.changelog:8(<module>)
1 0 0.0117 0.0032 mercurial.localrepo:525(_readbranchcache)
+844 0 0.0015 0.0015 +<binascii.unhexlify>
+845 0 0.0010 0.0010 +<method 'split' of 'str' objects>
+843 0 0.0045 0.0009 +mercurial.encoding:61(tolocal)
+843 0 0.0004 0.0004 +<method 'setdefault' of 'dict' objects>
+1 0 0.0003 0.0003 +<method 'close' of 'file' objects>
3 0 0.0029 0.0029 <method 'read' of 'file' objects>
9 0 0.0018 0.0018 <open>
990 0 0.0017 0.0017 <binascii.unhexlify>
53 0 0.0016 0.0016 mercurial.demandimport:43(__init__)
862 0 0.0015 0.0015 <_codecs.utf_8_decode>
862 0 0.0037 0.0014 <method 'decode' of 'str' objects>
+862 0 0.0023 0.0008 +encodings.utf_8:15(decode)
981 0 0.0011 0.0011 <method 'split' of 'str' objects>
861 0 0.0046 0.0009 mercurial.encoding:61(tolocal)
+861 0 0.0037 0.0014 +<method 'decode' of 'str' objects>
862 0 0.0023 0.0008 encodings.utf_8:15(decode)
+862 0 0.0015 0.0015 +<_codecs.utf_8_decode>
4 0 0.0008 0.0008 <method 'close' of 'file' objects>
179 154 0.0202 0.0004 mercurial.demandimport:83(__getattribute__)
+36 11 0.0199 0.0003 +mercurial.demandimport:55(_load)
+72 0 0.0001 0.0001 +mercurial.demandimport:83(__getattribute__)
+36 0 0.0000 0.0000 +<getattr>
1 0 0.0015 0.0004 mercurial.tags:148(_readtagcache)
Time: real 0.060 secs (user 0.030+0.000 sys 0.010+0.000)
author | Brodie Rao <brodie@sf.io> |
---|---|
date | Sun, 13 May 2012 14:04:04 +0200 |
parents | f366d4c2ff34 |
children | eaf6a6d7f015 |
line wrap: on
line source
# convcmd - convert extension commands definition # # Copyright 2005-2007 Matt Mackall <mpm@selenic.com> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from common import NoRepo, MissingTool, SKIPREV, mapfile from cvs import convert_cvs from darcs import darcs_source from git import convert_git from hg import mercurial_source, mercurial_sink from subversion import svn_source, svn_sink from monotone import monotone_source from gnuarch import gnuarch_source from bzr import bzr_source from p4 import p4_source import filemap, common import os, shutil from mercurial import hg, util, encoding from mercurial.i18n import _ orig_encoding = 'ascii' def recode(s): if isinstance(s, unicode): return s.encode(orig_encoding, 'replace') else: return s.decode('utf-8').encode(orig_encoding, 'replace') source_converters = [ ('cvs', convert_cvs, 'branchsort'), ('git', convert_git, 'branchsort'), ('svn', svn_source, 'branchsort'), ('hg', mercurial_source, 'sourcesort'), ('darcs', darcs_source, 'branchsort'), ('mtn', monotone_source, 'branchsort'), ('gnuarch', gnuarch_source, 'branchsort'), ('bzr', bzr_source, 'branchsort'), ('p4', p4_source, 'branchsort'), ] sink_converters = [ ('hg', mercurial_sink), ('svn', svn_sink), ] def convertsource(ui, path, type, rev): exceptions = [] if type and type not in [s[0] for s in source_converters]: raise util.Abort(_('%s: invalid source repository type') % type) for name, source, sortmode in source_converters: try: if not type or name == type: return source(ui, path, rev), sortmode except (NoRepo, MissingTool), inst: exceptions.append(inst) if not ui.quiet: for inst in exceptions: ui.write("%s\n" % inst) raise util.Abort(_('%s: missing or unsupported repository') % path) def convertsink(ui, path, type): if type and type not in [s[0] for s in sink_converters]: raise util.Abort(_('%s: invalid destination repository type') % type) for name, sink in sink_converters: try: if not type or name == type: return sink(ui, path) except NoRepo, inst: ui.note(_("convert: %s\n") % inst) except MissingTool, inst: raise util.Abort('%s\n' % inst) raise util.Abort(_('%s: unknown repository type') % path) class progresssource(object): def __init__(self, ui, source, filecount): self.ui = ui self.source = source self.filecount = filecount self.retrieved = 0 def getfile(self, file, rev): self.retrieved += 1 self.ui.progress(_('getting files'), self.retrieved, item=file, total=self.filecount) return self.source.getfile(file, rev) def lookuprev(self, rev): return self.source.lookuprev(rev) def close(self): self.ui.progress(_('getting files'), None) class converter(object): def __init__(self, ui, source, dest, revmapfile, opts): self.source = source self.dest = dest self.ui = ui self.opts = opts self.commitcache = {} self.authors = {} self.authorfile = None # Record converted revisions persistently: maps source revision # ID to target revision ID (both strings). (This is how # incremental conversions work.) self.map = mapfile(ui, revmapfile) # Read first the dst author map if any authorfile = self.dest.authorfile() if authorfile and os.path.exists(authorfile): self.readauthormap(authorfile) # Extend/Override with new author map if necessary if opts.get('authormap'): self.readauthormap(opts.get('authormap')) self.authorfile = self.dest.authorfile() self.splicemap = common.parsesplicemap(opts.get('splicemap')) self.branchmap = mapfile(ui, opts.get('branchmap')) def walktree(self, heads): '''Return a mapping that identifies the uncommitted parents of every uncommitted changeset.''' visit = heads known = set() parents = {} while visit: n = visit.pop(0) if n in known or n in self.map: continue known.add(n) self.ui.progress(_('scanning'), len(known), unit=_('revisions')) commit = self.cachecommit(n) parents[n] = [] for p in commit.parents: parents[n].append(p) visit.append(p) self.ui.progress(_('scanning'), None) return parents def mergesplicemap(self, parents, splicemap): """A splicemap redefines child/parent relationships. Check the map contains valid revision identifiers and merge the new links in the source graph. """ for c in splicemap: if c not in parents: if not self.dest.hascommit(self.map.get(c, c)): # Could be in source but not converted during this run self.ui.warn(_('splice map revision %s is not being ' 'converted, ignoring\n') % c) continue pc = [] for p in splicemap[c]: # We do not have to wait for nodes already in dest. if self.dest.hascommit(self.map.get(p, p)): continue # Parent is not in dest and not being converted, not good if p not in parents: raise util.Abort(_('unknown splice map parent: %s') % p) pc.append(p) parents[c] = pc def toposort(self, parents, sortmode): '''Return an ordering such that every uncommitted changeset is preceeded by all its uncommitted ancestors.''' def mapchildren(parents): """Return a (children, roots) tuple where 'children' maps parent revision identifiers to children ones, and 'roots' is the list of revisions without parents. 'parents' must be a mapping of revision identifier to its parents ones. """ visit = parents.keys() seen = set() children = {} roots = [] while visit: n = visit.pop(0) if n in seen: continue seen.add(n) # Ensure that nodes without parents are present in the # 'children' mapping. children.setdefault(n, []) hasparent = False for p in parents[n]: if p not in self.map: visit.append(p) hasparent = True children.setdefault(p, []).append(n) if not hasparent: roots.append(n) return children, roots # Sort functions are supposed to take a list of revisions which # can be converted immediately and pick one def makebranchsorter(): """If the previously converted revision has a child in the eligible revisions list, pick it. Return the list head otherwise. Branch sort attempts to minimize branch switching, which is harmful for Mercurial backend compression. """ prev = [None] def picknext(nodes): next = nodes[0] for n in nodes: if prev[0] in parents[n]: next = n break prev[0] = next return next return picknext def makesourcesorter(): """Source specific sort.""" keyfn = lambda n: self.commitcache[n].sortkey def picknext(nodes): return sorted(nodes, key=keyfn)[0] return picknext def makedatesorter(): """Sort revisions by date.""" dates = {} def getdate(n): if n not in dates: dates[n] = util.parsedate(self.commitcache[n].date) return dates[n] def picknext(nodes): return min([(getdate(n), n) for n in nodes])[1] return picknext if sortmode == 'branchsort': picknext = makebranchsorter() elif sortmode == 'datesort': picknext = makedatesorter() elif sortmode == 'sourcesort': picknext = makesourcesorter() else: raise util.Abort(_('unknown sort mode: %s') % sortmode) children, actives = mapchildren(parents) s = [] pendings = {} while actives: n = picknext(actives) actives.remove(n) s.append(n) # Update dependents list for c in children.get(n, []): if c not in pendings: pendings[c] = [p for p in parents[c] if p not in self.map] try: pendings[c].remove(n) except ValueError: raise util.Abort(_('cycle detected between %s and %s') % (recode(c), recode(n))) if not pendings[c]: # Parents are converted, node is eligible actives.insert(0, c) pendings[c] = None if len(s) != len(parents): raise util.Abort(_("not all revisions were sorted")) return s def writeauthormap(self): authorfile = self.authorfile if authorfile: self.ui.status(_('Writing author map file %s\n') % authorfile) ofile = open(authorfile, 'w+') for author in self.authors: ofile.write("%s=%s\n" % (author, self.authors[author])) ofile.close() def readauthormap(self, authorfile): afile = open(authorfile, 'r') for line in afile: line = line.strip() if not line or line.startswith('#'): continue try: srcauthor, dstauthor = line.split('=', 1) except ValueError: msg = _('Ignoring bad line in author map file %s: %s\n') self.ui.warn(msg % (authorfile, line.rstrip())) continue srcauthor = srcauthor.strip() dstauthor = dstauthor.strip() if self.authors.get(srcauthor) in (None, dstauthor): msg = _('mapping author %s to %s\n') self.ui.debug(msg % (srcauthor, dstauthor)) self.authors[srcauthor] = dstauthor continue m = _('overriding mapping for author %s, was %s, will be %s\n') self.ui.status(m % (srcauthor, self.authors[srcauthor], dstauthor)) afile.close() def cachecommit(self, rev): commit = self.source.getcommit(rev) commit.author = self.authors.get(commit.author, commit.author) commit.branch = self.branchmap.get(commit.branch, commit.branch) self.commitcache[rev] = commit return commit def copy(self, rev): commit = self.commitcache[rev] changes = self.source.getchanges(rev) if isinstance(changes, basestring): if changes == SKIPREV: dest = SKIPREV else: dest = self.map[changes] self.map[rev] = dest return files, copies = changes pbranches = [] if commit.parents: for prev in commit.parents: if prev not in self.commitcache: self.cachecommit(prev) pbranches.append((self.map[prev], self.commitcache[prev].branch)) self.dest.setbranch(commit.branch, pbranches) try: parents = self.splicemap[rev] self.ui.status(_('spliced in %s as parents of %s\n') % (parents, rev)) parents = [self.map.get(p, p) for p in parents] except KeyError: parents = [b[0] for b in pbranches] source = progresssource(self.ui, self.source, len(files)) newnode = self.dest.putcommit(files, copies, parents, commit, source, self.map) source.close() self.source.converted(rev, newnode) self.map[rev] = newnode def convert(self, sortmode): try: self.source.before() self.dest.before() self.source.setrevmap(self.map) self.ui.status(_("scanning source...\n")) heads = self.source.getheads() parents = self.walktree(heads) self.mergesplicemap(parents, self.splicemap) self.ui.status(_("sorting...\n")) t = self.toposort(parents, sortmode) num = len(t) c = None self.ui.status(_("converting...\n")) for i, c in enumerate(t): num -= 1 desc = self.commitcache[c].desc if "\n" in desc: desc = desc.splitlines()[0] # convert log message to local encoding without using # tolocal() because the encoding.encoding convert() # uses is 'utf-8' self.ui.status("%d %s\n" % (num, recode(desc))) self.ui.note(_("source: %s\n") % recode(c)) self.ui.progress(_('converting'), i, unit=_('revisions'), total=len(t)) self.copy(c) self.ui.progress(_('converting'), None) tags = self.source.gettags() ctags = {} for k in tags: v = tags[k] if self.map.get(v, SKIPREV) != SKIPREV: ctags[k] = self.map[v] if c and ctags: nrev, tagsparent = self.dest.puttags(ctags) if nrev and tagsparent: # write another hash correspondence to override the previous # one so we don't end up with extra tag heads tagsparents = [e for e in self.map.iteritems() if e[1] == tagsparent] if tagsparents: self.map[tagsparents[0][0]] = nrev bookmarks = self.source.getbookmarks() cbookmarks = {} for k in bookmarks: v = bookmarks[k] if self.map.get(v, SKIPREV) != SKIPREV: cbookmarks[k] = self.map[v] if c and cbookmarks: self.dest.putbookmarks(cbookmarks) self.writeauthormap() finally: self.cleanup() def cleanup(self): try: self.dest.after() finally: self.source.after() self.map.close() def convert(ui, src, dest=None, revmapfile=None, **opts): global orig_encoding orig_encoding = encoding.encoding encoding.encoding = 'UTF-8' # support --authors as an alias for --authormap if not opts.get('authormap'): opts['authormap'] = opts.get('authors') if not dest: dest = hg.defaultdest(src) + "-hg" ui.status(_("assuming destination %s\n") % dest) destc = convertsink(ui, dest, opts.get('dest_type')) try: srcc, defaultsort = convertsource(ui, src, opts.get('source_type'), opts.get('rev')) except Exception: for path in destc.created: shutil.rmtree(path, True) raise sortmodes = ('branchsort', 'datesort', 'sourcesort') sortmode = [m for m in sortmodes if opts.get(m)] if len(sortmode) > 1: raise util.Abort(_('more than one sort mode specified')) sortmode = sortmode and sortmode[0] or defaultsort if sortmode == 'sourcesort' and not srcc.hasnativeorder(): raise util.Abort(_('--sourcesort is not supported by this data source')) fmap = opts.get('filemap') if fmap: srcc = filemap.filemap_source(ui, srcc, fmap) destc.setfilemapmode(True) if not revmapfile: try: revmapfile = destc.revmapfile() except Exception: revmapfile = os.path.join(destc, "map") c = converter(ui, srcc, destc, revmapfile, opts) c.convert(sortmode)