Mercurial > hg
view hgext/convert/git.py @ 45095:8e04607023e5
procutil: ensure that procutil.std{out,err}.write() writes all bytes
Python 3 offers different kind of streams and it’s not guaranteed for all of
them that calling write() writes all bytes.
When Python is started in unbuffered mode, sys.std{out,err}.buffer are
instances of io.FileIO, whose write() can write less bytes for
platform-specific reasons (e.g. Linux has a 0x7ffff000 bytes maximum and could
write less if interrupted by a signal; when writing to Windows consoles, it’s
limited to 32767 bytes to avoid the "not enough space" error). This can lead to
silent loss of data, both when using sys.std{out,err}.buffer (which may in fact
not be a buffered stream) and when using the text streams sys.std{out,err}
(I’ve created a CPython bug report for that:
https://bugs.python.org/issue41221).
Python may fix the problem at some point. For now, we implement our own wrapper
for procutil.std{out,err} that calls the raw stream’s write() method until all
bytes have been written. We don’t use sys.std{out,err} for larger writes, so I
think it’s not worth the effort to patch them.
author | Manuel Jacob <me@manueljacob.de> |
---|---|
date | Fri, 10 Jul 2020 12:27:58 +0200 |
parents | 8ff1ecfadcd1 |
children | 59fa3890d40a |
line wrap: on
line source
# git.py - git support for the convert extension # # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from __future__ import absolute_import import os from mercurial.i18n import _ from mercurial import ( config, error, node as nodemod, pycompat, ) from . import common class submodule(object): def __init__(self, path, node, url): self.path = path self.node = node self.url = url def hgsub(self): return b"%s = [git]%s" % (self.path, self.url) def hgsubstate(self): return b"%s %s" % (self.node, self.path) # Keys in extra fields that should not be copied if the user requests. bannedextrakeys = { # Git commit object built-ins. b'tree', b'parent', b'author', b'committer', # Mercurial built-ins. b'branch', b'close', } class convert_git(common.converter_source, common.commandline): # Windows does not support GIT_DIR= construct while other systems # cannot remove environment variable. Just assume none have # both issues. def _gitcmd(self, cmd, *args, **kwargs): return cmd(b'--git-dir=%s' % self.path, *args, **kwargs) def gitrun0(self, *args, **kwargs): return self._gitcmd(self.run0, *args, **kwargs) def gitrun(self, *args, **kwargs): return self._gitcmd(self.run, *args, **kwargs) def gitrunlines0(self, *args, **kwargs): return self._gitcmd(self.runlines0, *args, **kwargs) def gitrunlines(self, *args, **kwargs): return self._gitcmd(self.runlines, *args, **kwargs) def gitpipe(self, *args, **kwargs): return self._gitcmd(self._run3, *args, **kwargs) def __init__(self, ui, repotype, path, revs=None): super(convert_git, self).__init__(ui, repotype, path, revs=revs) common.commandline.__init__(self, ui, b'git') # Pass an absolute path to git to prevent from ever being interpreted # as a URL path = os.path.abspath(path) if os.path.isdir(path + b"/.git"): path += b"/.git" if not os.path.exists(path + b"/objects"): raise common.NoRepo( _(b"%s does not look like a Git repository") % path ) # The default value (50) is based on the default for 'git diff'. similarity = ui.configint(b'convert', b'git.similarity') if similarity < 0 or similarity > 100: raise error.Abort(_(b'similarity must be between 0 and 100')) if similarity > 0: self.simopt = [b'-C%d%%' % similarity] findcopiesharder = ui.configbool( b'convert', b'git.findcopiesharder' ) if findcopiesharder: self.simopt.append(b'--find-copies-harder') renamelimit = ui.configint(b'convert', b'git.renamelimit') self.simopt.append(b'-l%d' % renamelimit) else: self.simopt = [] common.checktool(b'git', b'git') self.path = path self.submodules = [] self.catfilepipe = self.gitpipe(b'cat-file', b'--batch') self.copyextrakeys = self.ui.configlist(b'convert', b'git.extrakeys') banned = set(self.copyextrakeys) & bannedextrakeys if banned: raise error.Abort( _(b'copying of extra key is forbidden: %s') % _(b', ').join(sorted(banned)) ) committeractions = self.ui.configlist( b'convert', b'git.committeractions' ) messagedifferent = None messagealways = None for a in committeractions: if a.startswith((b'messagedifferent', b'messagealways')): k = a v = None if b'=' in a: k, v = a.split(b'=', 1) if k == b'messagedifferent': messagedifferent = v or b'committer:' elif k == b'messagealways': messagealways = v or b'committer:' if messagedifferent and messagealways: raise error.Abort( _( b'committeractions cannot define both ' b'messagedifferent and messagealways' ) ) dropcommitter = b'dropcommitter' in committeractions replaceauthor = b'replaceauthor' in committeractions if dropcommitter and replaceauthor: raise error.Abort( _( b'committeractions cannot define both ' b'dropcommitter and replaceauthor' ) ) if dropcommitter and messagealways: raise error.Abort( _( b'committeractions cannot define both ' b'dropcommitter and messagealways' ) ) if not messagedifferent and not messagealways: messagedifferent = b'committer:' self.committeractions = { b'dropcommitter': dropcommitter, b'replaceauthor': replaceauthor, b'messagedifferent': messagedifferent, b'messagealways': messagealways, } def after(self): for f in self.catfilepipe: f.close() def getheads(self): if not self.revs: output, status = self.gitrun( b'rev-parse', b'--branches', b'--remotes' ) heads = output.splitlines() if status: raise error.Abort(_(b'cannot retrieve git heads')) else: heads = [] for rev in self.revs: rawhead, ret = self.gitrun(b'rev-parse', b'--verify', rev) heads.append(rawhead[:-1]) if ret: raise error.Abort(_(b'cannot retrieve git head "%s"') % rev) return heads def catfile(self, rev, ftype): if rev == nodemod.nullhex: raise IOError self.catfilepipe[0].write(rev + b'\n') self.catfilepipe[0].flush() info = self.catfilepipe[1].readline().split() if info[1] != ftype: raise error.Abort( _(b'cannot read %r object at %s') % (pycompat.bytestr(ftype), rev) ) size = int(info[2]) data = self.catfilepipe[1].read(size) if len(data) < size: raise error.Abort( _(b'cannot read %r object at %s: unexpected size') % (ftype, rev) ) # read the trailing newline self.catfilepipe[1].read(1) return data def getfile(self, name, rev): if rev == nodemod.nullhex: return None, None if name == b'.hgsub': data = b'\n'.join([m.hgsub() for m in self.submoditer()]) mode = b'' elif name == b'.hgsubstate': data = b'\n'.join([m.hgsubstate() for m in self.submoditer()]) mode = b'' else: data = self.catfile(rev, b"blob") mode = self.modecache[(name, rev)] return data, mode def submoditer(self): null = nodemod.nullhex for m in sorted(self.submodules, key=lambda p: p.path): if m.node != null: yield m def parsegitmodules(self, content): """Parse the formatted .gitmodules file, example file format: [submodule "sub"]\n \tpath = sub\n \turl = git://giturl\n """ self.submodules = [] c = config.config() # Each item in .gitmodules starts with whitespace that cant be parsed c.parse( b'.gitmodules', b'\n'.join(line.strip() for line in content.split(b'\n')), ) for sec in c.sections(): s = c[sec] if b'url' in s and b'path' in s: self.submodules.append(submodule(s[b'path'], b'', s[b'url'])) def retrievegitmodules(self, version): modules, ret = self.gitrun( b'show', b'%s:%s' % (version, b'.gitmodules') ) if ret: # This can happen if a file is in the repo that has permissions # 160000, but there is no .gitmodules file. self.ui.warn( _(b"warning: cannot read submodules config file in %s\n") % version ) return try: self.parsegitmodules(modules) except error.ParseError: self.ui.warn( _(b"warning: unable to parse .gitmodules in %s\n") % version ) return for m in self.submodules: node, ret = self.gitrun(b'rev-parse', b'%s:%s' % (version, m.path)) if ret: continue m.node = node.strip() def getchanges(self, version, full): if full: raise error.Abort(_(b"convert from git does not support --full")) self.modecache = {} cmd = ( [b'diff-tree', b'-z', b'--root', b'-m', b'-r'] + self.simopt + [version] ) output, status = self.gitrun(*cmd) if status: raise error.Abort(_(b'cannot read changes in %s') % version) changes = [] copies = {} seen = set() entry = None subexists = [False] subdeleted = [False] difftree = output.split(b'\x00') lcount = len(difftree) i = 0 skipsubmodules = self.ui.configbool(b'convert', b'git.skipsubmodules') def add(entry, f, isdest): seen.add(f) h = entry[3] p = entry[1] == b"100755" s = entry[1] == b"120000" renamesource = not isdest and entry[4][0] == b'R' if f == b'.gitmodules': if skipsubmodules: return subexists[0] = True if entry[4] == b'D' or renamesource: subdeleted[0] = True changes.append((b'.hgsub', nodemod.nullhex)) else: changes.append((b'.hgsub', b'')) elif entry[1] == b'160000' or entry[0] == b':160000': if not skipsubmodules: subexists[0] = True else: if renamesource: h = nodemod.nullhex self.modecache[(f, h)] = (p and b"x") or (s and b"l") or b"" changes.append((f, h)) while i < lcount: l = difftree[i] i += 1 if not entry: if not l.startswith(b':'): continue entry = tuple(pycompat.bytestr(p) for p in l.split()) continue f = l if entry[4][0] == b'C': copysrc = f copydest = difftree[i] i += 1 f = copydest copies[copydest] = copysrc if f not in seen: add(entry, f, False) # A file can be copied multiple times, or modified and copied # simultaneously. So f can be repeated even if fdest isn't. if entry[4][0] == b'R': # rename: next line is the destination fdest = difftree[i] i += 1 if fdest not in seen: add(entry, fdest, True) # .gitmodules isn't imported at all, so it being copied to # and fro doesn't really make sense if f != b'.gitmodules' and fdest != b'.gitmodules': copies[fdest] = f entry = None if subexists[0]: if subdeleted[0]: changes.append((b'.hgsubstate', nodemod.nullhex)) else: self.retrievegitmodules(version) changes.append((b'.hgsubstate', b'')) return (changes, copies, set()) def getcommit(self, version): c = self.catfile(version, b"commit") # read the commit hash end = c.find(b"\n\n") message = c[end + 2 :] message = self.recode(message) l = c[:end].splitlines() parents = [] author = committer = None extra = {} for e in l[1:]: n, v = e.split(b" ", 1) if n == b"author": p = v.split() tm, tz = p[-2:] author = b" ".join(p[:-2]) if author[0] == b"<": author = author[1:-1] author = self.recode(author) if n == b"committer": p = v.split() tm, tz = p[-2:] committer = b" ".join(p[:-2]) if committer[0] == b"<": committer = committer[1:-1] committer = self.recode(committer) if n == b"parent": parents.append(v) if n in self.copyextrakeys: extra[n] = v if self.committeractions[b'dropcommitter']: committer = None elif self.committeractions[b'replaceauthor']: author = committer if committer: messagealways = self.committeractions[b'messagealways'] messagedifferent = self.committeractions[b'messagedifferent'] if messagealways: message += b'\n%s %s\n' % (messagealways, committer) elif messagedifferent and author != committer: message += b'\n%s %s\n' % (messagedifferent, committer) tzs, tzh, tzm = tz[-5:-4] + b"1", tz[-4:-2], tz[-2:] tz = -int(tzs) * (int(tzh) * 3600 + int(tzm)) date = tm + b" " + (b"%d" % tz) saverev = self.ui.configbool(b'convert', b'git.saverev') c = common.commit( parents=parents, date=date, author=author, desc=message, rev=version, extra=extra, saverev=saverev, ) return c def numcommits(self): output, ret = self.gitrunlines(b'rev-list', b'--all') if ret: raise error.Abort( _(b'cannot retrieve number of commits in %s') % self.path ) return len(output) def gettags(self): tags = {} alltags = {} output, status = self.gitrunlines(b'ls-remote', b'--tags', self.path) if status: raise error.Abort(_(b'cannot read tags from %s') % self.path) prefix = b'refs/tags/' # Build complete list of tags, both annotated and bare ones for line in output: line = line.strip() if line.startswith(b"error:") or line.startswith(b"fatal:"): raise error.Abort(_(b'cannot read tags from %s') % self.path) node, tag = line.split(None, 1) if not tag.startswith(prefix): continue alltags[tag[len(prefix) :]] = node # Filter out tag objects for annotated tag refs for tag in alltags: if tag.endswith(b'^{}'): tags[tag[:-3]] = alltags[tag] else: if tag + b'^{}' in alltags: continue else: tags[tag] = alltags[tag] return tags def getchangedfiles(self, version, i): changes = [] if i is None: output, status = self.gitrunlines( b'diff-tree', b'--root', b'-m', b'-r', version ) if status: raise error.Abort(_(b'cannot read changes in %s') % version) for l in output: if b"\t" not in l: continue m, f = l[:-1].split(b"\t") changes.append(f) else: output, status = self.gitrunlines( b'diff-tree', b'--name-only', b'--root', b'-r', version, b'%s^%d' % (version, i + 1), b'--', ) if status: raise error.Abort(_(b'cannot read changes in %s') % version) changes = [f.rstrip(b'\n') for f in output] return changes def getbookmarks(self): bookmarks = {} # Handle local and remote branches remoteprefix = self.ui.config(b'convert', b'git.remoteprefix') reftypes = [ # (git prefix, hg prefix) (b'refs/remotes/origin/', remoteprefix + b'/'), (b'refs/heads/', b''), ] exclude = { b'refs/remotes/origin/HEAD', } try: output, status = self.gitrunlines(b'show-ref') for line in output: line = line.strip() rev, name = line.split(None, 1) # Process each type of branch for gitprefix, hgprefix in reftypes: if not name.startswith(gitprefix) or name in exclude: continue name = b'%s%s' % (hgprefix, name[len(gitprefix) :]) bookmarks[name] = rev except Exception: pass return bookmarks def checkrevformat(self, revstr, mapname=b'splicemap'): """ git revision string is a 40 byte hex """ self.checkhexformat(revstr, mapname)