Mercurial > hg
view hgext/git/manifest.py @ 45095:8e04607023e5
procutil: ensure that procutil.std{out,err}.write() writes all bytes
Python 3 offers different kinds of streams, and it’s not guaranteed for all of
them that calling write() writes all bytes.
When Python is started in unbuffered mode, sys.std{out,err}.buffer are
instances of io.FileIO, whose write() can write fewer bytes for
platform-specific reasons (e.g. Linux has a 0x7ffff000 bytes maximum and could
write less if interrupted by a signal; when writing to Windows consoles, it’s
limited to 32767 bytes to avoid the "not enough space" error). This can lead to
silent loss of data, both when using sys.std{out,err}.buffer (which may in fact
not be a buffered stream) and when using the text streams sys.std{out,err}
(I’ve created a CPython bug report for that:
https://bugs.python.org/issue41221).
Python may fix the problem at some point. For now, we implement our own wrapper
for procutil.std{out,err} that calls the raw stream’s write() method until all
bytes have been written. We don’t use sys.std{out,err} for larger writes, so I
think it’s not worth the effort to patch them.
author | Manuel Jacob <me@manueljacob.de> |
---|---|
date | Fri, 10 Jul 2020 12:27:58 +0200 |
parents | f294b4e14fd0 |
children | c8695439d7e3 |
line wrap: on
line source
from __future__ import absolute_import

from mercurial import (
    match as matchmod,
    pathutil,
    pycompat,
    util,
)
from mercurial.interfaces import (
    repository,
    util as interfaceutil,
)
from . import gitutil


pygit2 = gitutil.get_pygit2()


@interfaceutil.implementer(repository.imanifestdict)
class gittreemanifest(object):
    """Expose git trees (and optionally a builder's overlay) as a manifestdict.

    Very similar to mercurial.manifest.treemanifest.
    """

    def __init__(self, git_repo, root_tree, pending_changes):
        """Initializer.

        Args:
          git_repo: The git_repo we're walking (required to look up child
              trees).
          root_tree: The root Git tree object for this manifest.
          pending_changes: A dict in which pending changes will be
              tracked. The enclosing memgittreemanifestctx will use this to
              construct any required Tree objects in Git during its
              `write()` method. ``None`` means "start with no pending
              changes".
        """
        self._git_repo = git_repo
        self._tree = root_tree
        if pending_changes is None:
            pending_changes = {}
        # dict of path: Optional[Tuple(node, flags)]
        # (a value of None records a pending deletion)
        self._pending_changes = pending_changes

    def _resolve_entry(self, path):
        """Given a path, load its node and flags, or raise KeyError if missing.

        This takes into account any pending writes in the builder: a
        pending entry wins over the tree, and a pending deletion makes the
        path look missing.

        Returns:
          A ``(node, flags)`` tuple. For entries read from git, node is the
          raw object id and flags is one of b'', b'x' or b'l'.

        Raises:
          KeyError: the path is pending deletion or absent from the tree.
          ValueError: the entry has a file mode other than regular blob,
              executable blob or symlink.
        """
        if path in self._pending_changes:
            val = self._pending_changes[path]
            if val is None:
                raise KeyError(path)
            return val

        comps = pycompat.fsdecode(path).split('/')
        tree = self._tree
        for comp in comps[:-1]:
            # Always descend through a tree loaded from the repository by
            # object id, rather than indexing whatever the previous lookup
            # returned.
            tree = self._git_repo[tree[comp].id]
        ent = tree[comps[-1]]

        if ent.filemode == pygit2.GIT_FILEMODE_BLOB:
            flags = b''
        elif ent.filemode == pygit2.GIT_FILEMODE_BLOB_EXECUTABLE:
            flags = b'x'
        elif ent.filemode == pygit2.GIT_FILEMODE_LINK:
            flags = b'l'
        else:
            raise ValueError('unsupported mode %s' % oct(ent.filemode))
        return ent.id.raw, flags

    def __getitem__(self, path):
        """Return the node for path; raises KeyError if it is missing."""
        return self._resolve_entry(path)[0]

    def find(self, path):
        """Return the ``(node, flags)`` pair for path (KeyError if missing)."""
        return self._resolve_entry(path)

    def __len__(self):
        return len(list(self.walk(matchmod.always())))

    def __nonzero__(self):
        try:
            next(iter(self))
            return True
        except StopIteration:
            return False

    __bool__ = __nonzero__

    def __contains__(self, path):
        try:
            self._resolve_entry(path)
            return True
        except KeyError:
            return False

    def iterkeys(self):
        return self.walk(matchmod.always())

    def keys(self):
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __setitem__(self, path, node):
        # Preserve whatever flags the path currently has (b'' if new).
        self._pending_changes[path] = node, self.flags(path)

    def __delitem__(self, path):
        # TODO: should probably KeyError for already-deleted files?
        self._pending_changes[path] = None

    def filesnotin(self, other, match=None):
        """Return the set of files in this manifest that are not in other.

        When a matcher is given, only matching files are considered and
        "bad file" reports from the matcher are silenced.
        """
        if match is not None:
            match = matchmod.badmatch(match, lambda path, msg: None)
            sm2 = set(other.walk(match))
            return {f for f in self.walk(match) if f not in sm2}
        return {f for f in self if f not in other}

    @util.propertycache
    def _dirs(self):
        # NOTE(review): cached on first access; presumably goes stale if
        # pending changes are recorded afterwards -- confirm with callers.
        return pathutil.dirs(self)

    def hasdir(self, dir):
        return dir in self._dirs

    def diff(self, other, match=lambda x: True, clean=False):
        '''Finds changes between the current manifest and m2.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.

        If ``clean`` is true, files that are identical on both sides are
        reported too, with a value of None. ``match`` may be None, which
        is treated as "match everything".

        NOTE(review): the walk is driven by the underlying git trees of
        both manifests; paths that exist only as pending changes are not
        visited.
        '''
        if match is None:
            match = matchmod.always()

        result = {}
        stack = []

        def _iterativediff(t1, t2, subdir):
            """compares two trees and appends new tree nodes to examine to
            the stack"""
            if t1 is None:
                t1 = {}
            if t2 is None:
                t2 = {}

            for e1 in t1:
                realname = subdir + pycompat.fsencode(e1.name)
                try:
                    e2 = t2[e1.name]
                except KeyError:
                    e2 = None
                e2_is_tree = (
                    e2 is not None and e2.type == pygit2.GIT_OBJ_TREE
                )

                if e1.type == pygit2.GIT_OBJ_TREE:
                    if e2 is not None and not e2_is_tree and match(realname):
                        # A directory here shadows a plain file in other.
                        # The second loop below skips names present in t1,
                        # so the file has to be reported from here.
                        try:
                            n2, fl2 = other.find(realname)
                        except KeyError:
                            pass
                        else:
                            result[realname] = ((None, b''), (n2, fl2))
                    stack.append(
                        (realname + b'/', e1, e2 if e2_is_tree else None)
                    )
                    continue

                n1, fl1 = self.find(realname)
                n2, fl2 = None, b''
                if e2_is_tree:
                    # A file here, a directory in other: the file is gone
                    # on the other side, and the directory's contents
                    # still need to be walked. Do not ask other for
                    # (node, flags) of a tree.
                    stack.append((realname + b'/', None, e2))
                elif e2 is not None:
                    try:
                        n2, fl2 = other.find(realname)
                    except KeyError:
                        # e.g. a pending deletion on the other side
                        pass
                if not match(realname):
                    continue
                if n1 != n2 or fl1 != fl2:
                    result[realname] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[realname] = None

            for e2 in t2:
                if e2.name in t1:
                    # handled by the loop above
                    continue
                realname = subdir + pycompat.fsencode(e2.name)
                if e2.type == pygit2.GIT_OBJ_TREE:
                    stack.append((realname + b'/', None, e2))
                elif match(realname):
                    n2, fl2 = other.find(realname)
                    result[realname] = ((None, b''), (n2, fl2))

        _iterativediff(self._tree, other._tree, b'')
        while stack:
            subdir, t1, t2 = stack.pop()
            # stack is populated in the function call
            _iterativediff(t1, t2, subdir)

        return result

    def setflag(self, path, flag):
        """Record a pending change of path's flag, keeping its node."""
        node, unused_flag = self._resolve_entry(path)
        self._pending_changes[path] = node, flag

    def get(self, path, default=None):
        try:
            return self._resolve_entry(path)[0]
        except KeyError:
            return default

    def flags(self, path):
        """Return the flags for path, or b'' if the path is missing."""
        try:
            return self._resolve_entry(path)[1]
        except KeyError:
            return b''

    def copy(self):
        """Return a new manifest over the same tree.

        The pending changes are copied, so edits to the copy do not leak
        back into this manifest.
        """
        return gittreemanifest(
            self._git_repo, self._tree, dict(self._pending_changes)
        )

    def items(self):
        """Yield ``(path, node)`` pairs for every file in the manifest."""
        for f in self:
            # TODO: build a proper iterator version of this
            yield f, self[f]

    def iteritems(self):
        return self.items()

    def iterentries(self):
        """Yield ``(path, node, flags)`` triples for every file."""
        for f in self:
            # TODO: build a proper iterator version of this
            node, flags = self._resolve_entry(f)
            yield f, node, flags

    def text(self):
        assert False  # TODO can this method move out of the manifest iface?

    def _walkonetree(self, tree, match, subdir):
        """Recursively yield the matching file paths (bytes) below tree.

        ``subdir`` is the bytes prefix (b'' or ending in b'/') prepended
        to each entry name.
        """
        for te in tree:
            # TODO: can we prune dir walks with the matcher?
            realname = subdir + pycompat.fsencode(te.name)
            if te.type == pygit2.GIT_OBJ_TREE:
                for inner in self._walkonetree(
                    self._git_repo[te.id], match, realname + b'/'
                ):
                    yield inner
            elif match(realname):
                # realname is already bytes; no further encoding needed
                yield realname

    def walk(self, match):
        """Return a sorted iterator over the matching paths.

        Pending additions are merged in and pending deletions removed.
        """
        # TODO: this is a very lazy way to merge in the pending
        # changes. There is absolutely room for optimization here by
        # being clever about walking over the sets...
        baseline = set(self._walkonetree(self._tree, match, b''))
        deleted = {p for p, v in self._pending_changes.items() if v is None}
        pend = {p for p in self._pending_changes if match(p)}
        return iter(sorted((baseline | pend) - deleted))


@interfaceutil.implementer(repository.imanifestrevisionstored)
class gittreemanifestctx(object):
    """Read-only manifest context backed by a single git tree."""

    def __init__(self, repo, gittree):
        self._repo = repo
        self._tree = gittree

    def read(self):
        return gittreemanifest(self._repo, self._tree, None)

    def readfast(self, shallow=False):
        return self.read()

    def copy(self):
        # NB: it's important that we return a memgittreemanifestctx
        # because the caller expects a mutable manifest.
        return memgittreemanifestctx(self._repo, self._tree)

    def find(self, path):
        """Return the ``(node, flags)`` pair for path (KeyError if missing)."""
        return self.read().find(path)


@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memgittreemanifestctx(object):
    """Mutable manifest context that buffers changes until `write()`."""

    def __init__(self, repo, tree):
        self._repo = repo
        self._tree = tree
        # dict of path: Optional[Tuple(node, flags)]
        self._pending_changes = {}

    def read(self):
        # The manifest shares our pending-changes dict, so edits made
        # through it are what write() later turns into git trees.
        return gittreemanifest(self._repo, self._tree, self._pending_changes)

    def copy(self):
        # TODO: if we have a builder in play, what should happen here?
        # Maybe we can shuffle copy() into the immutable interface.
        return memgittreemanifestctx(self._repo, self._tree)

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        """Write the pending changes out as git trees.

        Returns the raw id of the new root tree.
        """
        # We're not (for now, anyway) going to audit filenames, so we
        # can ignore added and removed.

        # TODO what does this match argument get used for? hopefully
        # just narrow?
        assert not match or isinstance(match, matchmod.alwaysmatcher)

        touched_dirs = pathutil.dirs(list(self._pending_changes))
        # Keys in both dicts are b'' for the root and b'/dir/sub' for
        # anything below it.
        trees = {
            b'': self._tree,
        }
        # path: treebuilder
        builders = {
            b'': self._repo.TreeBuilder(self._tree),
        }
        # get a TreeBuilder for every tree in the touched_dirs set
        for d in sorted(touched_dirs, key=lambda x: (len(x), x)):
            if d == b'':
                # loaded root tree above
                continue
            full = b''
            for part in d.split(b'/'):
                parent = trees[full]
                full += b'/' + part
                if full in builders:
                    # Already set up while handling another touched
                    # directory sharing this prefix.
                    continue
                try:
                    # Look the child tree up by object id, matching the
                    # repository lookups done elsewhere in this module.
                    new = self._repo[parent[pycompat.fsdecode(part)].id]
                except KeyError:
                    # new directory
                    new = None

                if new is not None:
                    # existing directory
                    trees[full] = new
                    builders[full] = self._repo.TreeBuilder(new)
                else:
                    # new directory, use an empty dict to easily
                    # generate KeyError as any nested new dirs get
                    # created.
                    trees[full] = {}
                    builders[full] = self._repo.TreeBuilder()

        modes = {
            b'': pygit2.GIT_FILEMODE_BLOB,
            b'x': pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            b'l': pygit2.GIT_FILEMODE_LINK,
        }
        for f, info in self._pending_changes.items():
            if b'/' not in f:
                dirname = b''
                basename = f
            else:
                dirname, basename = f.rsplit(b'/', 1)
                dirname = b'/' + dirname
            if info is None:
                builders[dirname].remove(pycompat.fsdecode(basename))
            else:
                n, fl = info
                builders[dirname].insert(
                    pycompat.fsdecode(basename),
                    gitutil.togitnode(n),
                    modes[fl],
                )

        # This visits the buffered TreeBuilders in deepest-first
        # order, bubbling up the edits.
        for b in sorted(builders, key=len, reverse=True):
            if b == b'':
                break
            cb = builders[b]
            dn, bn = b.rsplit(b'/', 1)
            builders[dn].insert(
                pycompat.fsdecode(bn), cb.write(), pygit2.GIT_FILEMODE_TREE
            )
        return builders[b''].write().raw