# HG changeset patch # User Gregory Szorc # Date 1519503740 28800 # Node ID bfe38f787d5bd9577ca1190e42a7c772f42e36ab # Parent e7411fb7ba7ff616e9657efebfa75478042d0ca2 util: add a file object proxy that can notify observers There are various places in Mercurial where we may want to instrument low-level I/O. The use cases I can think of all involve development-type activities like monitoring the raw bytes passing through a file (for testing and debugging), counting the number of I/O function calls (for performance monitoring), and changing the behavior of I/O function calls (e.g. simulating a failure) (to facilitate testing). This commit invents a mechanism to wrap a file object so we can observe activity on it. We have similar functionality in badserverext.py. But that's a test-only extension and is pretty specific to the HTTP server. I would like a mechanism in core that is sufficiently generic so it can be used by multiple consumers, including `hg debug*` commands. The added code consists of a proxy type for file objects. It is bound to an "observer," which receives callbacks whenever I/O methods are called. We also add an implementation of an observer that logs specific I/O events. This observer will be used in an upcoming commit to record low-level wire protocol activity. A helper function to convert a file object into an observed file object has also been implemented. I don't anticipate any critical functionality in core using these types. So I don't think explicit test coverage is worth implementing. Differential Revision: https://phab.mercurial-scm.org/D2462 diff -r e7411fb7ba7f -r bfe38f787d5b mercurial/util.py --- a/mercurial/util.py Sat Feb 24 12:07:21 2018 -0800 +++ b/mercurial/util.py Sat Feb 24 12:22:20 2018 -0800 @@ -488,6 +488,228 @@ env=env) return p.stdin, p.stdout, p.stderr, p +class fileobjectproxy(object): + """A proxy around file objects that tells a watcher when events occur. + + This type is intended to only be used for testing purposes. Think hard + before using it in important code. + """ + __slots__ = ( + r'_orig', + r'_observer', + ) + + def __init__(self, fh, observer): + object.__setattr__(self, '_orig', fh) + object.__setattr__(self, '_observer', observer) + + def __getattribute__(self, name): + ours = { + # IOBase + r'close', + # closed if a property + r'fileno', + r'flush', + r'isatty', + r'readable', + r'readline', + r'readlines', + r'seek', + r'seekable', + r'tell', + r'truncate', + r'writable', + r'writelines', + # RawIOBase + r'read', + r'readall', + r'readinto', + r'write', + # BufferedIOBase + # raw is a property + r'detach', + # read defined above + r'read1', + # readinto defined above + # write defined above + } + + # We only observe some methods. + if name in ours: + return object.__getattribute__(self, name) + + return getattr(object.__getattribute__(self, r'_orig'), name) + + def __delattr__(self, name): + return delattr(object.__getattribute__(self, r'_orig'), name) + + def __setattr__(self, name, value): + return setattr(object.__getattribute__(self, r'_orig'), name, value) + + def __iter__(self): + return object.__getattribute__(self, r'_orig').__iter__() + + def _observedcall(self, name, *args, **kwargs): + # Call the original object. + orig = object.__getattribute__(self, r'_orig') + res = getattr(orig, name)(*args, **kwargs) + + # Call a method on the observer of the same name with arguments + # so it can react, log, etc. + observer = object.__getattribute__(self, r'_observer') + fn = getattr(observer, name, None) + if fn: + fn(res, *args, **kwargs) + + return res + + def close(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'close', *args, **kwargs) + + def fileno(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'fileno', *args, **kwargs) + + def flush(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'flush', *args, **kwargs) + + def isatty(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'isatty', *args, **kwargs) + + def readable(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'readable', *args, **kwargs) + + def readline(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'readline', *args, **kwargs) + + def readlines(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'readlines', *args, **kwargs) + + def seek(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'seek', *args, **kwargs) + + def seekable(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'seekable', *args, **kwargs) + + def tell(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'tell', *args, **kwargs) + + def truncate(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'truncate', *args, **kwargs) + + def writable(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'writable', *args, **kwargs) + + def writelines(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'writelines', *args, **kwargs) + + def read(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'read', *args, **kwargs) + + def readall(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'readall', *args, **kwargs) + + def readinto(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'readinto', *args, **kwargs) + + def write(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'write', *args, **kwargs) + + def detach(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'detach', *args, **kwargs) + + def read1(self, *args, **kwargs): + return object.__getattribute__(self, r'_observedcall')( + r'read1', *args, **kwargs) + +DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)} +DATA_ESCAPE_MAP.update({ + b'\\': b'\\\\', + b'\r': br'\r', + b'\n': br'\n', +}) +DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]') + +def escapedata(s): + return DATA_ESCAPE_RE.sub(lambda m: DATA_ESCAPE_MAP[m.group(0)], s) + +class fileobjectobserver(object): + """Logs file object activity.""" + def __init__(self, fh, name, reads=True, writes=True, logdata=False): + self.fh = fh + self.name = name + self.logdata = logdata + self.reads = reads + self.writes = writes + + def _writedata(self, data): + if not self.logdata: + self.fh.write('\n') + return + + # Simple case writes all data on a single line. + if b'\n' not in data: + self.fh.write(': %s\n' % escapedata(data)) + return + + # Data with newlines is written to multiple lines. + self.fh.write(':\n') + lines = data.splitlines(True) + for line in lines: + self.fh.write('%s> %s\n' % (self.name, escapedata(line))) + + def read(self, res, size=-1): + if not self.reads: + return + + self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res))) + self._writedata(res) + + def readline(self, res, limit=-1): + if not self.reads: + return + + self.fh.write('%s> readline() -> %d' % (self.name, len(res))) + self._writedata(res) + + def write(self, res, data): + if not self.writes: + return + + self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res)) + self._writedata(data) + + def flush(self, res): + if not self.writes: + return + + self.fh.write('%s> flush() -> %r\n' % (self.name, res)) + +def makeloggingfileobject(logh, fh, name, reads=True, writes=True, + logdata=False): + """Turn a file object into a logging file object.""" + + observer = fileobjectobserver(logh, name, reads=reads, writes=writes, + logdata=logdata) + return fileobjectproxy(fh, observer) + def version(): """Return version information if available.""" try: