view contrib/dumprevlog @ 45095:8e04607023e5

procutil: ensure that procutil.std{out,err}.write() writes all bytes Python 3 offers different kind of streams and it’s not guaranteed for all of them that calling write() writes all bytes. When Python is started in unbuffered mode, sys.std{out,err}.buffer are instances of io.FileIO, whose write() can write less bytes for platform-specific reasons (e.g. Linux has a 0x7ffff000 bytes maximum and could write less if interrupted by a signal; when writing to Windows consoles, it’s limited to 32767 bytes to avoid the "not enough space" error). This can lead to silent loss of data, both when using sys.std{out,err}.buffer (which may in fact not be a buffered stream) and when using the text streams sys.std{out,err} (I’ve created a CPython bug report for that: https://bugs.python.org/issue41221). Python may fix the problem at some point. For now, we implement our own wrapper for procutil.std{out,err} that calls the raw stream’s write() method until all bytes have been written. We don’t use sys.std{out,err} for larger writes, so I think it’s not worth the effort to patch them.
author Manuel Jacob <me@manueljacob.de>
date Fri, 10 Jul 2020 12:27:58 +0200
parents 4c1b4805db57
children c102b704edb5
line wrap: on
line source

#!/usr/bin/env python
# Dump revlogs as raw data stream
# $ find .hg/store/ -name "*.i" | xargs dumprevlog > repo.dump

from __future__ import absolute_import, print_function

import sys
from mercurial import (
    encoding,
    node,
    pycompat,
    revlog,
)
from mercurial.utils import procutil

for fp in (sys.stdin, sys.stdout, sys.stderr):
    procutil.setbinary(fp)


def binopen(path, mode=b'rb'):
    if b'b' not in mode:
        mode = mode + b'b'
    return open(path, pycompat.sysstr(mode))


binopen.options = {}


def printb(data, end=b'\n'):
    sys.stdout.flush()
    procutil.stdout.write(data + end)


for f in sys.argv[1:]:
    r = revlog.revlog(binopen, encoding.strtolocal(f))
    print("file:", f)
    for i in r:
        n = r.node(i)
        p = r.parents(n)
        d = r.revision(n)
        printb(b"node: %s" % node.hex(n))
        printb(b"linkrev: %d" % r.linkrev(i))
        printb(b"parents: %s %s" % (node.hex(p[0]), node.hex(p[1])))
        printb(b"length: %d" % len(d))
        printb(b"-start-")
        printb(d)
        printb(b"-end-")