contrib/undumprevlog
author Boris Feld <boris.feld@octobus.net>
Mon, 17 Dec 2018 10:42:19 +0100
changeset 40978 42f59d3f714d
parent 39947 a063b84ce064
child 43659 99e231afc29c
permissions -rwxr-xr-x
delta: exclude base candidate much smaller than the target. If a revision's full text is that much bigger than a base candidate's full text, we no longer consider that candidate. This solves a pathological case we encountered on a very specific repository. It contains a long series of changesets with a very small manifest (one file) co-existing with other changesets using a very large manifest. Without this filtering, we ended up considering a large number of tiny full snapshots as potential bases. This resulted in very large deltas (the size of the full text) and Mercurial spending 99% of its time compressing these deltas. The timing of a commit moved from about 400s to about 10s (still slow, but not ridiculously slow).

#!/usr/bin/env python
# Undump a dump from dumprevlog
# $ hg init
# $ undumprevlog < repo.dump

from __future__ import absolute_import, print_function

import sys
from mercurial import (
    encoding,
    node,
    pycompat,
    revlog,
    transaction,
    vfs as vfsmod,
)
from mercurial.utils import (
    procutil,
)

# Rebuild revlogs in the current directory from a dump read on stdin.
#
# The dump is a sequence of records.  Each header line has the form
# "<key>: <value>\n"; a "length:" line is followed by a start-marker line,
# exactly <length> bytes of revision data, and an end-marker line.

for fp in (sys.stdin, sys.stdout, sys.stderr):
    procutil.setbinary(fp)

# Read the dump as bytes: "length:" counts bytes and the revision data is
# arbitrary binary, so it must not go through a text-mode stream (which
# would count characters and decode the payload on Python 3).
# NOTE(review): relies on pycompat.stdin/stderr being the bytes-level
# counterparts of the pycompat.stdout already used below -- confirm for
# the targeted Mercurial version.
stdin = pycompat.stdin

opener = vfsmod.vfs(b'.', False)
# The transaction reports through the bytes-level stderr so that byte
# messages can be written on both Python 2 and Python 3.
tr = transaction.transaction(pycompat.stderr.write, opener,
                             {b'store': opener}, b"undump.journal")
try:
    while True:
        l = stdin.readline()
        if not l:
            # EOF: the whole dump has been consumed.
            break
        # Each value starts after "<key>: " and ends before the newline.
        if l.startswith(b"file:"):
            f = l[6:-1]
            r = revlog.revlog(opener, f)
            pycompat.stdout.write(b'%s\n' % f)
        elif l.startswith(b"node:"):
            # Parsed (which also validates the hex), but not passed to
            # addrevision() below.
            n = node.bin(l[6:-1])
        elif l.startswith(b"linkrev:"):
            lr = int(l[9:-1])
        elif l.startswith(b"parents:"):
            p = l[9:-1].split()
            p1 = node.bin(p[0])
            p2 = node.bin(p[1])
        elif l.startswith(b"length:"):
            length = int(l[8:-1])
            stdin.readline()  # start marker
            d = stdin.read(length)
            if len(d) != length:
                # A short read means the dump is truncated; refuse to
                # store a partial revision.
                raise EOFError('truncated dump: expected %d bytes of '
                               'revision data, got %d' % (length, len(d)))
            stdin.readline()  # end marker
            r.addrevision(d, tr, lr, p1, p2)

    tr.close()
finally:
    # No-op after a successful close(); otherwise aborts the transaction
    # so a failed import does not leave half-written revlogs behind.
    tr.release()