contrib/fixpax.py
author Matt Harbison <matt_harbison@yahoo.com>
Fri, 17 Nov 2017 00:06:45 -0500
changeset 35476 417e8e040102
parent 28354 00f317788d33
permissions -rwxr-xr-x
lfs: verify lfs object content when transferring to and from the remote store This avoids inserting corrupt files into the usercache, and local and remote stores. One down side is that the bad file won't be available locally for forensic purposes after a remote download. I'm thinking about adding an 'incoming' directory to the local lfs store to handle the download, and then move it to the 'objects' directory after it passes verification. That would have the additional benefit of not concatenating each transfer chunk in memory until the full file is transferred. Verification isn't needed when the data is passed back through the revlog interface or when the oid was just calculated, but otherwise it is on by default. The additional overhead should be well worth avoiding problems with file based remote stores, or buggy lfs servers. Having two different verify functions is a little sad, but the full data of the blob is mostly passed around in memory, because that's what the revlog interface wants. The upload function, however, chunks up the data. It would be ideal if that was how the content is always handled, but that's probably a huge project. I don't really like printing the long hash, but `hg debugdata` isn't a public interface, and is the only way to get it. The filelog and revision info is nowhere near this area, so recommending `hg verify` is the easiest thing to do.

#!/usr/bin/env python
# fixpax - fix ownership in bdist_mpkg output
#
# Copyright 2015 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# MIT license (http://opensource.org/licenses/MIT)

"""Set file ownership to 0 in an Archive.pax.gz.
Suitable for fixing the archives found in bdist_mpkg output:
*.mpkg/Contents/Packages/*.pkg/Contents/Archive.pax.gz
"""

from __future__ import absolute_import, print_function
import gzip
import os
import sys

def fixpax(iname, oname):
    """Copy the gzipped archive ``iname`` to ``oname`` with uid/gid zeroed.

    Each archive entry starts with a 76-byte header made of fixed-width
    ASCII fields, in this order: magic, dev, ino, mode, uid, gid, nlink,
    rdev (6 bytes each), mtime (11), namesize (6) and filesize (11).  The
    header is followed by ``namesize`` bytes of entry name and ``filesize``
    bytes of file data, both sizes being octal numbers.  (This layout looks
    like the portable ASCII cpio header -- confirm against the cpio format
    documentation before relying on that.)

    Every entry is copied unchanged except that the uid and gid fields are
    replaced with "000000".  Processing stops after the entry whose name
    starts with "TRAILER!!!"; anything that follows it is copied verbatim.

    Raises ValueError if the input ends before a complete header could be
    read, or if a size field is not a valid octal number.
    """
    headerlen = 76
    uidstart = 24  # magic + dev + ino + mode
    gidend = 36  # ... + uid + gid
    namesizestart = 59  # ... + nlink + rdev + mtime
    filesizestart = 65  # ... + namesize

    # The context managers guarantee both gzip streams are closed even when
    # the archive turns out to be malformed halfway through.
    with gzip.GzipFile(iname) as i, gzip.GzipFile(oname, "w") as o:
        while True:
            header = i.read(headerlen)
            if len(header) < headerlen:
                # No trailer entry was seen before the data ran out.
                raise ValueError('truncated archive header in %s' % iname)

            namesize = int(header[namesizestart:filesizestart], 8)
            filesize = int(header[filesizestart:headerlen], 8)
            name = i.read(namesize)
            data = i.read(filesize)

            # GzipFile reads and writes bytes, so the replacement fields and
            # the trailer marker must be bytes literals to work on Python 3.
            o.write(header[:uidstart])
            o.write(b"000000")  # uid
            o.write(b"000000")  # gid
            o.write(header[gidend:])
            o.write(name)
            o.write(data)

            if name.startswith(b"TRAILER!!!"):
                # Copy whatever trails the last entry without touching it.
                o.write(i.read())
                break

if __name__ == '__main__':
    # Fix every archive named on the command line in place.
    for iname in sys.argv[1:]:
        print('fixing file ownership in %s' % iname)
        # Name the temporary output after the archive being processed, not
        # after the first argument, so it is created next to that archive.
        oname = iname + '.tmp'
        fixpax(iname, oname)
        # Replace the original archive with the fixed copy.
        os.rename(oname, iname)