tests/md5sum.py
author FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
Fri, 03 Mar 2017 02:57:06 +0900
changeset 31220 e1d035905b2e
parent 29743 8d1cdee372e6
child 32870 3a64ac39b893
permissions -rwxr-xr-x
similar: compare between actual file contents for exact identity Before this patch, similarity detection logic (for addremove and automv) depends entirely on SHA-1 digesting. But this causes incorrect rename detection, if: - removing file A and adding file B occur at same committing, and - SHA-1 hash values of file A and B are same This may prevent security experts from managing sample files for SHAttered issue in Mercurial repository, for example. https://security.googleblog.com/2017/02/announcing-first-sha1-collision.html https://shattered.it/ Hash collision itself isn't so serious for core repository functionality of Mercurial, described by mpm as below, though. https://www.mercurial-scm.org/wiki/mpm/SHA1 This patch compares between actual file contents after hash comparison for exact identity. Even after this patch, SHA-1 is still used, because it is reasonable enough to quickly detect existence of "(almost) same" file. - replacing SHA-1 causes decreasing performance, and - replacement of it has ambiguity, yet Getting content of removed file (= rfctx.data()) at each exact comparison should be cheap enough, even though getting content of added one costs much. ======= ============== ===================== file fctx data() reads from ======= ============== ===================== removed filectx in-memory revlog data added workingfilectx storage ======= ============== =====================

#!/usr/bin/env python
#
# Based on python's Tools/scripts/md5sum.py
#
# This software may be used and distributed according to the terms
# of the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2, which is
# GPL-compatible.

from __future__ import absolute_import

import os
import sys

try:
    import hashlib
    md5 = hashlib.md5
except ImportError:
    import md5
    md5 = md5.md5

try:
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY)
except ImportError:
    pass

for filename in sys.argv[1:]:
    try:
        fp = open(filename, 'rb')
    except IOError as msg:
        sys.stderr.write('%s: Can\'t open: %s\n' % (filename, msg))
        sys.exit(1)

    m = md5()
    try:
        for data in iter(lambda: fp.read(8192), ''):
            m.update(data)
    except IOError as msg:
        sys.stderr.write('%s: I/O error: %s\n' % (filename, msg))
        sys.exit(1)
    sys.stdout.write('%s  %s\n' % (m.hexdigest(), filename))

sys.exit(0)