tests/test-hashutil.py
author Simon Sapin <simon.sapin@octobus.net>
Fri, 30 Apr 2021 14:22:14 +0200
changeset 47134 7109a38830c9
parent 44087 dc9b53482689
child 48966 6000f5b25c9b
permissions -rw-r--r--
dirstate-tree: Fold "tracked descendants" counter update in main walk For the purpose of implementing `has_tracked_dir` (which means "has tracked descendants) without an expensive sub-tree traversal, we maintaing a counter of tracked descendants on each "directory" node of the tree-shaped dirstate. Before this changeset, mutating or inserting a node at a given path would involve: * Walking the tree from root through ancestors to find the node or the spot where to insert it * Looking at the previous node if any to decide what counter update is needed * Performing any node mutation * Walking the tree *again* to update counters in ancestor nodes When profiling `hg status` on a large repo, this second walk takes times while loading a the dirstate from disk. It turns out we have enough information to decide before he first tree walk what counter update is needed. This changeset merges the two walks, gaining ~10% of the total time for `hg update` (in the same hyperfine benchmark as the previous changeset). --- Profiling was done by compiling with this `.cargo/config`: [profile.release] debug = true then running with: py-spy record -r 500 -n -o /tmp/hg.json --format speedscope -- \ ./hg status -R $REPO --config experimental.dirstate-tree.in-memory=1 then visualizing the recorded JSON file in https://www.speedscope.app/ Differential Revision: https://phab.mercurial-scm.org/D10554

# Tests to ensure that sha1dc.sha1 is exactly a drop-in for
# hashlib.sha1 for our needs.
from __future__ import absolute_import

import hashlib
import unittest

import silenttestrunner

try:
    from mercurial.thirdparty import sha1dc
except ImportError:
    sha1dc = None


class hashertestsbase(object):
    def test_basic_hash(self):
        h = self.hasher()
        h.update(b'foo')
        self.assertEqual(
            '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33', h.hexdigest()
        )
        h.update(b'bar')
        self.assertEqual(
            '8843d7f92416211de9ebb963ff4ce28125932878', h.hexdigest()
        )

    def test_copy_hasher(self):
        h = self.hasher()
        h.update(b'foo')
        h2 = h.copy()
        h.update(b'baz')
        h2.update(b'bar')
        self.assertEqual(
            '21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest()
        )
        self.assertEqual(
            '8843d7f92416211de9ebb963ff4ce28125932878', h2.hexdigest()
        )

    def test_init_hasher(self):
        h = self.hasher(b'initial string')
        self.assertEqual(
            b'\xc9y|n\x1f3S\xa4:\xbaJ\xca,\xc1\x1a\x9e\xb8\xd8\xdd\x86',
            h.digest(),
        )

    def test_bytes_like_types(self):
        h = self.hasher()
        h.update(bytearray(b'foo'))
        h.update(memoryview(b'baz'))
        self.assertEqual(
            '21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest()
        )

        h = self.hasher(bytearray(b'foo'))
        h.update(b'baz')
        self.assertEqual(
            '21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest()
        )

        h = self.hasher(memoryview(b'foo'))
        h.update(b'baz')
        self.assertEqual(
            '21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest()
        )


class hashlibtests(unittest.TestCase, hashertestsbase):
    hasher = hashlib.sha1


if sha1dc:

    class sha1dctests(unittest.TestCase, hashertestsbase):
        hasher = sha1dc.sha1


if __name__ == '__main__':
    silenttestrunner.main(__name__)