view mercurial/node.py @ 52284:f4aede0f01af

rust-manifest: use `memchr` crate for all byte-finding needs While writing a very dumb manifest diffing algorithm for a proof-of-concept I saw that `Manifest::find_by_path` was much slower than I was expecting. It turns out that the Rust stdlib uses slow (all is relative) code when searching for byte positions for reasons ranging from portability, SIMD API stability, nobody doing the work, etc. `memch` is much faster for these purposes, so let's use it. I was measuring ~670ms of profile time in `find_by_path`, after this patch it went down to ~230ms.
author Raphaël Gomès <rgomes@octobus.net>
date Tue, 12 Nov 2024 23:20:04 +0100
parents f4733654f144
children
line wrap: on
line source

# node.py - basic nodeid manipulation for mercurial
#
# Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import annotations

import binascii

# This ugly style has a noticeable effect in manifest parsing
hex = binascii.hexlify
bin = binascii.unhexlify


def short(node):
    return hex(node[:6])


nullrev = -1

# pseudo identifier for working directory
# (experimental, so don't add too many dependencies on it)
wdirrev = 0x7FFFFFFF


class sha1nodeconstants:
    nodelen = 20

    # In hex, this is '0000000000000000000000000000000000000000'
    nullid = b"\0" * nodelen
    nullhex = hex(nullid)

    # Phony node value to stand-in for new files in some uses of
    # manifests.
    # In hex, this is '2121212121212121212121212121212121212121'
    newnodeid = b'!!!!!!!!!!!!!!!!!!!!'
    # In hex, this is '3030303030303030303030303030306164646564'
    addednodeid = b'000000000000000added'
    # In hex, this is '3030303030303030303030306d6f646966696564'
    modifiednodeid = b'000000000000modified'

    wdirfilenodeids = {newnodeid, addednodeid, modifiednodeid}

    # pseudo identifier for working directory
    # (experimental, so don't add too many dependencies on it)
    # In hex, this is 'ffffffffffffffffffffffffffffffffffffffff'
    wdirid = b"\xff" * nodelen
    wdirhex = hex(wdirid)


# legacy starting point for porting modules
nullid = sha1nodeconstants.nullid
nullhex = sha1nodeconstants.nullhex
newnodeid = sha1nodeconstants.newnodeid
addednodeid = sha1nodeconstants.addednodeid
modifiednodeid = sha1nodeconstants.modifiednodeid
wdirfilenodeids = sha1nodeconstants.wdirfilenodeids
wdirid = sha1nodeconstants.wdirid
wdirhex = sha1nodeconstants.wdirhex