storageutil: implement file identifier resolution method (BC)
revlog.lookup() has a number of advanced features, including partial
node matching.
These advanced features aren't needed for file id lookup because file
identifiers are almost always from internal sources. (An exception to
this is hgweb, which appears to have some code paths that attempt
to resolve a user-supplied string to a node.)
This commit implements a new function for resolving file identifiers
to nodes. It behaves like revlog.lookup() but without the
advanced features.
Tests reveal behavior changes:
* Partial hex nodes are no longer resolved to nodes.
* "-1" now returns nullid instead of raising LookupError.
* "0" on an empty store now raises LookupError instead of returning
nullid.
I'm not sure why "-1" wasn't recognized before. But it seems reasonable
to accept it as a value since integer -1 resolves to nullid.
These changes all seem reasonable to me. And with the exception of
partial hex node matching, we may want to consider changing
revlog.lookup() as well.
Differential Revision: https://phab.mercurial-scm.org/D4797
# storageutil.py - Storage functionality agnostic of backend implementation.
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import hashlib
import re
from ..i18n import _
from ..node import (
bin,
nullid,
)
from .. import (
error,
pycompat,
)
_nullhash = hashlib.sha1(nullid)
def hashrevisionsha1(text, p1, p2):
"""Compute the SHA-1 for revision data and its parents.
This hash combines both the current file contents and its history
in a manner that makes it easy to distinguish nodes with the same
content in the revision graph.
"""
# As of now, if one of the parent node is null, p2 is null
if p2 == nullid:
# deep copy of a hash is faster than creating one
s = _nullhash.copy()
s.update(p1)
else:
# none of the parent nodes are nullid
if p1 < p2:
a = p1
b = p2
else:
a = p2
b = p1
s = hashlib.sha1(a)
s.update(b)
s.update(text)
return s.digest()
METADATA_RE = re.compile(b'\x01\n')
def parsemeta(text):
"""Parse metadata header from revision data.
Returns a 2-tuple of (metadata, offset), where both can be None if there
is no metadata.
"""
# text can be buffer, so we can't use .startswith or .index
if text[:2] != b'\x01\n':
return None, None
s = METADATA_RE.search(text, 2).start()
mtext = text[2:s]
meta = {}
for l in mtext.splitlines():
k, v = l.split(b': ', 1)
meta[k] = v
return meta, s + 2
def packmeta(meta, text):
"""Add metadata to fulltext to produce revision text."""
keys = sorted(meta)
metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
return b'\x01\n%s\x01\n%s' % (metatext, text)
def iscensoredtext(text):
meta = parsemeta(text)[0]
return meta and b'censored' in meta
def filtermetadata(text):
"""Extract just the revision data from source text.
Returns ``text`` unless it has a metadata header, in which case we return
a new buffer without hte metadata.
"""
if not text.startswith(b'\x01\n'):
return text
offset = text.index(b'\x01\n', 2)
return text[offset + 2:]
def iterrevs(storelen, start=0, stop=None):
"""Iterate over revision numbers in a store."""
step = 1
if stop is not None:
if start > stop:
step = -1
stop += step
if stop > storelen:
stop = storelen
else:
stop = storelen
return pycompat.xrange(start, stop, step)
def fileidlookup(store, fileid, identifier):
"""Resolve the file node for a value.
``store`` is an object implementing the ``ifileindex`` interface.
``fileid`` can be:
* A 20 byte binary node.
* An integer revision number
* A 40 byte hex node.
* A bytes that can be parsed as an integer representing a revision number.
``identifier`` is used to populate ``error.LookupError`` with an identifier
for the store.
Raises ``error.LookupError`` on failure.
"""
if isinstance(fileid, int):
return store.node(fileid)
if len(fileid) == 20:
try:
store.rev(fileid)
return fileid
except error.LookupError:
pass
if len(fileid) == 40:
try:
rawnode = bin(fileid)
store.rev(rawnode)
return rawnode
except TypeError:
pass
try:
rev = int(fileid)
if b'%d' % rev != fileid:
raise ValueError
try:
return store.node(rev)
except (IndexError, TypeError):
pass
except (ValueError, OverflowError):
pass
raise error.LookupError(fileid, identifier, _('no match found'))