revlog: make _partialmatch fail fast on almost-hex inputs
Before this change, resolving a revision like [0123456789^] on
a large repo can take multiple seconds because:
- hg does not realize this is a revset, so it tries various things,
  including _partialmatch(b"0123456789^")
- after the rust lookup fails, it falls back to pure hg
- pure hg takes all-but-last chars and converts them to binary,
which *succeeds*, so it does the expensive part.
--- a/mercurial/revlog.py Tue Jul 12 01:13:56 2022 +0200
+++ b/mercurial/revlog.py Mon Aug 15 16:12:41 2022 +0100
@@ -235,6 +235,8 @@
b' expected %d bytes from offset %d, data size is %d'
)
+hexdigits = b'0123456789abcdefABCDEF'
+
class revlog:
"""
@@ -1509,7 +1511,7 @@
ambiguous = True
# fall through to slow path that filters hidden revisions
except (AttributeError, ValueError):
- # we are pure python, or key was too short to search radix tree
+ # we are pure python, or key is not hex
pass
if ambiguous:
raise error.AmbiguousPrefixLookupError(
@@ -1523,6 +1525,11 @@
# hex(node)[:...]
l = len(id) // 2 * 2 # grab an even number of digits
try:
+ # we're dropping the last digit, so let's check that it's hex,
+ # to avoid the expensive computation below if it's not
+ if len(id) % 2 > 0:
+ if not (id[-1] in hexdigits):
+ return None
prefix = bin(id[:l])
except binascii.Error:
pass