Mercurial > hg
changeset 49575:bbbb5213d043
typing: add basic type hints to stringutil.py
author | Matt Harbison <matt_harbison@yahoo.com> |
---|---|
date | Fri, 04 Nov 2022 22:59:16 -0400 |
parents | 2506c3ac73f4 |
children | 53e4f44ba0e8 |
files | mercurial/utils/stringutil.py |
diffstat | 1 files changed, 42 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/utils/stringutil.py Fri Nov 04 17:54:43 2022 -0400
+++ b/mercurial/utils/stringutil.py Fri Nov 04 22:59:16 2022 -0400
@@ -14,6 +14,11 @@
 import textwrap
 import types
 
+from typing import (
+    Optional,
+    overload,
+)
+
 from ..i18n import _
 from ..thirdparty import attr
 
@@ -30,6 +35,16 @@
 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
 
 
+@overload
+def reescape(pat: bytes) -> bytes:
+    ...
+
+
+@overload
+def reescape(pat: str) -> str:
+    ...
+
+
 def reescape(pat):
     """Drop-in replacement for re.escape."""
     # NOTE: it is intentional that this works on unicodes and not
@@ -45,12 +60,12 @@
     return pat.encode('latin1')
 
 
-def pprint(o, bprefix=False, indent=0, level=0):
+def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
     """Pretty print an object."""
     return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
 
 
-def pprintgen(o, bprefix=False, indent=0, level=0):
+def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
     """Pretty print an object to a generator of atoms.
 
     ``bprefix`` is a flag influencing whether bytestrings are preferred with
@@ -250,7 +265,7 @@
         yield pycompat.byterepr(o)
 
 
-def prettyrepr(o):
+def prettyrepr(o) -> bytes:
     """Pretty print a representation of a possibly-nested object"""
     lines = []
     rs = pycompat.byterepr(o)
@@ -281,7 +296,7 @@
     return b'\n'.join(b' ' * l + s for l, s in lines)
 
 
-def buildrepr(r):
+def buildrepr(r) -> bytes:
     """Format an optional printable representation from unexpanded bits
 
     ======== =================================
@@ -305,12 +320,12 @@
         return pprint(r)
 
 
-def binary(s):
+def binary(s: bytes) -> bool:
     """return true if a string is binary data"""
     return bool(s and b'\0' in s)
 
 
-def _splitpattern(pattern):
+def _splitpattern(pattern: bytes):
     if pattern.startswith(b're:'):
         return b're', pattern[3:]
     elif pattern.startswith(b'literal:'):
@@ -318,7 +333,7 @@
     return b'literal', pattern
 
 
-def stringmatcher(pattern, casesensitive=True):
+def stringmatcher(pattern: bytes, casesensitive: bool = True):
     """
     accepts a string, possibly starting with 're:' or 'literal:' prefix.
     returns the matcher name, pattern, and matcher function.
@@ -379,7 +394,7 @@
     raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
 
 
-def substringregexp(pattern, flags=0):
+def substringregexp(pattern: bytes, flags: int = 0):
     """Build a regexp object from a string pattern possibly starting with
     're:' or 'literal:' prefix.
 
@@ -431,7 +446,7 @@
     raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
 
 
-def shortuser(user):
+def shortuser(user: bytes) -> bytes:
     """Return a short representation of a user name or email address."""
     f = user.find(b'@')
     if f >= 0:
@@ -448,7 +463,7 @@
     return user
 
 
-def emailuser(user):
+def emailuser(user: bytes) -> bytes:
     """Return the user portion of an email address."""
     f = user.find(b'@')
     if f >= 0:
@@ -459,7 +474,7 @@
     return user
 
 
-def email(author):
+def email(author: bytes) -> bytes:
     '''get email of author.'''
     r = author.find(b'>')
     if r == -1:
@@ -467,7 +482,7 @@
     return author[author.find(b'<') + 1 : r]
 
 
-def person(author):
+def person(author: bytes) -> bytes:
     """Returns the name before an email address,
     interpreting it as per RFC 5322
 
@@ -612,7 +627,7 @@
     return mailmap
 
 
-def mapname(mailmap, author):
+def mapname(mailmap, author: bytes) -> bytes:
     """Returns the author field according to the mailmap cache, or
     the original author field.
 
@@ -663,7 +678,7 @@
 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
 
 
-def isauthorwellformed(author):
+def isauthorwellformed(author: bytes) -> bool:
     """Return True if the author field is well formed
     (ie "Contributor Name <contrib@email.dom>")
 
@@ -685,7 +700,7 @@
     return _correctauthorformat.match(author) is not None
 
 
-def firstline(text):
+def firstline(text: bytes) -> bytes:
     """Return the first line of the input"""
     # Try to avoid running splitlines() on the whole string
     i = text.find(b'\n')
@@ -697,12 +712,13 @@
         return b''
 
 
-def ellipsis(text, maxlength=400):
+def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
     """Trim string to at most maxlength (default: 400) columns in display."""
     return encoding.trim(text, maxlength, ellipsis=b'...')
 
 
-def escapestr(s):
+def escapestr(s: bytes) -> bytes:
+    # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
     if isinstance(s, memoryview):
         s = bytes(s)
     # call underlying function of s.encode('string_escape') directly for
@@ -710,7 +726,7 @@
     return codecs.escape_encode(s)[0] # pytype: disable=module-attr
 
 
-def unescapestr(s):
+def unescapestr(s: bytes) -> bytes:
     return codecs.escape_decode(s)[0] # pytype: disable=module-attr
 
 
@@ -724,7 +740,7 @@
         return pycompat.bytestr(encoding.strtolocal(str(obj)))
 
 
-def uirepr(s):
+def uirepr(s: bytes) -> bytes:
     # Avoid double backslash in Windows path repr()
     return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
 
@@ -838,7 +854,9 @@
     return tw(**kwargs)
 
 
-def wrap(line, width, initindent=b'', hangindent=b''):
+def wrap(
+    line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
+) -> bytes:
     maxindent = max(len(hangindent), len(initindent))
     if width <= maxindent:
         # adjust for weird terminal size
@@ -875,7 +893,7 @@
 }
 
 
-def parsebool(s):
+def parsebool(s: bytes) -> Optional[bool]:
     """Parse s into a boolean.
 
     If s is not a valid boolean, returns None.
@@ -883,7 +901,8 @@
     return _booleans.get(s.lower(), None)
 
 
-def parselist(value):
+# TODO: make arg mandatory (and fix code below?)
+def parselist(value: Optional[bytes]):
     """parse a configuration value as a list of comma/space separated strings
 
     >>> parselist(b'this,is "a small" ,test')
@@ -973,7 +992,7 @@
     return result or []
 
 
-def evalpythonliteral(s):
+def evalpythonliteral(s: bytes):
     """Evaluate a string containing a Python literal expression"""
     # We could backport our tokenizer hack to rewrite '' to u'' if we want
     return ast.literal_eval(s.decode('latin1'))