changeset 49575:bbbb5213d043

typing: add basic type hints to stringutil.py
author Matt Harbison <matt_harbison@yahoo.com>
date Fri, 04 Nov 2022 22:59:16 -0400
parents 2506c3ac73f4
children 53e4f44ba0e8
files mercurial/utils/stringutil.py
diffstat 1 files changed, 42 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/utils/stringutil.py	Fri Nov 04 17:54:43 2022 -0400
+++ b/mercurial/utils/stringutil.py	Fri Nov 04 22:59:16 2022 -0400
@@ -14,6 +14,11 @@
 import textwrap
 import types
 
+from typing import (
+    Optional,
+    overload,
+)
+
 from ..i18n import _
 from ..thirdparty import attr
 
@@ -30,6 +35,16 @@
 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
 
 
+@overload
+def reescape(pat: bytes) -> bytes:
+    ...
+
+
+@overload
+def reescape(pat: str) -> str:
+    ...
+
+
 def reescape(pat):
     """Drop-in replacement for re.escape."""
     # NOTE: it is intentional that this works on unicodes and not
@@ -45,12 +60,12 @@
     return pat.encode('latin1')
 
 
-def pprint(o, bprefix=False, indent=0, level=0):
+def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
     """Pretty print an object."""
     return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
 
 
-def pprintgen(o, bprefix=False, indent=0, level=0):
+def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
     """Pretty print an object to a generator of atoms.
 
     ``bprefix`` is a flag influencing whether bytestrings are preferred with
@@ -250,7 +265,7 @@
         yield pycompat.byterepr(o)
 
 
-def prettyrepr(o):
+def prettyrepr(o) -> bytes:
     """Pretty print a representation of a possibly-nested object"""
     lines = []
     rs = pycompat.byterepr(o)
@@ -281,7 +296,7 @@
     return b'\n'.join(b'  ' * l + s for l, s in lines)
 
 
-def buildrepr(r):
+def buildrepr(r) -> bytes:
     """Format an optional printable representation from unexpanded bits
 
     ========  =================================
@@ -305,12 +320,12 @@
         return pprint(r)
 
 
-def binary(s):
+def binary(s: bytes) -> bool:
     """return true if a string is binary data"""
     return bool(s and b'\0' in s)
 
 
-def _splitpattern(pattern):
+def _splitpattern(pattern: bytes):
     if pattern.startswith(b're:'):
         return b're', pattern[3:]
     elif pattern.startswith(b'literal:'):
@@ -318,7 +333,7 @@
     return b'literal', pattern
 
 
-def stringmatcher(pattern, casesensitive=True):
+def stringmatcher(pattern: bytes, casesensitive: bool = True):
     """
     accepts a string, possibly starting with 're:' or 'literal:' prefix.
     returns the matcher name, pattern, and matcher function.
@@ -379,7 +394,7 @@
     raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
 
 
-def substringregexp(pattern, flags=0):
+def substringregexp(pattern: bytes, flags: int = 0):
     """Build a regexp object from a string pattern possibly starting with
     're:' or 'literal:' prefix.
 
@@ -431,7 +446,7 @@
     raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
 
 
-def shortuser(user):
+def shortuser(user: bytes) -> bytes:
     """Return a short representation of a user name or email address."""
     f = user.find(b'@')
     if f >= 0:
@@ -448,7 +463,7 @@
     return user
 
 
-def emailuser(user):
+def emailuser(user: bytes) -> bytes:
     """Return the user portion of an email address."""
     f = user.find(b'@')
     if f >= 0:
@@ -459,7 +474,7 @@
     return user
 
 
-def email(author):
+def email(author: bytes) -> bytes:
     '''get email of author.'''
     r = author.find(b'>')
     if r == -1:
@@ -467,7 +482,7 @@
     return author[author.find(b'<') + 1 : r]
 
 
-def person(author):
+def person(author: bytes) -> bytes:
     """Returns the name before an email address,
     interpreting it as per RFC 5322
 
@@ -612,7 +627,7 @@
     return mailmap
 
 
-def mapname(mailmap, author):
+def mapname(mailmap, author: bytes) -> bytes:
     """Returns the author field according to the mailmap cache, or
     the original author field.
 
@@ -663,7 +678,7 @@
 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
 
 
-def isauthorwellformed(author):
+def isauthorwellformed(author: bytes) -> bool:
     """Return True if the author field is well formed
     (ie "Contributor Name <contrib@email.dom>")
 
@@ -685,7 +700,7 @@
     return _correctauthorformat.match(author) is not None
 
 
-def firstline(text):
+def firstline(text: bytes) -> bytes:
     """Return the first line of the input"""
     # Try to avoid running splitlines() on the whole string
     i = text.find(b'\n')
@@ -697,12 +712,13 @@
         return b''
 
 
-def ellipsis(text, maxlength=400):
+def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
     """Trim string to at most maxlength (default: 400) columns in display."""
     return encoding.trim(text, maxlength, ellipsis=b'...')
 
 
-def escapestr(s):
+def escapestr(s: bytes) -> bytes:
+    # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
     if isinstance(s, memoryview):
         s = bytes(s)
     # call underlying function of s.encode('string_escape') directly for
@@ -710,7 +726,7 @@
     return codecs.escape_encode(s)[0]  # pytype: disable=module-attr
 
 
-def unescapestr(s):
+def unescapestr(s: bytes) -> bytes:
     return codecs.escape_decode(s)[0]  # pytype: disable=module-attr
 
 
@@ -724,7 +740,7 @@
         return pycompat.bytestr(encoding.strtolocal(str(obj)))
 
 
-def uirepr(s):
+def uirepr(s: bytes) -> bytes:
     # Avoid double backslash in Windows path repr()
     return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
 
@@ -838,7 +854,9 @@
     return tw(**kwargs)
 
 
-def wrap(line, width, initindent=b'', hangindent=b''):
+def wrap(
+    line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
+) -> bytes:
     maxindent = max(len(hangindent), len(initindent))
     if width <= maxindent:
         # adjust for weird terminal size
@@ -875,7 +893,7 @@
 }
 
 
-def parsebool(s):
+def parsebool(s: bytes) -> Optional[bool]:
     """Parse s into a boolean.
 
     If s is not a valid boolean, returns None.
@@ -883,7 +901,8 @@
     return _booleans.get(s.lower(), None)
 
 
-def parselist(value):
+# TODO: make arg mandatory (and fix code below?)
+def parselist(value: Optional[bytes]):
     """parse a configuration value as a list of comma/space separated strings
 
     >>> parselist(b'this,is "a small" ,test')
@@ -973,7 +992,7 @@
     return result or []
 
 
-def evalpythonliteral(s):
+def evalpythonliteral(s: bytes):
     """Evaluate a string containing a Python literal expression"""
     # We could backport our tokenizer hack to rewrite '' to u'' if we want
     return ast.literal_eval(s.decode('latin1'))