changeset 51940:54d9f496f07a

interfaces: introduce and use a protocol class for the `charencode` module. See f2832de2a46c for details when this was done for the `bdiff` module. This lets us dump the hack where the `pure` implementation was imported during the type checking phase to provide signatures for the module methods it provides. Now the protocol classes are starting to shine, because these methods are provided by `pure.charencode` and `cext.parsers`, and references to `cffi.charencode` and `cext.charencode` are forwarded to them as appropriate by the `policy` module. But none of that matters, as long as the module returned provides the listed methods. The interface was copy/pasted from the `pure` module, but `jsonescapeu8fallback` is omitted because it is accessed from the `pure` module directly when the escaping fails in the primary module's `jsonescapeu8()`.
author Matt Harbison <matt_harbison@yahoo.com>
date Sat, 05 Oct 2024 15:00:37 -0400
parents 8d9767bf4adb
children e58f02e2f6a9
files mercurial/encoding.py mercurial/interfaces/modules.py
diffstat 2 files changed, 25 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/encoding.py	Fri Oct 04 23:23:24 2024 -0400
+++ b/mercurial/encoding.py	Sat Oct 05 15:00:37 2024 -0400
@@ -26,11 +26,12 @@
     pycompat,
 )
 
+from .interfaces import modules as intmod
 from .pure import charencode as charencodepure
 
 _Tlocalstr = TypeVar('_Tlocalstr', bound='localstr')
 
-charencode = policy.importmod('charencode')
+charencode: intmod.CharEncoding = policy.importmod('charencode')
 
 isasciistr = charencode.isasciistr
 asciilower = charencode.asciilower
@@ -41,15 +42,6 @@
 
 unichr = chr
 
-if typing.TYPE_CHECKING:
-    # TODO: make a stub file for .cext.charencode, and import here
-    from .pure.charencode import (
-        asciilower,
-        asciiupper,
-        isasciistr,
-        jsonescapeu8fast as _jsonescapeu8fast,
-    )
-
 
 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
 # "Unicode Subtleties"), so we need to ignore them in some places for
--- a/mercurial/interfaces/modules.py	Fri Oct 04 23:23:24 2024 -0400
+++ b/mercurial/interfaces/modules.py	Sat Oct 05 15:00:37 2024 -0400
@@ -50,3 +50,26 @@
 
     xdiffblocks: Optional[BDiffBlocksFnc]
     """This method is currently only available in the ``cext`` module."""
+
+
+class CharEncoding(Protocol):
+    """A Protocol class for the various charencode module implementations."""
+
+    def isasciistr(self, s: bytes) -> bool:
+        """Can the byte string be decoded with the ``ascii`` codec?"""
+
+    def asciilower(self, s: bytes) -> bytes:
+        """Convert a byte string to lowercase if ASCII.
+
+        Raises UnicodeDecodeError if non-ASCII characters are found."""
+
+    def asciiupper(self, s: bytes) -> bytes:
+        """Convert a byte string to uppercase if ASCII.
+
+        Raises UnicodeDecodeError if non-ASCII characters are found."""
+
+    def jsonescapeu8fast(self, u8chars: bytes, paranoid: bool) -> bytes:
+        """Convert a UTF-8 byte string to JSON-escaped form (fast path).
+
+        Raises ValueError if non-ASCII characters have to be escaped.
+        """