changeset 51929:93d872a06132

typing: add type annotations to the dirstate classes

The basic procedure here was to use `merge-pyi` to merge the `git/dirstate.pyi` file in (after renaming the interface class to match), clean up the import statement mess, and then repeat the procedure for `mercurial/dirstate.pyi`. Surprisingly, git's dirstate had more hints inferred in its *.pyi file. After that came a manual examination of each method in the interface, and of how it was implemented in the core and git classes, to verify what pytype had inferred and to fill in the gaps. Since this involved jumping around between three different files, I applied the same type info to all three at the same time. Complex types I rolled up into type aliases in the interface module, and used those as needed. That way if one changes, there's one place to edit. There are some hints still missing, and some documentation that doesn't match the signatures. They should all be marked with TODOs. There are also a bunch of methods on the core class that aren't on the Protocol class that seem like maybe they should be (like `set_tracked()`). There are even more methods missing from the git class. But that's a project for another time.
author Matt Harbison <matt_harbison@yahoo.com>
date Fri, 27 Sep 2024 12:30:37 -0400
parents 3688a984134b
children bc9ed92d4753
files hgext/git/dirstate.py mercurial/dirstate.py mercurial/interfaces/dirstate.py
diffstat 3 files changed, 247 insertions(+), 92 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/git/dirstate.py	Fri Sep 27 12:10:25 2024 -0400
+++ b/hgext/git/dirstate.py	Fri Sep 27 12:30:37 2024 -0400
@@ -3,6 +3,16 @@
 import contextlib
 import os
 
+from typing import (
+    Any,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Tuple,
+)
+
 from mercurial.node import sha1nodeconstants
 from mercurial import (
     dirstatemap,
@@ -96,7 +106,7 @@
         )
         return self._map
 
-    def p1(self):
+    def p1(self) -> bytes:
         try:
             return self.git.head.peel().id.raw
         except pygit2.GitError:
@@ -104,11 +114,11 @@
             # empty repository.
             return sha1nodeconstants.nullid
 
-    def p2(self):
+    def p2(self) -> bytes:
         # TODO: MERGE_HEAD? something like that, right?
         return sha1nodeconstants.nullid
 
-    def setparents(self, p1, p2=None):
+    def setparents(self, p1: bytes, p2: Optional[bytes] = None):
         if p2 is None:
             p2 = sha1nodeconstants.nullid
         assert p2 == sha1nodeconstants.nullid, b'TODO merging support'
@@ -120,17 +130,17 @@
             os.path.join(self._root, b'.git', b'index')
         )
 
-    def branch(self):
+    def branch(self) -> bytes:
         return b'default'
 
-    def parents(self):
+    def parents(self) -> List[bytes]:
         # TODO how on earth do we find p2 if a merge is in flight?
         return [self.p1(), sha1nodeconstants.nullid]
 
-    def __iter__(self):
+    def __iter__(self) -> Iterator[bytes]:
         return (pycompat.fsencode(f.path) for f in self.git.index)
 
-    def items(self):
+    def items(self) -> Iterator[Tuple[bytes, intdirstate.DirstateItemT]]:
         for ie in self.git.index:
             yield ie.path, None  # value should be a DirstateItem
 
@@ -144,14 +154,21 @@
             return b'?'
         return _STATUS_MAP[gs]
 
-    def __contains__(self, filename):
+    def __contains__(self, filename: Any) -> bool:
         try:
             gs = self.git.status_file(filename)
             return _STATUS_MAP[gs] != b'?'
         except KeyError:
             return False
 
-    def status(self, match, subrepos, ignored, clean, unknown):
+    def status(
+        self,
+        match: matchmod.basematcher,
+        subrepos: bool,
+        ignored: bool,
+        clean: bool,
+        unknown: bool,
+    ) -> intdirstate.StatusReturnT:
         listclean = clean
         # TODO handling of clean files - can we get that from git.status()?
         modified, added, removed, deleted, unknown, ignored, clean = (
@@ -224,24 +241,28 @@
             mtime_boundary,
         )
 
-    def flagfunc(self, buildfallback):
+    def flagfunc(
+        self, buildfallback: intdirstate.FlagFuncFallbackT
+    ) -> intdirstate.FlagFuncReturnT:
         # TODO we can do better
         return buildfallback()
 
-    def getcwd(self):
+    def getcwd(self) -> bytes:
         # TODO is this a good way to do this?
         return os.path.dirname(
             os.path.dirname(pycompat.fsencode(self.git.path))
         )
 
-    def get_entry(self, path):
+    def get_entry(self, path: bytes) -> intdirstate.DirstateItemT:
         """return a DirstateItem for the associated path"""
         entry = self._map.get(path)
         if entry is None:
             return DirstateItem()
         return entry
 
-    def normalize(self, path, isknown=False, ignoremissing=False):
+    def normalize(
+        self, path: bytes, isknown: bool = False, ignoremissing: bool = False
+    ) -> bytes:
         normed = util.normcase(path)
         assert normed == path, b"TODO handling of case folding: %s != %s" % (
             normed,
@@ -250,10 +271,10 @@
         return path
 
     @property
-    def _checklink(self):
+    def _checklink(self) -> bool:
         return util.checklink(os.path.dirname(pycompat.fsencode(self.git.path)))
 
-    def copies(self):
+    def copies(self) -> Dict[bytes, bytes]:
         # TODO support copies?
         return {}
 
@@ -261,18 +282,18 @@
     _filecache = set()
 
     @property
-    def is_changing_parents(self):
+    def is_changing_parents(self) -> bool:
         # TODO: we need to implement the context manager bits and
         # correctly stage/revert index edits.
         return False
 
     @property
-    def is_changing_any(self):
+    def is_changing_any(self) -> bool:
         # TODO: we need to implement the context manager bits and
         # correctly stage/revert index edits.
         return False
 
-    def write(self, tr):
+    def write(self, tr: Optional[intdirstate.TransactionT]) -> None:
         # TODO: call parent change callbacks
 
         if tr:
@@ -284,7 +305,7 @@
         else:
             self.git.index.write()
 
-    def pathto(self, f, cwd=None):
+    def pathto(self, f: bytes, cwd: Optional[bytes] = None) -> bytes:
         if cwd is None:
             cwd = self.getcwd()
         # TODO core dirstate does something about slashes here
@@ -292,11 +313,11 @@
         r = util.pathto(self._root, cwd, f)
         return r
 
-    def matches(self, match):
+    def matches(self, match: matchmod.basematcher) -> Iterable[bytes]:
         for x in self.git.index:
             p = pycompat.fsencode(x.path)
             if match(p):
-                yield p
+                yield p  # TODO: return list instead of yielding?
 
     def set_clean(self, f, parentfiledata):
         """Mark a file normal and clean."""
@@ -308,7 +329,14 @@
         # TODO: for now we just let libgit2 re-stat the file. We can
         # clearly do better.
 
-    def walk(self, match, subrepos, unknown, ignored, full=True):
+    def walk(
+        self,
+        match: matchmod.basematcher,
+        subrepos: Any,
+        unknown: bool,
+        ignored: bool,
+        full: bool = True,
+    ) -> intdirstate.WalkReturnT:
         # TODO: we need to use .status() and not iterate the index,
         # because the index doesn't force a re-walk and so `hg add` of
         # a new file without an intervening call to status will
@@ -370,7 +398,7 @@
         index.remove(pycompat.fsdecode(f))
         index.write()
 
-    def copied(self, path):
+    def copied(self, file: bytes) -> Optional[bytes]:
         # TODO: track copies?
         return None
 
@@ -387,11 +415,15 @@
         # TODO: track this maybe?
         yield
 
-    def addparentchangecallback(self, category, callback):
+    def addparentchangecallback(
+        self, category: bytes, callback: intdirstate.AddParentChangeCallbackT
+    ) -> None:
         # TODO: should this be added to the dirstate interface?
         self._plchangecallbacks[category] = callback
 
-    def setbranch(self, branch, transaction):
+    def setbranch(
+        self, branch: bytes, transaction: Optional[intdirstate.TransactionT]
+    ) -> None:
         raise error.Abort(
             b'git repos do not support branches. try using bookmarks'
         )
--- a/mercurial/dirstate.py	Fri Sep 27 12:10:25 2024 -0400
+++ b/mercurial/dirstate.py	Fri Sep 27 12:30:37 2024 -0400
@@ -13,6 +13,16 @@
 import stat
 import uuid
 
+from typing import (
+    Any,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Tuple,
+)
+
 from .i18n import _
 
 from hgdemandimport import tracing
@@ -396,7 +406,7 @@
         raise error.ProgrammingError(msg)
 
     @property
-    def is_changing_any(self):
+    def is_changing_any(self) -> bool:
         """Returns true if the dirstate is in the middle of a set of changes.
 
         This returns True for any kind of change.
@@ -404,7 +414,7 @@
         return self._changing_level > 0
 
     @property
-    def is_changing_parents(self):
+    def is_changing_parents(self) -> bool:
         """Returns true if the dirstate is in the middle of a set of changes
         that modify the dirstate parent.
         """
@@ -413,7 +423,7 @@
         return self._change_type == CHANGE_TYPE_PARENTS
 
     @property
-    def is_changing_files(self):
+    def is_changing_files(self) -> bool:
         """Returns true if the dirstate is in the middle of a set of changes
         that modify the files tracked or their sources.
         """
@@ -469,11 +479,11 @@
     def _pl(self):
         return self._map.parents()
 
-    def hasdir(self, d):
+    def hasdir(self, d: bytes) -> bool:
         return self._map.hastrackeddir(d)
 
     @rootcache(b'.hgignore')
-    def _ignore(self):
+    def _ignore(self) -> matchmod.basematcher:
         files = self._ignorefiles()
         if not files:
             return matchmod.never()
@@ -486,11 +496,11 @@
         return self._ui.configbool(b'ui', b'slash') and pycompat.ossep != b'/'
 
     @propertycache
-    def _checklink(self):
+    def _checklink(self) -> bool:
         return util.checklink(self._root)
 
     @propertycache
-    def _checkexec(self):
+    def _checkexec(self) -> bool:
         return bool(util.checkexec(self._root))
 
     @propertycache
@@ -502,7 +512,9 @@
         # it's safe because f is always a relative path
         return self._rootdir + f
 
-    def flagfunc(self, buildfallback):
+    def flagfunc(
+        self, buildfallback: intdirstate.FlagFuncFallbackT
+    ) -> intdirstate.FlagFuncReturnT:
         """build a callable that returns flags associated with a filename
 
         The information is extracted from three possible layers:
@@ -514,7 +526,7 @@
         # small hack to cache the result of buildfallback()
         fallback_func = []
 
-        def get_flags(x):
+        def get_flags(x: bytes) -> bytes:
             entry = None
             fallback_value = None
             try:
@@ -565,7 +577,7 @@
             return forcecwd
         return encoding.getcwd()
 
-    def getcwd(self):
+    def getcwd(self) -> bytes:
         """Return the path from which a canonical path is calculated.
 
         This path should be used to resolve file patterns or to convert
@@ -585,7 +597,7 @@
             # we're outside the repo. return an absolute path.
             return cwd
 
-    def pathto(self, f, cwd=None):
+    def pathto(self, f: bytes, cwd: Optional[bytes] = None) -> bytes:
         if cwd is None:
             cwd = self.getcwd()
         path = util.pathto(self._root, cwd, f)
@@ -593,31 +605,31 @@
             return util.pconvert(path)
         return path
 
-    def get_entry(self, path):
+    def get_entry(self, path: bytes) -> intdirstate.DirstateItemT:
         """return a DirstateItem for the associated path"""
         entry = self._map.get(path)
         if entry is None:
             return DirstateItem()
         return entry
 
-    def __contains__(self, key):
+    def __contains__(self, key: Any) -> bool:
         return key in self._map
 
-    def __iter__(self):
+    def __iter__(self) -> Iterator[bytes]:
         return iter(sorted(self._map))
 
-    def items(self):
+    def items(self) -> Iterator[Tuple[bytes, intdirstate.DirstateItemT]]:
         return self._map.items()
 
     iteritems = items
 
-    def parents(self):
+    def parents(self) -> List[bytes]:
         return [self._validate(p) for p in self._pl]
 
-    def p1(self):
+    def p1(self) -> bytes:
         return self._validate(self._pl[0])
 
-    def p2(self):
+    def p2(self) -> bytes:
         return self._validate(self._pl[1])
 
     @property
@@ -625,11 +637,11 @@
         """True if a merge is in progress"""
         return self._pl[1] != self._nodeconstants.nullid
 
-    def branch(self):
+    def branch(self) -> bytes:
         return encoding.tolocal(self._branch)
 
     @requires_changing_parents
-    def setparents(self, p1, p2=None):
+    def setparents(self, p1: bytes, p2: Optional[bytes] = None):
         """Set dirstate parents to p1 and p2.
 
         When moving from two parents to one, "merged" entries a
@@ -655,7 +667,9 @@
         fold_p2 = oldp2 != nullid and p2 == nullid
         return self._map.setparents(p1, p2, fold_p2=fold_p2)
 
-    def setbranch(self, branch, transaction):
+    def setbranch(
+        self, branch: bytes, transaction: Optional[intdirstate.TransactionT]
+    ) -> None:
         self.__class__._branch.set(self, encoding.fromlocal(branch))
         if transaction is not None:
             self._setup_tr_abort(transaction)
@@ -683,7 +697,7 @@
     def _write_branch(self, file_obj):
         file_obj.write(self._branch + b'\n')
 
-    def invalidate(self):
+    def invalidate(self) -> None:
         """Causes the next access to reread the dirstate.
 
         This is different from localrepo.invalidatedirstate() because it always
@@ -703,7 +717,7 @@
         self._origpl = None
 
     @requires_changing_any
-    def copy(self, source, dest):
+    def copy(self, source: Optional[bytes], dest: bytes) -> None:
         """Mark dest as a copy of source. Unmark dest if source is None."""
         if source == dest:
             return
@@ -714,10 +728,10 @@
         else:
             self._map.copymap.pop(dest, None)
 
-    def copied(self, file):
+    def copied(self, file: bytes) -> Optional[bytes]:
         return self._map.copymap.get(file, None)
 
-    def copies(self):
+    def copies(self) -> Dict[bytes, bytes]:
         return self._map.copymap
 
     @requires_changing_files
@@ -983,7 +997,9 @@
                 )
         return folded
 
-    def normalize(self, path, isknown=False, ignoremissing=False):
+    def normalize(
+        self, path: bytes, isknown: bool = False, ignoremissing: bool = False
+    ) -> bytes:
         """
         normalize the case of a pathname when on a casefolding filesystem
 
@@ -1009,12 +1025,17 @@
     # - its semantic is unclear
     # - do we really needs it ?
     @requires_changing_parents
-    def clear(self):
+    def clear(self) -> None:
         self._map.clear()
         self._dirty = True
 
     @requires_changing_parents
-    def rebuild(self, parent, allfiles, changedfiles=None):
+    def rebuild(
+        self,
+        parent: bytes,
+        allfiles: Iterable[bytes],  # TODO: more than iterable? (uses len())
+        changedfiles: Optional[Iterable[bytes]] = None,
+    ) -> None:
         matcher = self._sparsematcher
         if matcher is not None and not matcher.always():
             # should not add non-matching files
@@ -1080,7 +1101,7 @@
             on_abort,
         )
 
-    def write(self, tr):
+    def write(self, tr: Optional[intdirstate.TransactionT]) -> None:
         if not self._dirty:
             return
         # make sure we don't request a write of invalidated content
@@ -1130,7 +1151,9 @@
         self._opener.unlink(self._filename_th)
         self._use_tracked_hint = False
 
-    def addparentchangecallback(self, category, callback):
+    def addparentchangecallback(
+        self, category: bytes, callback: intdirstate.AddParentChangeCallbackT
+    ) -> None:
         """add a callback to be called when the wd parents are changed
 
         Callback will be called with the following arguments:
@@ -1165,7 +1188,7 @@
                 return True
         return False
 
-    def _ignorefiles(self):
+    def _ignorefiles(self) -> List[bytes]:
         files = []
         if os.path.exists(self._join(b'.hgignore')):
             files.append(self._join(b'.hgignore'))
@@ -1176,7 +1199,7 @@
                 files.append(os.path.join(self._rootdir, util.expandpath(path)))
         return files
 
-    def _ignorefileandline(self, f):
+    def _ignorefileandline(self, f: bytes) -> intdirstate.IgnoreFileAndLineT:
         files = collections.deque(self._ignorefiles())
         visited = set()
         while files:
@@ -1334,7 +1357,14 @@
 
         return results, dirsfound, dirsnotfound
 
-    def walk(self, match, subrepos, unknown, ignored, full=True):
+    def walk(
+        self,
+        match: matchmod.basematcher,
+        subrepos: Any,
+        unknown: bool,
+        ignored: bool,
+        full: bool = True,
+    ) -> intdirstate.WalkReturnT:
         """
         Walk recursively through the directory tree, finding all files
         matched by match.
@@ -1607,7 +1637,14 @@
         )
         return (lookup, status)
 
-    def status(self, match, subrepos, ignored, clean, unknown):
+    def status(
+        self,
+        match: matchmod.basematcher,
+        subrepos: bool,
+        ignored: bool,
+        clean: bool,
+        unknown: bool,
+    ) -> intdirstate.StatusReturnT:
         """Determine the status of the working copy relative to the
         dirstate and return a pair of (unsure, status), where status is of type
         scmutil.status and:
@@ -1745,7 +1782,7 @@
         )
         return (lookup, status, mtime_boundary)
 
-    def matches(self, match):
+    def matches(self, match: matchmod.basematcher) -> Iterable[bytes]:
         """
         return files in the dirstate (in whatever state) filtered by match
         """
@@ -1778,7 +1815,9 @@
                 files.append(self._map.docket.data_filename())
         return tuple(files)
 
-    def verify(self, m1, m2, p1, narrow_matcher=None):
+    def verify(
+        self, m1, m2, p1: bytes, narrow_matcher: Optional[Any] = None
+    ) -> Iterator[bytes]:
         """
         check the dirstate contents against the parent manifest and yield errors
         """
--- a/mercurial/interfaces/dirstate.py	Fri Sep 27 12:10:25 2024 -0400
+++ b/mercurial/interfaces/dirstate.py	Fri Sep 27 12:30:37 2024 -0400
@@ -1,11 +1,19 @@
 from __future__ import annotations
 
 import contextlib
+import os
 import typing
 
 from typing import (
+    Any,
     Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
     Protocol,
+    Tuple,
 )
 
 if typing.TYPE_CHECKING:
@@ -13,8 +21,52 @@
     # to avoid circular imports
     from .. import (
         match as matchmod,
+        scmutil,
+        transaction as txnmod,
     )
 
+    # TODO: finish adding type hints
+    AddParentChangeCallbackT = Callable[
+        ["idirstate", Tuple[Any, Any], Tuple[Any, Any]], Any
+    ]
+    """The callback type for dirstate.addparentchangecallback()."""
+
+    # TODO: add a Protocol for dirstatemap.DirStateItem? (It is
+    #  conditionalized with python or rust implementations.  Also,
+    #  git.dirstate needs to yield non-None from ``items()``.)
+    DirstateItemT = Any  # dirstatemap.DirstateItem
+
+    IgnoreFileAndLineT = Tuple[Optional[bytes], int, bytes]
+    """The return type of dirstate._ignorefileandline(), which holds
+    ``(file, lineno, originalline)``.
+    """
+
+    FlagFuncFallbackT = Callable[[], "FlagFuncReturnT"]
+    """The type for the dirstate.flagfunc() fallback function."""
+
+    FlagFuncReturnT = Callable[[bytes], bytes]
+    """The return type of dirstate.flagfunc()."""
+
+    # TODO: verify and complete this- it came from a pytype *.pyi file
+    StatusReturnT = Tuple[Any, scmutil.status, Any]
+    """The return type of dirstate.status()."""
+
+    # TODO: probably doesn't belong here.
+    TransactionT = txnmod.transaction
+    """The type for a transaction used with dirstate.
+
+    This is meant to help callers avoid having to remember to delay the import
+    of the transaction module.
+    """
+
+    # TODO: The value can also be mercurial.osutil.stat
+    WalkReturnT = Dict[bytes, Optional[os.stat_result]]
+    """The return type of dirstate.walk().
+
+    The matched files are keyed in the dictionary, mapped to a stat-like object
+    if the file exists.
+    """
+
 
 class idirstate(Protocol):
     # TODO: convert these constructor args to fields?
@@ -56,10 +108,10 @@
     def is_changing_files(self) -> bool:
         """True if file tracking changes in progress."""
 
-    def _ignorefiles(self):
+    def _ignorefiles(self) -> List[bytes]:
         """Return a list of files containing patterns to ignore."""
 
-    def _ignorefileandline(self, f):
+    def _ignorefileandline(self, f: bytes) -> IgnoreFileAndLineT:
         """Given a file `f`, return the ignore file and line that ignores it."""
 
     # TODO: decorate with `@util.propertycache` like dirstate class?
@@ -75,7 +127,7 @@
         """Callable for checking exec bits."""  # TODO: this comment looks stale
 
     @contextlib.contextmanager
-    def changing_parents(self, repo):
+    def changing_parents(self, repo) -> Iterator:  # TODO: typehint this
         """Context manager for handling dirstate parents.
 
         If an exception occurs in the scope of the context manager,
@@ -84,7 +136,7 @@
         """
 
     @contextlib.contextmanager
-    def changing_files(self, repo):
+    def changing_files(self, repo) -> Iterator:  # TODO: typehint this
         """Context manager for handling dirstate files.
 
         If an exception occurs in the scope of the context manager,
@@ -92,10 +144,10 @@
         released.
         """
 
-    def hasdir(self, d):
+    def hasdir(self, d: bytes) -> bool:
         pass
 
-    def flagfunc(self, buildfallback):
+    def flagfunc(self, buildfallback: FlagFuncFallbackT) -> FlagFuncReturnT:
         """build a callable that returns flags associated with a filename
 
         The information is extracted from three possible layers:
@@ -104,7 +156,7 @@
         3. a more expensive mechanism inferring the flags from the parents.
         """
 
-    def getcwd(self):
+    def getcwd(self) -> bytes:
         """Return the path from which a canonical path is calculated.
 
         This path should be used to resolve file patterns or to convert
@@ -112,19 +164,19 @@
         used to get real file paths. Use vfs functions instead.
         """
 
-    def pathto(self, f, cwd=None):
+    def pathto(self, f: bytes, cwd: Optional[bytes] = None) -> bytes:
         pass
 
-    def get_entry(self, path):
+    def get_entry(self, path: bytes) -> DirstateItemT:
         """return a DirstateItem for the associated path"""
 
-    def __contains__(self, key):
+    def __contains__(self, key: Any) -> bool:
         """Check if bytestring `key` is known to the dirstate."""
 
-    def __iter__(self):
+    def __iter__(self) -> Iterator[bytes]:
         """Iterate the dirstate's contained filenames as bytestrings."""
 
-    def items(self):
+    def items(self) -> Iterator[Tuple[bytes, DirstateItemT]]:
         """Iterate the dirstate's entries as (filename, DirstateItem.
 
         As usual, filename is a bytestring.
@@ -132,19 +184,20 @@
 
     iteritems = items
 
-    def parents(self):
+    def parents(self) -> List[bytes]:
         pass
 
-    def p1(self):
+    def p1(self) -> bytes:
         pass
 
-    def p2(self):
+    def p2(self) -> bytes:
         pass
 
-    def branch(self):
+    def branch(self) -> bytes:
         pass
 
-    def setparents(self, p1, p2=None):
+    # TODO: typehint the return.  It's a copies Map of some sort.
+    def setparents(self, p1: bytes, p2: Optional[bytes] = None):
         """Set dirstate parents to p1 and p2.
 
         When moving from two parents to one, "merged" entries a
@@ -154,26 +207,30 @@
         See localrepo.setparents()
         """
 
-    def setbranch(self, branch, transaction):
+    def setbranch(
+        self, branch: bytes, transaction: Optional[TransactionT]
+    ) -> None:
         pass
 
-    def invalidate(self):
+    def invalidate(self) -> None:
         """Causes the next access to reread the dirstate.
 
         This is different from localrepo.invalidatedirstate() because it always
         rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
         check whether the dirstate has changed before rereading it."""
 
-    def copy(self, source, dest):
+    def copy(self, source: Optional[bytes], dest: bytes) -> None:
         """Mark dest as a copy of source. Unmark dest if source is None."""
 
-    def copied(self, file):
+    def copied(self, file: bytes) -> Optional[bytes]:
         pass
 
-    def copies(self):
+    def copies(self) -> Dict[bytes, bytes]:
         pass
 
-    def normalize(self, path, isknown=False, ignoremissing=False):
+    def normalize(
+        self, path: bytes, isknown: bool = False, ignoremissing: bool = False
+    ) -> bytes:
         """
         normalize the case of a pathname when on a casefolding filesystem
 
@@ -191,16 +248,23 @@
         - version provided via command arguments
         """
 
-    def clear(self):
+    def clear(self) -> None:
         pass
 
-    def rebuild(self, parent, allfiles, changedfiles=None):
+    def rebuild(
+        self,
+        parent: bytes,
+        allfiles: Iterable[bytes],  # TODO: more than iterable? (uses len())
+        changedfiles: Optional[Iterable[bytes]] = None,
+    ) -> None:
         pass
 
-    def write(self, tr):
+    def write(self, tr: Optional[TransactionT]) -> None:
         pass
 
-    def addparentchangecallback(self, category, callback):
+    def addparentchangecallback(
+        self, category: bytes, callback: AddParentChangeCallbackT
+    ) -> None:
         """add a callback to be called when the wd parents are changed
 
         Callback will be called with the following arguments:
@@ -210,7 +274,14 @@
         with a newer callback.
         """
 
-    def walk(self, match, subrepos, unknown, ignored, full=True):
+    def walk(
+        self,
+        match: matchmod.basematcher,
+        subrepos: Any,  # TODO: figure out what this is
+        unknown: bool,
+        ignored: bool,
+        full: bool = True,
+    ) -> WalkReturnT:
         """
         Walk recursively through the directory tree, finding all files
         matched by match.
@@ -222,7 +293,14 @@
 
         """
 
-    def status(self, match, subrepos, ignored, clean, unknown):
+    def status(
+        self,
+        match: matchmod.basematcher,
+        subrepos: bool,
+        ignored: bool,
+        clean: bool,
+        unknown: bool,
+    ) -> StatusReturnT:
         """Determine the status of the working copy relative to the
         dirstate and return a pair of (unsure, status), where status is of type
         scmutil.status and:
@@ -239,12 +317,18 @@
             dirstate was written
         """
 
-    def matches(self, match):
+    # TODO: could return a list, except git.dirstate is a generator
+
+    def matches(self, match: matchmod.basematcher) -> Iterable[bytes]:
         """
         return files in the dirstate (in whatever state) filtered by match
         """
 
-    def verify(self, m1, m2, p1, narrow_matcher=None):
+    # TODO: finish adding typehints here, and to subclasses
+
+    def verify(
+        self, m1, m2, p1: bytes, narrow_matcher: Optional[Any] = None
+    ) -> Iterator[bytes]:
         """
         check the dirstate contents against the parent manifest and yield errors
         """