Mercurial > hg
changeset 47683:284a20269a97
dirstate-v2: Separate iterators for dirfoldmap and debugdirstate
`dirstatemap.dirfoldmap` was recently changed to re-use a Rust iterator
that was added for the `hg debugdirstate` command.
That iterator was based on all nodes in the tree dirstate without an entry
only existing to hold child nodes, and therefore being directories.
However to optimize status further we may want to store additional nodes
for unknown or ignored files and directories. At that point the two users
of this iterator will want different things, so let’s make two iterators
instead.
See doc-comments in `dispatch.rs`.
Differential Revision: https://phab.mercurial-scm.org/D11099
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Fri, 16 Jul 2021 14:08:26 +0200 |
parents | 78f7f0d490ee |
children | 852262e2e7d9 |
files | mercurial/debugcommands.py mercurial/dirstatemap.py rust/hg-core/src/dirstate.rs rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/dispatch.rs rust/hg-cpython/src/dirstate.rs rust/hg-cpython/src/dirstate/dirstate_map.rs rust/hg-cpython/src/dirstate/dispatch.rs tests/test-completion.t tests/test-status.t |
diffstat | 10 files changed, 169 insertions(+), 71 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/debugcommands.py Thu Jul 15 23:02:17 2021 +0200 +++ b/mercurial/debugcommands.py Fri Jul 16 14:08:26 2021 +0200 @@ -942,7 +942,12 @@ ), (b'', b'dates', True, _(b'display the saved mtime')), (b'', b'datesort', None, _(b'sort by saved mtime')), - (b'', b'dirs', False, _(b'display directories')), + ( + b'', + b'all', + False, + _(b'display dirstate-v2 tree nodes that would not exist in v1'), + ), ], _(b'[OPTION]...'), ) @@ -961,9 +966,10 @@ ) # sort by mtime, then by filename else: keyfunc = None # sort by filename - entries = list(pycompat.iteritems(repo.dirstate)) - if opts['dirs']: - entries.extend(repo.dirstate.directories()) + if opts['all']: + entries = list(repo.dirstate._map.debug_iter()) + else: + entries = list(pycompat.iteritems(repo.dirstate)) entries.sort(key=keyfunc) for file_, ent in entries: if ent.v1_mtime() == -1:
--- a/mercurial/dirstatemap.py Thu Jul 15 23:02:17 2021 +0200 +++ b/mercurial/dirstatemap.py Fri Jul 16 14:08:26 2021 +0200 @@ -105,10 +105,6 @@ self._map return self.copymap - def directories(self): - # Rust / dirstate-v2 only - return [] - def clear(self): self._map.clear() self.copymap.clear() @@ -126,6 +122,8 @@ # forward for python2,3 compat iteritems = items + debug_iter = items + def __len__(self): return len(self._map) @@ -525,6 +523,9 @@ def directories(self): return self._rustmap.directories() + def debug_iter(self): + return self._rustmap.debug_iter() + def preload(self): self._rustmap @@ -746,6 +747,6 @@ def dirfoldmap(self): f = {} normcase = util.normcase - for name, _pseudo_entry in self.directories(): + for name in self._rustmap.tracked_dirs(): f[normcase(name)] = name return f
--- a/rust/hg-core/src/dirstate.rs Thu Jul 15 23:02:17 2021 +0200 +++ b/rust/hg-core/src/dirstate.rs Fri Jul 16 14:08:26 2021 +0200 @@ -65,6 +65,12 @@ let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK; dirstate_exec_bit != fs_exec_bit } + + /// Returns a `(state, mode, size, mtime)` tuple as for + /// `DirstateMapMethods::debug_iter`. + pub fn debug_tuple(&self) -> (u8, i32, i32, i32) { + (self.state.into(), self.mode, self.size, self.mtime) + } } #[derive(BytesCast)]
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu Jul 15 23:02:17 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri Jul 16 14:08:26 2021 +0200 @@ -1246,27 +1246,50 @@ })) } - fn iter_directories( + fn iter_tracked_dirs( + &mut self, + ) -> Result< + Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + + Send + + '_, + >, + DirstateError, + > { + let on_disk = self.on_disk; + Ok(Box::new(filter_map_results( + self.iter_nodes(), + move |node| { + Ok(if node.tracked_descendants_count() > 0 { + Some(node.full_path(on_disk)?) + } else { + None + }) + }, + ))) + } + + fn debug_iter( &self, ) -> Box< dyn Iterator< Item = Result< - (&HgPath, Option<Timestamp>), + (&HgPath, (u8, i32, i32, i32)), DirstateV2ParseError, >, > + Send + '_, > { - Box::new(filter_map_results(self.iter_nodes(), move |node| { - Ok(if node.state()?.is_none() { - Some(( - node.full_path(self.on_disk)?, - node.cached_directory_mtime() - .map(|mtime| Timestamp(mtime.seconds())), - )) + Box::new(self.iter_nodes().map(move |node| { + let node = node?; + let debug_tuple = if let Some(entry) = node.entry()? { + entry.debug_tuple() + } else if let Some(mtime) = node.cached_directory_mtime() { + (b' ', 0, -1, mtime.seconds() as i32) } else { - None - }) + (b' ', 0, -1, -1) + }; + Ok((node.full_path(self.on_disk)?, debug_tuple)) })) } }
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs Thu Jul 15 23:02:17 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/dispatch.rs Fri Jul 16 14:08:26 2021 +0200 @@ -259,20 +259,40 @@ /// are `Result`s. fn iter(&self) -> StateMapIter<'_>; - /// In the tree dirstate, return an iterator of "directory" (entry-less) - /// nodes with the data stored for them. This is for `hg debugdirstate - /// --dirs`. + /// Returns an iterator of tracked directories. /// - /// In the flat dirstate, returns an empty iterator. + /// This is the paths for which `has_tracked_dir` would return true. + /// Or, in other words, the union of ancestor paths of all paths that have + /// an associated entry in a "tracked" state in this dirstate map. /// /// Because parse errors can happen during iteration, the iterated items /// are `Result`s. - fn iter_directories( + fn iter_tracked_dirs( + &mut self, + ) -> Result< + Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + + Send + + '_, + >, + DirstateError, + >; + + /// Return an iterator of `(path, (state, mode, size, mtime))` for every + /// node stored in this dirstate map, for the purpose of the `hg + /// debugdirstate` command. + /// + /// For nodes that don’t have an entry, `state` is the ASCII space. + /// An `mtime` may still be present. It is used to optimize `status`. + /// + /// Because parse errors can happen during iteration, the iterated items + /// are `Result`s. + fn debug_iter( &self, ) -> Box< dyn Iterator< Item = Result< - (&HgPath, Option<Timestamp>), + (&HgPath, (u8, i32, i32, i32)), DirstateV2ParseError, >, > + Send @@ -476,17 +496,41 @@ Box::new((&**self).iter().map(|(key, value)| Ok((&**key, *value)))) } - fn iter_directories( + fn iter_tracked_dirs( + &mut self, + ) -> Result< + Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + + Send + + '_, + >, + DirstateError, + > { + self.set_all_dirs()?; + Ok(Box::new( + self.all_dirs + .as_ref() + .unwrap() + .iter() + .map(|path| Ok(&**path)), + )) + } + + fn debug_iter( &self, ) -> Box< dyn Iterator< Item = Result< - (&HgPath, Option<Timestamp>), + (&HgPath, (u8, i32, i32, i32)), DirstateV2ParseError, >, > + Send + '_, > { - Box::new(std::iter::empty()) + Box::new( + (&**self) + .iter() + .map(|(path, entry)| Ok((&**path, entry.debug_tuple()))), + ) } }
--- a/rust/hg-cpython/src/dirstate.rs Thu Jul 15 23:02:17 2021 +0200 +++ b/rust/hg-cpython/src/dirstate.rs Fri Jul 16 14:08:26 2021 +0200 @@ -52,9 +52,6 @@ py: Python, entry: &DirstateEntry, ) -> PyResult<PyObject> { - // might be silly to retrieve capsule function in hot loop - let make = make_dirstate_item_capi::retrieve(py)?; - let &DirstateEntry { state, mode, @@ -65,22 +62,19 @@ // because Into<u8> has a specific implementation while `as c_char` would // just do a naive enum cast. let state_code: u8 = state.into(); - - let maybe_obj = unsafe { - let ptr = make(state_code as c_char, mode, size, mtime); - PyObject::from_owned_ptr_opt(py, ptr) - }; - maybe_obj.ok_or_else(|| PyErr::fetch(py)) + make_dirstate_item_raw(py, state_code, mode, size, mtime) } -// XXX a bit strange to have a dedicated function, but directory are not -// treated as dirstate node by hg-core for now so… -pub fn make_directory_item(py: Python, mtime: i32) -> PyResult<PyObject> { - // might be silly to retrieve capsule function in hot loop +pub fn make_dirstate_item_raw( + py: Python, + state: u8, + mode: i32, + size: i32, + mtime: i32, +) -> PyResult<PyObject> { let make = make_dirstate_item_capi::retrieve(py)?; - let maybe_obj = unsafe { - let ptr = make(b'd' as c_char, 0 as i32, 0 as i32, mtime); + let ptr = make(state as c_char, mode, size, mtime); PyObject::from_owned_ptr_opt(py, ptr) }; maybe_obj.ok_or_else(|| PyErr::fetch(py))
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Thu Jul 15 23:02:17 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Fri Jul 16 14:08:26 2021 +0200 @@ -19,8 +19,8 @@ use crate::{ dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator}, - dirstate::make_directory_item, dirstate::make_dirstate_item, + dirstate::make_dirstate_item_raw, dirstate::non_normal_entries::{ NonNormalEntries, NonNormalEntriesIterator, }, @@ -61,17 +61,14 @@ use_dirstate_tree: bool, on_disk: PyBytes, ) -> PyResult<PyObject> { - let dirstate_error = |e: DirstateError| { - PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e)) - }; let (inner, parents) = if use_dirstate_tree { let (map, parents) = OwningDirstateMap::new_v1(py, on_disk) - .map_err(dirstate_error)?; + .map_err(|e| dirstate_error(py, e))?; (Box::new(map) as _, parents) } else { let bytes = on_disk.data(py); let mut map = RustDirstateMap::default(); - let parents = map.read(bytes).map_err(dirstate_error)?; + let parents = map.read(bytes).map_err(|e| dirstate_error(py, e))?; (Box::new(map) as _, parents) }; let map = Self::create_instance(py, inner)?; @@ -550,19 +547,29 @@ ) } - def directories(&self) -> PyResult<PyList> { + def tracked_dirs(&self) -> PyResult<PyList> { let dirs = PyList::new(py, &[]); - for item in self.inner(py).borrow().iter_directories() { - let (path, mtime) = item.map_err(|e| v2_error(py, e))?; + for path in self.inner(py).borrow_mut().iter_tracked_dirs() + .map_err(|e |dirstate_error(py, e))? + { + let path = path.map_err(|e| v2_error(py, e))?; let path = PyBytes::new(py, path.as_bytes()); - let mtime = mtime.map(|t| t.0).unwrap_or(-1); - let item = make_directory_item(py, mtime as i32)?; - let tuple = (path, item); - dirs.append(py, tuple.to_py_object(py).into_object()) + dirs.append(py, path.into_object()) } Ok(dirs) } + def debug_iter(&self) -> PyResult<PyList> { + let dirs = PyList::new(py, &[]); + for item in self.inner(py).borrow().debug_iter() { + let (path, (state, mode, size, mtime)) = + item.map_err(|e| v2_error(py, e))?; + let path = PyBytes::new(py, path.as_bytes()); + let item = make_dirstate_item_raw(py, state, mode, size, mtime)?; + dirs.append(py, (path, item).to_py_object(py).into_object()) + } + Ok(dirs) + } }); impl DirstateMap { @@ -616,3 +623,7 @@ pub(super) fn v2_error(py: Python<'_>, _: DirstateV2ParseError) -> PyErr { PyErr::new::<exc::ValueError, _>(py, "corrupted dirstate-v2") } + +fn dirstate_error(py: Python<'_>, e: DirstateError) -> PyErr { + PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e)) +}
--- a/rust/hg-cpython/src/dirstate/dispatch.rs Thu Jul 15 23:02:17 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/dispatch.rs Fri Jul 16 14:08:26 2021 +0200 @@ -203,17 +203,30 @@ self.get().iter() } - fn iter_directories( + fn iter_tracked_dirs( + &mut self, + ) -> Result< + Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + + Send + + '_, + >, + DirstateError, + > { + self.get_mut().iter_tracked_dirs() + } + + fn debug_iter( &self, ) -> Box< dyn Iterator< Item = Result< - (&HgPath, Option<Timestamp>), + (&HgPath, (u8, i32, i32, i32)), DirstateV2ParseError, >, > + Send + '_, > { - self.get().iter_directories() + self.get().debug_iter() } }
--- a/tests/test-completion.t Thu Jul 15 23:02:17 2021 +0200 +++ b/tests/test-completion.t Fri Jul 16 14:08:26 2021 +0200 @@ -284,7 +284,7 @@ debugdate: extended debugdeltachain: changelog, manifest, dir, template debugdirstateignorepatternshash: - debugdirstate: nodates, dates, datesort, dirs + debugdirstate: nodates, dates, datesort, all debugdiscovery: old, nonheads, rev, seed, local-as-revs, remote-as-revs, ssh, remotecmd, insecure, template debugdownload: output debugextensions: template
--- a/tests/test-status.t Thu Jul 15 23:02:17 2021 +0200 +++ b/tests/test-status.t Fri Jul 16 14:08:26 2021 +0200 @@ -929,23 +929,23 @@ The cached mtime is initially unset - $ hg debugdirstate --dirs --no-dates | grep '^d' - d 0 0 unset subdir + $ hg debugdirstate --all --no-dates | grep '^ ' + 0 -1 unset subdir It is still not set when there are unknown files $ touch subdir/unknown $ hg status ? subdir/unknown - $ hg debugdirstate --dirs --no-dates | grep '^d' - d 0 0 unset subdir + $ hg debugdirstate --all --no-dates | grep '^ ' + 0 -1 unset subdir Now the directory is eligible for caching, so its mtime is save in the dirstate $ rm subdir/unknown $ hg status - $ hg debugdirstate --dirs --no-dates | grep '^d' - d 0 0 set subdir + $ hg debugdirstate --all --no-dates | grep '^ ' + 0 -1 set subdir This time the command should be ever so slightly faster since it does not need `read_dir("subdir")` @@ -963,11 +963,11 @@ Removing a node from the dirstate resets the cache for its parent directory $ hg forget subdir/a - $ hg debugdirstate --dirs --no-dates | grep '^d' - d 0 0 set subdir + $ hg debugdirstate --all --no-dates | grep '^ ' + 0 -1 set subdir $ hg ci -qm '#1' - $ hg debugdirstate --dirs --no-dates | grep '^d' - d 0 0 unset subdir + $ hg debugdirstate --all --no-dates | grep '^ ' + 0 -1 unset subdir $ hg status ? subdir/a