changeset 52171:7be39c5110c9

hg-core: add a complete VFS This will be used from Python in a later change. More changes are needed in hg-core and rhg to properly clean up the APIs of the old VFS implementation but it can be done when the dust settles and we start adding more functionality to the pure Rust VFS.
author Raphaël Gomès <rgomes@octobus.net>
date Mon, 29 Jul 2024 20:47:43 +0200
parents 1a8466fd904a
children 067ec8574c33
files rust/hg-core/src/errors.rs rust/hg-core/src/lock.rs rust/hg-core/src/logging.rs rust/hg-core/src/repo.rs rust/hg-core/src/revlog/changelog.rs rust/hg-core/src/revlog/file_io.rs rust/hg-core/src/revlog/mod.rs rust/hg-core/src/vfs.rs rust/hg-cpython/src/vfs.rs
diffstat 9 files changed, 301 insertions(+), 94 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/errors.rs	Mon Jul 29 20:28:42 2024 +0200
+++ b/rust/hg-core/src/errors.rs	Mon Jul 29 20:47:43 2024 +0200
@@ -66,6 +66,10 @@
         from: std::path::PathBuf,
         to: std::path::PathBuf,
     },
+    CopyingFile {
+        from: std::path::PathBuf,
+        to: std::path::PathBuf,
+    },
     /// `std::fs::canonicalize`
     CanonicalizingPath(std::path::PathBuf),
     /// `std::env::current_dir`
@@ -147,6 +151,12 @@
                 from.display(),
                 to.display()
             ),
+            IoErrorContext::CopyingFile { from, to } => write!(
+                f,
+                "when copying {} to {}",
+                from.display(),
+                to.display()
+            ),
             IoErrorContext::CanonicalizingPath(path) => {
                 write!(f, "when canonicalizing {}", path.display())
             }
--- a/rust/hg-core/src/lock.rs	Mon Jul 29 20:28:42 2024 +0200
+++ b/rust/hg-core/src/lock.rs	Mon Jul 29 20:47:43 2024 +0200
@@ -2,9 +2,11 @@
 
 use crate::errors::HgError;
 use crate::errors::HgResultExt;
+use crate::vfs::Vfs;
 use crate::vfs::VfsImpl;
 use std::io;
 use std::io::ErrorKind;
+use std::path::Path;
 
 #[derive(derive_more::From)]
 pub enum LockError {
@@ -65,7 +67,7 @@
         if !lock_should_be_broken(&lock_data) {
             return Err(LockError::AlreadyHeld);
         }
-        Ok(hg_vfs.remove_file(lock_filename)?)
+        Ok(hg_vfs.unlink(Path::new(lock_filename))?)
     })?
 }
 
@@ -99,7 +101,7 @@
 }
 
 fn unlock(hg_vfs: &VfsImpl, lock_filename: &str) -> Result<(), HgError> {
-    hg_vfs.remove_file(lock_filename)
+    hg_vfs.unlink(Path::new(lock_filename))
 }
 
 /// Return whether the process that is/was holding the lock is known not to be
--- a/rust/hg-core/src/logging.rs	Mon Jul 29 20:28:42 2024 +0200
+++ b/rust/hg-core/src/logging.rs	Mon Jul 29 20:47:43 2024 +0200
@@ -1,6 +1,7 @@
 use crate::errors::{HgError, HgResultExt, IoErrorContext, IoResultExt};
-use crate::vfs::VfsImpl;
+use crate::vfs::{Vfs, VfsImpl};
 use std::io::Write;
+use std::path::Path;
 
 /// An utility to append to a log file with the given name, and optionally
 /// rotate it after it reaches a certain maximum size.
@@ -64,15 +65,20 @@
                 for i in (1..self.max_files).rev() {
                     self.vfs
                         .rename(
-                            format!("{}.{}", self.name, i),
-                            format!("{}.{}", self.name, i + 1),
+                            Path::new(&format!("{}.{}", self.name, i)),
+                            Path::new(&format!("{}.{}", self.name, i + 1)),
+                            false,
                         )
                         .io_not_found_as_none()?;
                 }
                 // Then rename `{name}` to `{name}.1`. This is the
                 // previously-opened `file`.
                 self.vfs
-                    .rename(self.name, format!("{}.1", self.name))
+                    .rename(
+                        Path::new(&self.name),
+                        Path::new(&format!("{}.1", self.name)),
+                        false,
+                    )
                     .io_not_found_as_none()?;
                 // Finally, create a new `{name}` file and replace our `file`
                 // handle.
@@ -87,9 +93,7 @@
 #[test]
 fn test_rotation() {
     let temp = tempfile::tempdir().unwrap();
-    let vfs = VfsImpl {
-        base: temp.path().to_owned(),
-    };
+    let vfs = VfsImpl::new(temp.path().to_owned(), false);
     let logger = LogFile::new(vfs.clone(), "log")
         .max_size(Some(3))
         .max_files(2);
--- a/rust/hg-core/src/repo.rs	Mon Jul 29 20:28:42 2024 +0200
+++ b/rust/hg-core/src/repo.rs	Mon Jul 29 20:47:43 2024 +0200
@@ -18,7 +18,7 @@
 use crate::utils::files::get_path_from_bytes;
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
-use crate::vfs::{is_dir, is_file, VfsImpl};
+use crate::vfs::{is_dir, is_file, Vfs, VfsImpl};
 use crate::DirstateError;
 use crate::{
     exit_codes, requirements, NodePrefix, RevlogType, UncheckedRevision,
@@ -147,9 +147,7 @@
         let mut repo_config_files =
             vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
 
-        let hg_vfs = VfsImpl {
-            base: dot_hg.to_owned(),
-        };
+        let hg_vfs = VfsImpl::new(dot_hg.to_owned(), false);
         let mut reqs = requirements::load_if_exists(&hg_vfs)?;
         let relative =
             reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
@@ -191,9 +189,10 @@
 
             store_path = shared_path.join("store");
 
-            let source_is_share_safe = requirements::load(VfsImpl {
-                base: shared_path.to_owned(),
-            })?
+            let source_is_share_safe = requirements::load(VfsImpl::new(
+                shared_path.to_owned(),
+                true,
+            ))?
             .contains(requirements::SHARESAFE_REQUIREMENT);
 
             if share_safe != source_is_share_safe {
@@ -205,9 +204,10 @@
             }
         }
         if share_safe {
-            reqs.extend(requirements::load(VfsImpl {
-                base: store_path.to_owned(),
-            })?);
+            reqs.extend(requirements::load(VfsImpl::new(
+                store_path.to_owned(),
+                true,
+            ))?);
         }
 
         let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
@@ -248,23 +248,17 @@
     /// For accessing repository files (in `.hg`), except for the store
     /// (`.hg/store`).
     pub fn hg_vfs(&self) -> VfsImpl {
-        VfsImpl {
-            base: self.dot_hg.to_owned(),
-        }
+        VfsImpl::new(self.dot_hg.to_owned(), false)
     }
 
     /// For accessing repository store files (in `.hg/store`)
     pub fn store_vfs(&self) -> VfsImpl {
-        VfsImpl {
-            base: self.store.to_owned(),
-        }
+        VfsImpl::new(self.store.to_owned(), false)
     }
 
     /// For accessing the working copy
     pub fn working_directory_vfs(&self) -> VfsImpl {
-        VfsImpl {
-            base: self.working_directory.to_owned(),
-        }
+        VfsImpl::new(self.working_directory.to_owned(), false)
     }
 
     pub fn try_with_wlock_no_wait<R>(
@@ -795,7 +789,7 @@
         if let Some(uuid) = old_uuid_to_remove {
             // Remove the old data file after the new docket pointing to the
             // new data file was written.
-            vfs.remove_file(format!("dirstate.{}", uuid))?;
+            vfs.unlink(Path::new(&format!("dirstate.{}", uuid)))?;
         }
         Ok(())
     }
--- a/rust/hg-core/src/revlog/changelog.rs	Mon Jul 29 20:28:42 2024 +0200
+++ b/rust/hg-core/src/revlog/changelog.rs	Mon Jul 29 20:47:43 2024 +0200
@@ -562,9 +562,7 @@
     fn test_data_from_rev_null() -> Result<(), RevlogError> {
         // an empty revlog will be enough for this case
         let temp = tempfile::tempdir().unwrap();
-        let vfs = VfsImpl {
-            base: temp.path().to_owned(),
-        };
+        let vfs = VfsImpl::new(temp.path().to_owned(), false);
         std::fs::write(temp.path().join("foo.i"), b"").unwrap();
         let revlog =
             Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default())
--- a/rust/hg-core/src/revlog/file_io.rs	Mon Jul 29 20:28:42 2024 +0200
+++ b/rust/hg-core/src/revlog/file_io.rs	Mon Jul 29 20:47:43 2024 +0200
@@ -382,7 +382,7 @@
         let filename = Path::new("a");
         let file_path = base.join(filename);
         let raf = RandomAccessFile::new(
-            Box::new(VfsImpl { base }),
+            Box::new(VfsImpl::new(base.clone(), true)),
             filename.to_owned(),
         );
 
@@ -411,7 +411,7 @@
         let filename = base.join("a");
         // No `create` should fail
         FileHandle::new(
-            Box::new(VfsImpl { base: base.clone() }),
+            Box::new(VfsImpl::new(base.clone(), false)),
             &filename,
             false,
             false,
@@ -420,7 +420,7 @@
         std::fs::write(&filename, b"1234567890").unwrap();
 
         let mut read_handle = FileHandle::new(
-            Box::new(VfsImpl { base: base.clone() }),
+            Box::new(VfsImpl::new(base.clone(), true)),
             &filename,
             false,
             false,
@@ -445,17 +445,14 @@
         // Seeking too much data should fail
         read_handle.read_exact(1000).unwrap_err();
 
-        // Work around the yet unimplemented VFS for write
-        let mut options = std::fs::OpenOptions::new();
-        options.read(true);
-        options.write(true);
-        let file = options.open(&filename).unwrap();
         // Open a write handle
-        let mut handle = FileHandle::from_file(
-            file,
-            Box::new(VfsImpl { base: base.clone() }),
+        let mut handle = FileHandle::new(
+            Box::new(VfsImpl::new(base.clone(), false)),
             &filename,
-        );
+            false,
+            true,
+        )
+        .unwrap();
 
         // Now writing should succeed
         handle.write_all(b"new data").unwrap();
@@ -463,15 +460,28 @@
         assert_eq!(handle.position().unwrap(), 8);
         // We can still read
         assert_eq!(handle.read_exact(2).unwrap(), b"90".to_vec());
+
+        let mut read_handle = FileHandle::new(
+            Box::new(VfsImpl::new(base.clone(), true)),
+            &filename,
+            false,
+            false,
+        )
+        .unwrap();
+        read_handle.seek(SeekFrom::Start(0)).unwrap();
+        // On-disk file contents should be changed
+        assert_eq!(
+            &read_handle.read_exact(10).unwrap(),
+            &b"new data90".to_vec(),
+        );
         // Flushing doesn't do anything unexpected
         handle.flush().unwrap();
 
         let delayed_buffer = Arc::new(Mutex::new(DelayedBuffer::default()));
-        let file = options.open(&filename).unwrap();
-        let mut handle = FileHandle::from_file_delayed(
-            file,
-            Box::new(VfsImpl { base: base.clone() }),
+        let mut handle = FileHandle::new_delayed(
+            Box::new(VfsImpl::new(base.clone(), false)),
             &filename,
+            false,
             delayed_buffer,
         )
         .unwrap();
--- a/rust/hg-core/src/revlog/mod.rs	Mon Jul 29 20:28:42 2024 +0200
+++ b/rust/hg-core/src/revlog/mod.rs	Mon Jul 29 20:47:43 2024 +0200
@@ -719,9 +719,7 @@
     #[test]
     fn test_empty() {
         let temp = tempfile::tempdir().unwrap();
-        let vfs = VfsImpl {
-            base: temp.path().to_owned(),
-        };
+        let vfs = VfsImpl::new(temp.path().to_owned(), false);
         std::fs::write(temp.path().join("foo.i"), b"").unwrap();
         std::fs::write(temp.path().join("foo.d"), b"").unwrap();
         let revlog =
@@ -743,9 +741,7 @@
     #[test]
     fn test_inline() {
         let temp = tempfile::tempdir().unwrap();
-        let vfs = VfsImpl {
-            base: temp.path().to_owned(),
-        };
+        let vfs = VfsImpl::new(temp.path().to_owned(), false);
         let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
             .unwrap();
         let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
@@ -812,9 +808,7 @@
     #[test]
     fn test_nodemap() {
         let temp = tempfile::tempdir().unwrap();
-        let vfs = VfsImpl {
-            base: temp.path().to_owned(),
-        };
+        let vfs = VfsImpl::new(temp.path().to_owned(), false);
 
         // building a revlog with a forced Node starting with zeros
         // This is a corruption, but it does not preclude using the nodemap
--- a/rust/hg-core/src/vfs.rs	Mon Jul 29 20:28:42 2024 +0200
+++ b/rust/hg-core/src/vfs.rs	Mon Jul 29 20:47:43 2024 +0200
@@ -2,20 +2,66 @@
 use crate::exit_codes;
 use dyn_clone::DynClone;
 use memmap2::{Mmap, MmapOptions};
-use std::fs::File;
+use rand::distributions::DistString;
+use rand_distr::Alphanumeric;
+use std::fs::{File, OpenOptions};
 use std::io::{ErrorKind, Write};
-use std::os::unix::fs::MetadataExt;
+use std::os::unix::fs::{MetadataExt, PermissionsExt};
 use std::path::{Path, PathBuf};
+use std::sync::OnceLock;
 
 /// Filesystem access abstraction for the contents of a given "base" diretory
 #[derive(Clone)]
 pub struct VfsImpl {
     pub(crate) base: PathBuf,
+    pub readonly: bool,
+    pub mode: Option<u32>,
 }
 
 struct FileNotFound(std::io::Error, PathBuf);
 
+/// Store the umask for the whole process since it's expensive to get.
+static UMASK: OnceLock<u32> = OnceLock::new();
+
+fn get_umask() -> u32 {
+    *UMASK.get_or_init(|| unsafe {
+        // TODO is there any way of getting the umask without temporarily
+        // setting it? Doesn't this affect all threads in this tiny window?
+        let mask = libc::umask(0);
+        libc::umask(mask);
+        mask & 0o777
+    })
+}
+
+/// Return the (unix) mode with which we will create/fix files
+fn get_mode(base: impl AsRef<Path>) -> Option<u32> {
+    match base.as_ref().metadata() {
+        Ok(meta) => {
+            // files in .hg/ will be created using this mode
+            let mode = meta.mode();
+            // avoid some useless chmods
+            if (0o777 & !get_umask()) == (0o777 & mode) {
+                None
+            } else {
+                Some(mode)
+            }
+        }
+        Err(_) => None,
+    }
+}
+
 impl VfsImpl {
+    pub fn new(base: PathBuf, readonly: bool) -> Self {
+        let mode = get_mode(&base);
+        Self {
+            base,
+            readonly,
+            mode,
+        }
+    }
+
+    // XXX these methods are probably redundant with VFS trait?
+
     pub fn join(&self, relative_path: impl AsRef<Path>) -> PathBuf {
         self.base.join(relative_path)
     }
@@ -103,26 +149,6 @@
         }
     }
 
-    pub fn rename(
-        &self,
-        relative_from: impl AsRef<Path>,
-        relative_to: impl AsRef<Path>,
-    ) -> Result<(), HgError> {
-        let from = self.join(relative_from);
-        let to = self.join(relative_to);
-        std::fs::rename(&from, &to)
-            .with_context(|| IoErrorContext::RenamingFile { from, to })
-    }
-
-    pub fn remove_file(
-        &self,
-        relative_path: impl AsRef<Path>,
-    ) -> Result<(), HgError> {
-        let path = self.join(relative_path);
-        std::fs::remove_file(&path)
-            .with_context(|| IoErrorContext::RemovingFile(path))
-    }
-
     #[cfg(unix)]
     pub fn create_symlink(
         &self,
@@ -284,27 +310,97 @@
         check_ambig: bool,
     ) -> Result<(), HgError>;
     fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError>;
+    fn base(&self) -> &Path;
 }
 
 /// These methods will need to be implemented once `rhg` (and other) non-Python
 /// users of `hg-core` start doing more on their own, like writing to files.
 impl Vfs for VfsImpl {
-    fn open(&self, _filename: &Path) -> Result<std::fs::File, HgError> {
-        todo!()
+    fn open(&self, filename: &Path) -> Result<std::fs::File, HgError> {
+        if self.readonly {
+            return Err(HgError::abort(
+                "write access in a readonly vfs",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        // TODO auditpath
+        let path = self.base.join(filename);
+        copy_in_place_if_hardlink(&path)?;
+
+        OpenOptions::new()
+            .create(false)
+            .create_new(false)
+            .write(true)
+            .read(true)
+            .open(&path)
+            .when_writing_file(&path)
     }
+
     fn open_read(&self, filename: &Path) -> Result<std::fs::File, HgError> {
+        // TODO auditpath
         let path = self.base.join(filename);
         std::fs::File::open(&path).when_reading_file(&path)
     }
+
     fn open_check_ambig(
         &self,
-        _filename: &Path,
+        filename: &Path,
     ) -> Result<std::fs::File, HgError> {
-        todo!()
+        if self.readonly {
+            return Err(HgError::abort(
+                "write access in a readonly vfs",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+
+        let path = self.base.join(filename);
+        copy_in_place_if_hardlink(&path)?;
+
+        // TODO auditpath, check ambig
+        OpenOptions::new()
+            .write(true)
+            .read(true) // Can be used for reading to save on `open` calls
+            .create(false)
+            .open(&path)
+            .when_reading_file(&path)
     }
-    fn create(&self, _filename: &Path) -> Result<std::fs::File, HgError> {
-        todo!()
+
+    fn create(&self, filename: &Path) -> Result<std::fs::File, HgError> {
+        if self.readonly {
+            return Err(HgError::abort(
+                "write access in a readonly vfs",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        // TODO auditpath
+        let path = self.base.join(filename);
+        let parent = path.parent().expect("file at root");
+        std::fs::create_dir_all(parent).when_writing_file(parent)?;
+        // TODO checkambig (wrap File somehow)
+
+        let file = OpenOptions::new()
+            .create(true)
+            .truncate(true)
+            .write(true)
+            .read(true)
+            .open(&path)
+            .when_writing_file(&path)?;
+
+        if let Some(mode) = self.mode {
+            // Creating the file with the right permission (with `.mode()`)
+            // may not work since umask takes effect for file creation.
+            // So we need to fix the permission after creating the file.
+            fix_directory_permissions(&self.base, &path, mode)?;
+            let perm = std::fs::Permissions::from_mode(mode & 0o666);
+            std::fs::set_permissions(&path, perm).when_writing_file(&path)?;
+        }
+
+        Ok(file)
     }
+
     fn create_atomic(
         &self,
         _filename: &Path,
@@ -312,6 +408,7 @@
     ) -> Result<AtomicFile, HgError> {
         todo!()
     }
+
     fn file_size(&self, file: &File) -> Result<u64, HgError> {
         Ok(file
             .metadata()
@@ -324,23 +421,116 @@
             })?
             .size())
     }
-    fn exists(&self, _filename: &Path) -> bool {
-        todo!()
+
+    fn exists(&self, filename: &Path) -> bool {
+        self.base.join(filename).exists()
     }
-    fn unlink(&self, _filename: &Path) -> Result<(), HgError> {
-        todo!()
+
+    fn unlink(&self, filename: &Path) -> Result<(), HgError> {
+        if self.readonly {
+            return Err(HgError::abort(
+                "write access in a readonly vfs",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        let path = self.base.join(filename);
+        std::fs::remove_file(&path)
+            .with_context(|| IoErrorContext::RemovingFile(path))
     }
+
     fn rename(
         &self,
-        _from: &Path,
-        _to: &Path,
+        from: &Path,
+        to: &Path,
         _check_ambig: bool,
     ) -> Result<(), HgError> {
-        todo!()
+        if self.readonly {
+            return Err(HgError::abort(
+                "write access in a readonly vfs",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        // TODO checkambig
+        let from = self.base.join(from);
+        let to = self.base.join(to);
+        std::fs::rename(&from, &to)
+            .with_context(|| IoErrorContext::RenamingFile { from, to })
+    }
+
+    fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError> {
+        // TODO checkambig?
+        let from = self.base.join(from);
+        let to = self.base.join(to);
+        std::fs::copy(&from, &to)
+            .with_context(|| IoErrorContext::CopyingFile { from, to })
+            .map(|_| ())
+    }
+
+    fn base(&self) -> &Path {
+        &self.base
     }
-    fn copy(&self, _from: &Path, _to: &Path) -> Result<(), HgError> {
-        todo!()
+}
+
+fn fix_directory_permissions(
+    base: &Path,
+    path: &Path,
+    mode: u32,
+) -> Result<(), HgError> {
+    let mut ancestors = path.ancestors();
+    ancestors.next(); // yields the path itself
+
+    for ancestor in ancestors {
+        if ancestor == base {
+            break;
+        }
+        let perm = std::fs::Permissions::from_mode(mode);
+        std::fs::set_permissions(ancestor, perm)
+            .when_writing_file(ancestor)?;
     }
+    Ok(())
+}
+
+/// Detects whether `path` is a hardlink and does a tmp copy + rename erase
+/// to turn it into its own file. Revlogs are usually hardlinked when doing
+/// a local clone, and we don't want to modify the original repo.
+fn copy_in_place_if_hardlink(path: &Path) -> Result<(), HgError> {
+    let metadata = path.metadata().when_writing_file(path)?;
+    if metadata.nlink() > 0 {
+        // If it's hardlinked, copy it and rename it back before changing it.
+        let tmpdir = path.parent().expect("file at root");
+        let name = Alphanumeric.sample_string(&mut rand::thread_rng(), 16);
+        let tmpfile = tmpdir.join(name);
+        std::fs::create_dir_all(tmpfile.parent().expect("file at root"))
+            .with_context(|| IoErrorContext::CopyingFile {
+                from: path.to_owned(),
+                to: tmpfile.to_owned(),
+            })?;
+        std::fs::copy(path, &tmpfile).with_context(|| {
+            IoErrorContext::CopyingFile {
+                from: path.to_owned(),
+                to: tmpfile.to_owned(),
+            }
+        })?;
+        std::fs::rename(&tmpfile, path).with_context(|| {
+            IoErrorContext::RenamingFile {
+                from: tmpfile,
+                to: path.to_owned(),
+            }
+        })?;
+    }
+    Ok(())
+}
+
+pub fn is_revlog_file(path: impl AsRef<Path>) -> bool {
+    path.as_ref()
+        .extension()
+        .map(|ext| {
+            ["i", "idx", "d", "dat", "n", "nd", "sda"]
+                .contains(&ext.to_string_lossy().as_ref())
+        })
+        .unwrap_or(false)
 }
 
 pub(crate) fn is_dir(path: impl AsRef<Path>) -> Result<bool, HgError> {
--- a/rust/hg-cpython/src/vfs.rs	Mon Jul 29 20:28:42 2024 +0200
+++ b/rust/hg-cpython/src/vfs.rs	Mon Jul 29 20:47:43 2024 +0200
@@ -286,4 +286,9 @@
         std::fs::copy(from, to).when_writing_file(to)?;
         Ok(())
     }
+
+    fn base(&self) -> &Path {
+        // This will only be useful in a later patch
+        todo!()
+    }
 }