# HG changeset patch # User Raphaël Gomès # Date 1728568485 -7200 # Node ID 8d35941689afa19584a8ef94c20eefa42845a888 # Parent 735bf027dd1dfdcf40e4a6c58cc9117e64fec69b rust-vfs: support checkambig This was missing from the Rust code, which means worse caching. See https://wiki.mercurial-scm.org/ExactCacheValidationPlan. Explanations on what ambiguity means inline. diff -r 735bf027dd1d -r 8d35941689af rust/hg-core/src/revlog/file_io.rs --- a/rust/hg-core/src/revlog/file_io.rs Tue Oct 08 16:10:30 2024 +0200 +++ b/rust/hg-core/src/revlog/file_io.rs Thu Oct 10 15:54:45 2024 +0200 @@ -2,7 +2,6 @@ use std::{ cell::RefCell, - fs::File, io::{Read, Seek, SeekFrom, Write}, path::{Path, PathBuf}, sync::{Arc, Mutex}, @@ -10,7 +9,7 @@ use crate::{ errors::{HgError, IoResultExt}, - vfs::Vfs, + vfs::{Vfs, VfsFile}, }; /// Wraps accessing arbitrary chunks of data within a file and reusing handles. @@ -123,12 +122,12 @@ } } -/// Holds an open [`File`] and the related data. This can be used for reading -/// and writing. Writes can be delayed to a buffer before touching the disk, -/// if relevant (in the changelog case), but reads are transparent. +/// Holds an open [`VfsFile`] and the related data. This can be used for +/// reading and writing. Writes can be delayed to a buffer before touching +/// the disk, if relevant (in the changelog case), but reads are transparent. pub struct FileHandle { /// The actual open file - pub file: File, + pub file: VfsFile, /// The VFS with which the file was opened vfs: Box, /// Filename of the open file, relative to the repo root @@ -171,7 +170,7 @@ write: bool, ) -> Result { let file = if create { - vfs.create(filename.as_ref())? + vfs.create(filename.as_ref(), false)? } else if write { vfs.open(filename.as_ref())? } else { @@ -193,7 +192,7 @@ delayed_buffer: Arc>, ) -> Result { let mut file = if create { - vfs.create(filename.as_ref())? + vfs.create(filename.as_ref(), false)? } else { vfs.open(filename.as_ref())? }; @@ -216,9 +215,9 @@ }) } - /// Wrap an existing [`File`] + /// Wrap an existing [`VfsFile`] pub fn from_file( - file: File, + file: VfsFile, vfs: Box, filename: impl AsRef, ) -> Self { @@ -230,9 +229,9 @@ } } - /// Wrap an existing [`File`], but writes go to a [`DelayedBuffer`]. + /// Wrap an existing [`VfsFile`], but writes go to a [`DelayedBuffer`]. pub fn from_file_delayed( - mut file: File, + mut file: VfsFile, vfs: Box, filename: impl AsRef, delayed_buffer: Arc>, diff -r 735bf027dd1d -r 8d35941689af rust/hg-core/src/revlog/inner_revlog.rs --- a/rust/hg-core/src/revlog/inner_revlog.rs Tue Oct 08 16:10:30 2024 +0200 +++ b/rust/hg-core/src/revlog/inner_revlog.rs Thu Oct 10 15:54:45 2024 +0200 @@ -857,7 +857,7 @@ if error.kind() != ErrorKind::NotFound { return Err(HgError::IoError { error, context }); } - self.vfs.create(&self.data_file)? + self.vfs.create(&self.data_file, true)? } e => return Err(e), }, @@ -964,7 +964,8 @@ self.writing_handles.take(); self.segment_file.writing_handle.take(); } - let mut new_data_file_handle = self.vfs.create(&self.data_file)?; + let mut new_data_file_handle = + self.vfs.create(&self.data_file, true)?; // Drop any potential data, possibly redundant with the VFS impl. new_data_file_handle .set_len(0) @@ -988,7 +989,7 @@ self.index_file = index_path } - let mut new_index_handle = self.vfs.create(&self.index_file)?; + let mut new_index_handle = self.vfs.create(&self.index_file, true)?; let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE); for r in 0..self.len() { let rev = Revision(r as BaseRevision); diff -r 735bf027dd1d -r 8d35941689af rust/hg-core/src/vfs.rs --- a/rust/hg-core/src/vfs.rs Tue Oct 08 16:10:30 2024 +0200 +++ b/rust/hg-core/src/vfs.rs Thu Oct 10 15:54:45 2024 +0200 @@ -7,10 +7,15 @@ use format_bytes::format_bytes; use memmap2::{Mmap, MmapOptions}; use rand::distributions::{Alphanumeric, DistString}; -use std::fs::{File, OpenOptions}; -use std::io::{ErrorKind, Seek, Write}; +use std::fs::{File, Metadata, OpenOptions}; +use std::io::{ErrorKind, Read, Seek, Write}; +use std::os::fd::AsRawFd; use std::os::unix::fs::{MetadataExt, PermissionsExt}; use std::path::{Path, PathBuf}; +#[cfg(test)] +use std::sync::atomic::AtomicUsize; +#[cfg(test)] +use std::sync::atomic::Ordering; use std::sync::OnceLock; /// Filesystem access abstraction for the contents of a given "base" diretory @@ -203,6 +208,188 @@ } } +/// Abstraction over the files handled by a [`Vfs`]. +#[derive(Debug)] +pub enum VfsFile { + Atomic(AtomicFile), + + Normal { + file: File, + path: PathBuf, + /// If `Some`, check (and maybe fix) this file's timestamp ambiguity. + /// See [`is_filetime_ambiguous`]. + check_ambig: Option, + }, +} + +impl VfsFile { + pub fn normal(file: File, path: PathBuf) -> Self { + Self::Normal { + file, + check_ambig: None, + path, + } + } + pub fn normal_check_ambig( + file: File, + path: PathBuf, + ) -> Result { + Ok(Self::Normal { + file, + check_ambig: Some(path.metadata().when_reading_file(&path)?), + path, + }) + } + pub fn try_clone(&self) -> Result { + Ok(match self { + VfsFile::Atomic(AtomicFile { + fp, + temp_path, + check_ambig, + target_name, + is_open, + }) => Self::Atomic(AtomicFile { + fp: fp.try_clone().when_reading_file(temp_path)?, + temp_path: temp_path.clone(), + check_ambig: *check_ambig, + target_name: target_name.clone(), + is_open: *is_open, + }), + VfsFile::Normal { + file, + check_ambig, + path, + } => Self::Normal { + file: file.try_clone().when_reading_file(path)?, + check_ambig: check_ambig.clone(), + path: path.to_owned(), + }, + }) + } + pub fn set_len(&self, len: u64) -> Result<(), std::io::Error> { + match self { + VfsFile::Atomic(atomic_file) => atomic_file.fp.set_len(len), + VfsFile::Normal { file, .. } => file.set_len(len), + } + } + + pub fn metadata(&self) -> Result { + match self { + VfsFile::Atomic(atomic_file) => atomic_file.fp.metadata(), + VfsFile::Normal { file, .. } => file.metadata(), + } + } +} + +impl AsRawFd for VfsFile { + fn as_raw_fd(&self) -> std::os::unix::prelude::RawFd { + match self { + VfsFile::Atomic(atomic_file) => atomic_file.fp.as_raw_fd(), + VfsFile::Normal { file, .. } => file.as_raw_fd(), + } + } +} + +impl Seek for VfsFile { + fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { + match self { + VfsFile::Atomic(atomic_file) => atomic_file.seek(pos), + VfsFile::Normal { file, .. } => file.seek(pos), + } + } +} + +impl Read for VfsFile { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + match self { + VfsFile::Atomic(atomic_file) => atomic_file.fp.read(buf), + VfsFile::Normal { file, .. } => file.read(buf), + } + } +} + +impl Write for VfsFile { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + match self { + VfsFile::Atomic(atomic_file) => atomic_file.fp.write(buf), + VfsFile::Normal { file, .. } => file.write(buf), + } + } + + fn flush(&mut self) -> std::io::Result<()> { + match self { + VfsFile::Atomic(atomic_file) => atomic_file.fp.flush(), + VfsFile::Normal { file, .. } => file.flush(), + } + } +} + +impl Drop for VfsFile { + fn drop(&mut self) { + if let VfsFile::Normal { + path, + check_ambig: Some(old), + .. + } = self + { + avoid_timestamp_ambiguity(path, old) + } + } +} + +/// Records the number of times we've fixed a timestamp ambiguity, only +/// applicable for tests. +#[cfg(test)] +static TIMESTAMP_FIXES_CALLS: AtomicUsize = AtomicUsize::new(0); + +fn avoid_timestamp_ambiguity(path: &Path, old: &Metadata) { + if let Ok(new) = path.metadata() { + let is_ambiguous = is_filetime_ambiguous(&new, old); + if is_ambiguous { + let advanced = + filetime::FileTime::from_unix_time(old.mtime() + 1, 0); + if filetime::set_file_times(path, advanced, advanced).is_ok() { + #[cfg(test)] + { + TIMESTAMP_FIXES_CALLS.fetch_add(1, Ordering::Relaxed); + } + } + } + } +} + +/// Examine whether new stat is ambiguous against old one +/// +/// "S[N]" below means stat of a file at N-th change: +/// +/// - S[n-1].ctime < S[n].ctime: can detect change of a file +/// - S[n-1].ctime == S[n].ctime +/// - S[n-1].ctime < S[n].mtime: means natural advancing (*1) +/// - S[n-1].ctime == S[n].mtime: is ambiguous (*2) +/// - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care) +/// - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care) +/// +/// Case (*2) above means that a file was changed twice or more at +/// same time in sec (= S[n-1].ctime), and comparison of timestamp +/// is ambiguous. +/// +/// Base idea to avoid such ambiguity is "advance mtime 1 sec, if +/// timestamp is ambiguous". +/// +/// But advancing mtime only in case (*2) doesn't work as +/// expected, because naturally advanced S[n].mtime in case (*1) +/// might be equal to manually advanced S[n-1 or earlier].mtime. +/// +/// Therefore, all "S[n-1].ctime == S[n].ctime" cases should be +/// treated as ambiguous regardless of mtime, to avoid overlooking +/// by confliction between such mtime. +/// +/// Advancing mtime "if isambig(new, old)" ensures "S[n-1].mtime != +/// S[n].mtime", even if size of a file isn't changed. +fn is_filetime_ambiguous(new: &Metadata, old: &Metadata) -> bool { + new.ctime() == old.ctime() +} + /// Writable file object that atomically updates a file /// /// All writes will go to a temporary copy of the original file. Call @@ -302,14 +489,7 @@ if self.check_ambig { if let Ok(stat) = target.metadata() { std::fs::rename(&self.temp_path, &target)?; - let new_stat = std::fs::metadata(&target)?; - let ctime = new_stat.ctime(); - let is_ambiguous = ctime == stat.ctime(); - if is_ambiguous { - let advanced = - filetime::FileTime::from_unix_time(ctime + 1, 0); - filetime::set_file_times(target, advanced, advanced)?; - } + avoid_timestamp_ambiguity(&target, &stat); } else { std::fs::rename(&self.temp_path, target)?; } @@ -339,20 +519,21 @@ /// filesystem layer (like passing one from Python). pub trait Vfs: Sync + Send + DynClone { // TODO make `open` readonly and make `open_read` an `open_write` - fn open(&self, filename: &Path) -> Result; - fn open_read(&self, filename: &Path) -> Result; - fn open_check_ambig( + fn open(&self, filename: &Path) -> Result; + fn open_read(&self, filename: &Path) -> Result; + fn open_check_ambig(&self, filename: &Path) -> Result; + fn create( &self, filename: &Path, - ) -> Result; - fn create(&self, filename: &Path) -> Result; + check_ambig: bool, + ) -> Result; /// Must truncate the new file if exist fn create_atomic( &self, filename: &Path, check_ambig: bool, - ) -> Result; - fn file_size(&self, file: &File) -> Result; + ) -> Result; + fn file_size(&self, file: &VfsFile) -> Result; fn exists(&self, filename: &Path) -> bool; fn unlink(&self, filename: &Path) -> Result<(), HgError>; fn rename( @@ -368,7 +549,7 @@ /// These methods will need to be implemented once `rhg` (and other) non-Python /// users of `hg-core` start doing more on their own, like writing to files. impl Vfs for VfsImpl { - fn open(&self, filename: &Path) -> Result { + fn open(&self, filename: &Path) -> Result { if self.readonly { return Err(HgError::abort( "write access in a readonly vfs", @@ -380,25 +561,28 @@ let path = self.base.join(filename); copy_in_place_if_hardlink(&path)?; - OpenOptions::new() - .create(false) - .create_new(false) - .write(true) - .read(true) - .open(&path) - .when_writing_file(&path) + Ok(VfsFile::normal( + OpenOptions::new() + .create(false) + .create_new(false) + .write(true) + .read(true) + .open(&path) + .when_writing_file(&path)?, + path.to_owned(), + )) } - fn open_read(&self, filename: &Path) -> Result { + fn open_read(&self, filename: &Path) -> Result { // TODO auditpath let path = self.base.join(filename); - std::fs::File::open(&path).when_reading_file(&path) + Ok(VfsFile::normal( + std::fs::File::open(&path).when_reading_file(&path)?, + filename.to_owned(), + )) } - fn open_check_ambig( - &self, - filename: &Path, - ) -> Result { + fn open_check_ambig(&self, filename: &Path) -> Result { if self.readonly { return Err(HgError::abort( "write access in a readonly vfs", @@ -410,16 +594,23 @@ let path = self.base.join(filename); copy_in_place_if_hardlink(&path)?; - // TODO auditpath, check ambig - OpenOptions::new() - .write(true) - .read(true) // Can be used for reading to save on `open` calls - .create(false) - .open(&path) - .when_reading_file(&path) + // TODO auditpath + VfsFile::normal_check_ambig( + OpenOptions::new() + .write(true) + .read(true) // Can be used for reading to save on `open` calls + .create(false) + .open(&path) + .when_reading_file(&path)?, + path.to_owned(), + ) } - fn create(&self, filename: &Path) -> Result { + fn create( + &self, + filename: &Path, + check_ambig: bool, + ) -> Result { if self.readonly { return Err(HgError::abort( "write access in a readonly vfs", @@ -431,7 +622,6 @@ let path = self.base.join(filename); let parent = path.parent().expect("file at root"); std::fs::create_dir_all(parent).when_writing_file(parent)?; - // TODO checkambig (wrap File somehow) let file = OpenOptions::new() .create(true) @@ -450,18 +640,26 @@ std::fs::set_permissions(&path, perm).when_writing_file(&path)?; } - Ok(file) + Ok(VfsFile::Normal { + file, + check_ambig: if check_ambig { + Some(path.metadata().when_reading_file(&path)?) + } else { + None + }, + path: path.to_owned(), + }) } fn create_atomic( &self, _filename: &Path, _check_ambig: bool, - ) -> Result { + ) -> Result { todo!() } - fn file_size(&self, file: &File) -> Result { + fn file_size(&self, file: &VfsFile) -> Result { Ok(file .metadata() .map_err(|e| { @@ -495,7 +693,7 @@ &self, from: &Path, to: &Path, - _check_ambig: bool, + check_ambig: bool, ) -> Result<(), HgError> { if self.readonly { return Err(HgError::abort( @@ -504,15 +702,30 @@ None, )); } - // TODO checkambig + let old_stat = if check_ambig { + Some( + from.metadata() + .when_reading_file(from) + .io_not_found_as_none()?, + ) + } else { + None + }; let from = self.base.join(from); let to = self.base.join(to); - std::fs::rename(&from, &to) - .with_context(|| IoErrorContext::RenamingFile { from, to }) + std::fs::rename(&from, &to).with_context(|| { + IoErrorContext::RenamingFile { + from, + to: to.to_owned(), + } + })?; + if let Some(Some(old)) = old_stat { + avoid_timestamp_ambiguity(&to, &old); + } + Ok(()) } fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError> { - // TODO checkambig? let from = self.base.join(from); let to = self.base.join(to); std::fs::copy(&from, &to) @@ -598,46 +811,47 @@ } impl Vfs for FnCacheVfs { - fn open(&self, filename: &Path) -> Result { + fn open(&self, filename: &Path) -> Result { let encoded = path_encode(&get_bytes_from_path(filename)); let encoded_path = get_path_from_bytes(&encoded); self.maybe_add_to_fncache(filename, encoded_path)?; self.inner.open(encoded_path) } - fn open_read(&self, filename: &Path) -> Result { + fn open_read(&self, filename: &Path) -> Result { let encoded = path_encode(&get_bytes_from_path(filename)); let filename = get_path_from_bytes(&encoded); self.inner.open_read(filename) } - fn open_check_ambig( - &self, - filename: &Path, - ) -> Result { + fn open_check_ambig(&self, filename: &Path) -> Result { let encoded = path_encode(&get_bytes_from_path(filename)); let filename = get_path_from_bytes(&encoded); self.inner.open_check_ambig(filename) } - fn create(&self, filename: &Path) -> Result { + fn create( + &self, + filename: &Path, + check_ambig: bool, + ) -> Result { let encoded = path_encode(&get_bytes_from_path(filename)); let encoded_path = get_path_from_bytes(&encoded); self.maybe_add_to_fncache(filename, encoded_path)?; - self.inner.create(encoded_path) + self.inner.create(encoded_path, check_ambig) } fn create_atomic( &self, filename: &Path, check_ambig: bool, - ) -> Result { + ) -> Result { let encoded = path_encode(&get_bytes_from_path(filename)); let filename = get_path_from_bytes(&encoded); self.inner.create_atomic(filename, check_ambig) } - fn file_size(&self, file: &File) -> Result { + fn file_size(&self, file: &VfsFile) -> Result { self.inner.file_size(file) } @@ -814,4 +1028,74 @@ assert!(target_path.exists()); assert!(!temp_path.exists()); } + + #[test] + fn test_vfs_file_check_ambig() { + let dir = tempfile::tempdir().unwrap().into_path(); + let file_path = dir.join("file"); + + fn vfs_file_write(file_path: &Path, check_ambig: bool) { + let file = std::fs::OpenOptions::new() + .write(true) + .open(file_path) + .unwrap(); + let old_stat = if check_ambig { + Some(file.metadata().unwrap()) + } else { + None + }; + + let mut vfs_file = VfsFile::Normal { + file, + path: file_path.to_owned(), + check_ambig: old_stat, + }; + vfs_file.write_all(b"contents").unwrap(); + } + + std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(false) + .open(&file_path) + .unwrap(); + + let number_of_writes = 3; + + // Try multiple times, because reproduction of an ambiguity depends + // on "filesystem time" + for _ in 0..5 { + TIMESTAMP_FIXES_CALLS.store(0, Ordering::Relaxed); + vfs_file_write(&file_path, false); + let old_stat = file_path.metadata().unwrap(); + if old_stat.ctime() != old_stat.mtime() { + // subsequent changing never causes ambiguity + continue; + } + + // Repeat atomic write with `check_ambig == true`, to examine + // whether the mtime is advanced multiple times as expected + for _ in 0..number_of_writes { + vfs_file_write(&file_path, true); + } + let new_stat = file_path.metadata().unwrap(); + if !is_filetime_ambiguous(&new_stat, &old_stat) { + // timestamp ambiguity was naturally avoided while repetition + continue; + } + + assert_eq!( + TIMESTAMP_FIXES_CALLS.load(Ordering::Relaxed), + number_of_writes + ); + assert_eq!( + old_stat.mtime() + number_of_writes as i64, + file_path.metadata().unwrap().mtime() + ); + break; + } + // If we've arrived here without breaking, we might not have + // tested anything because the platform is too slow. This test will + // still work on fast platforms. + } } diff -r 735bf027dd1d -r 8d35941689af rust/hg-cpython/src/vfs.rs --- a/rust/hg-cpython/src/vfs.rs Tue Oct 08 16:10:30 2024 +0200 +++ b/rust/hg-cpython/src/vfs.rs Thu Oct 10 15:54:45 2024 +0200 @@ -14,7 +14,7 @@ errors::{HgError, IoResultExt}, exit_codes, utils::files::{get_bytes_from_path, get_path_from_bytes}, - vfs::Vfs, + vfs::{Vfs, VfsFile}, }; /// Wrapper around a Python VFS object to call back into Python from `hg-core`. @@ -141,46 +141,47 @@ }); impl Vfs for PyVfs { - fn open(&self, filename: &Path) -> Result { + fn open(&self, filename: &Path) -> Result { self.inner_open(filename, false, false, false, true) - .map(|(f, _)| f) + .map(|(f, _)| VfsFile::normal(f, filename.to_owned())) } - fn open_read(&self, filename: &Path) -> Result { + fn open_read(&self, filename: &Path) -> Result { self.inner_open(filename, false, false, false, false) - .map(|(f, _)| f) + .map(|(f, _)| VfsFile::normal(f, filename.to_owned())) } - fn open_check_ambig( + fn open_check_ambig(&self, filename: &Path) -> Result { + self.inner_open(filename, false, true, false, true) + .map(|(f, _)| VfsFile::normal(f, filename.to_owned())) + } + + fn create( &self, filename: &Path, - ) -> Result { - self.inner_open(filename, false, true, false, true) - .map(|(f, _)| f) - } - - fn create(&self, filename: &Path) -> Result { - self.inner_open(filename, true, false, false, true) - .map(|(f, _)| f) + check_ambig: bool, + ) -> Result { + self.inner_open(filename, true, check_ambig, false, true) + .map(|(f, _)| VfsFile::normal(f, filename.to_owned())) } fn create_atomic( &self, filename: &Path, check_ambig: bool, - ) -> Result { + ) -> Result { self.inner_open(filename, true, false, true, true).map( |(fp, temp_name)| { - hg::vfs::AtomicFile::from_file( + VfsFile::Atomic(hg::vfs::AtomicFile::from_file( fp, check_ambig, temp_name.expect("temp name should exist"), filename.to_owned(), - ) + )) }, ) } - fn file_size(&self, file: &File) -> Result { + fn file_size(&self, file: &VfsFile) -> Result { let gil = &Python::acquire_gil(); let py = gil.python(); let raw_fd = file.as_raw_fd();