5 use crate::utils::files::{get_bytes_from_path, get_path_from_bytes}; |
5 use crate::utils::files::{get_bytes_from_path, get_path_from_bytes}; |
6 use dyn_clone::DynClone; |
6 use dyn_clone::DynClone; |
7 use format_bytes::format_bytes; |
7 use format_bytes::format_bytes; |
8 use memmap2::{Mmap, MmapOptions}; |
8 use memmap2::{Mmap, MmapOptions}; |
9 use rand::distributions::{Alphanumeric, DistString}; |
9 use rand::distributions::{Alphanumeric, DistString}; |
10 use std::fs::{File, OpenOptions}; |
10 use std::fs::{File, Metadata, OpenOptions}; |
11 use std::io::{ErrorKind, Seek, Write}; |
11 use std::io::{ErrorKind, Read, Seek, Write}; |
|
12 use std::os::fd::AsRawFd; |
12 use std::os::unix::fs::{MetadataExt, PermissionsExt}; |
13 use std::os::unix::fs::{MetadataExt, PermissionsExt}; |
13 use std::path::{Path, PathBuf}; |
14 use std::path::{Path, PathBuf}; |
|
15 #[cfg(test)] |
|
16 use std::sync::atomic::AtomicUsize; |
|
17 #[cfg(test)] |
|
18 use std::sync::atomic::Ordering; |
14 use std::sync::OnceLock; |
19 use std::sync::OnceLock; |
15 |
20 |
16 /// Filesystem access abstraction for the contents of a given "base" directory |
21 /// Filesystem access abstraction for the contents of a given "base" directory |
17 #[derive(Clone)] |
22 #[derive(Clone)] |
18 pub struct VfsImpl { |
23 pub struct VfsImpl { |
201 _ => Ok(None), |
206 _ => Ok(None), |
202 }, |
207 }, |
203 } |
208 } |
204 } |
209 } |
205 |
210 |
|
211 /// Abstraction over the files handled by a [`Vfs`]. |
|
212 #[derive(Debug)] |
|
213 pub enum VfsFile { |
|
214 Atomic(AtomicFile), |
|
215 |
|
216 Normal { |
|
217 file: File, |
|
218 path: PathBuf, |
|
219 /// If `Some`, check (and maybe fix) this file's timestamp ambiguity. |
|
220 /// See [`is_filetime_ambiguous`]. |
|
221 check_ambig: Option<Metadata>, |
|
222 }, |
|
223 } |
|
224 |
|
225 impl VfsFile { |
|
226 pub fn normal(file: File, path: PathBuf) -> Self { |
|
227 Self::Normal { |
|
228 file, |
|
229 check_ambig: None, |
|
230 path, |
|
231 } |
|
232 } |
|
233 pub fn normal_check_ambig( |
|
234 file: File, |
|
235 path: PathBuf, |
|
236 ) -> Result<Self, HgError> { |
|
237 Ok(Self::Normal { |
|
238 file, |
|
239 check_ambig: Some(path.metadata().when_reading_file(&path)?), |
|
240 path, |
|
241 }) |
|
242 } |
|
243 pub fn try_clone(&self) -> Result<VfsFile, HgError> { |
|
244 Ok(match self { |
|
245 VfsFile::Atomic(AtomicFile { |
|
246 fp, |
|
247 temp_path, |
|
248 check_ambig, |
|
249 target_name, |
|
250 is_open, |
|
251 }) => Self::Atomic(AtomicFile { |
|
252 fp: fp.try_clone().when_reading_file(temp_path)?, |
|
253 temp_path: temp_path.clone(), |
|
254 check_ambig: *check_ambig, |
|
255 target_name: target_name.clone(), |
|
256 is_open: *is_open, |
|
257 }), |
|
258 VfsFile::Normal { |
|
259 file, |
|
260 check_ambig, |
|
261 path, |
|
262 } => Self::Normal { |
|
263 file: file.try_clone().when_reading_file(path)?, |
|
264 check_ambig: check_ambig.clone(), |
|
265 path: path.to_owned(), |
|
266 }, |
|
267 }) |
|
268 } |
|
269 pub fn set_len(&self, len: u64) -> Result<(), std::io::Error> { |
|
270 match self { |
|
271 VfsFile::Atomic(atomic_file) => atomic_file.fp.set_len(len), |
|
272 VfsFile::Normal { file, .. } => file.set_len(len), |
|
273 } |
|
274 } |
|
275 |
|
276 pub fn metadata(&self) -> Result<std::fs::Metadata, std::io::Error> { |
|
277 match self { |
|
278 VfsFile::Atomic(atomic_file) => atomic_file.fp.metadata(), |
|
279 VfsFile::Normal { file, .. } => file.metadata(), |
|
280 } |
|
281 } |
|
282 } |
|
283 |
|
284 impl AsRawFd for VfsFile { |
|
285 fn as_raw_fd(&self) -> std::os::unix::prelude::RawFd { |
|
286 match self { |
|
287 VfsFile::Atomic(atomic_file) => atomic_file.fp.as_raw_fd(), |
|
288 VfsFile::Normal { file, .. } => file.as_raw_fd(), |
|
289 } |
|
290 } |
|
291 } |
|
292 |
|
293 impl Seek for VfsFile { |
|
294 fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> { |
|
295 match self { |
|
296 VfsFile::Atomic(atomic_file) => atomic_file.seek(pos), |
|
297 VfsFile::Normal { file, .. } => file.seek(pos), |
|
298 } |
|
299 } |
|
300 } |
|
301 |
|
302 impl Read for VfsFile { |
|
303 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { |
|
304 match self { |
|
305 VfsFile::Atomic(atomic_file) => atomic_file.fp.read(buf), |
|
306 VfsFile::Normal { file, .. } => file.read(buf), |
|
307 } |
|
308 } |
|
309 } |
|
310 |
|
311 impl Write for VfsFile { |
|
312 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { |
|
313 match self { |
|
314 VfsFile::Atomic(atomic_file) => atomic_file.fp.write(buf), |
|
315 VfsFile::Normal { file, .. } => file.write(buf), |
|
316 } |
|
317 } |
|
318 |
|
319 fn flush(&mut self) -> std::io::Result<()> { |
|
320 match self { |
|
321 VfsFile::Atomic(atomic_file) => atomic_file.fp.flush(), |
|
322 VfsFile::Normal { file, .. } => file.flush(), |
|
323 } |
|
324 } |
|
325 } |
|
326 |
|
327 impl Drop for VfsFile { |
|
328 fn drop(&mut self) { |
|
329 if let VfsFile::Normal { |
|
330 path, |
|
331 check_ambig: Some(old), |
|
332 .. |
|
333 } = self |
|
334 { |
|
335 avoid_timestamp_ambiguity(path, old) |
|
336 } |
|
337 } |
|
338 } |
|
339 |
|
340 /// Records the number of times we've fixed a timestamp ambiguity, only |
|
341 /// applicable for tests. |
|
342 #[cfg(test)] |
|
343 static TIMESTAMP_FIXES_CALLS: AtomicUsize = AtomicUsize::new(0); |
|
344 |
|
345 fn avoid_timestamp_ambiguity(path: &Path, old: &Metadata) { |
|
346 if let Ok(new) = path.metadata() { |
|
347 let is_ambiguous = is_filetime_ambiguous(&new, old); |
|
348 if is_ambiguous { |
|
349 let advanced = |
|
350 filetime::FileTime::from_unix_time(old.mtime() + 1, 0); |
|
351 if filetime::set_file_times(path, advanced, advanced).is_ok() { |
|
352 #[cfg(test)] |
|
353 { |
|
354 TIMESTAMP_FIXES_CALLS.fetch_add(1, Ordering::Relaxed); |
|
355 } |
|
356 } |
|
357 } |
|
358 } |
|
359 } |
|
360 |
|
/// Tells whether the `new` stat of a file is ambiguous with respect to its
/// `old` stat.
///
/// Writing "S[N]" for the stat of a file at its N-th change:
///
/// - S[n-1].ctime < S[n].ctime: the change can be detected
/// - S[n-1].ctime == S[n].ctime
///   - S[n-1].ctime < S[n].mtime: natural advancing (*1)
///   - S[n-1].ctime == S[n].mtime: ambiguous (*2)
///   - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
/// - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
///
/// Case (*2) means that the file was changed two or more times within the
/// same second (= S[n-1].ctime), so comparing timestamps cannot tell the
/// versions apart.
///
/// The basic idea to avoid this is "advance mtime by 1 second if the
/// timestamp is ambiguous".
///
/// However, advancing mtime only in case (*2) is not enough: a naturally
/// advanced S[n].mtime in case (*1) might be equal to a manually advanced
/// S[n-1 or earlier].mtime.
///
/// Therefore, every "S[n-1].ctime == S[n].ctime" case is treated as
/// ambiguous regardless of mtime, so that such mtime collisions are not
/// overlooked.
///
/// Advancing mtime whenever this function returns `true` ensures
/// "S[n-1].mtime != S[n].mtime", even if the size of the file is unchanged.
fn is_filetime_ambiguous(new: &Metadata, old: &Metadata) -> bool {
    let previous_ctime = old.ctime();
    let current_ctime = new.ctime();
    current_ctime == previous_ctime
}
|
392 |
206 /// Writable file object that atomically updates a file |
393 /// Writable file object that atomically updates a file |
207 /// |
394 /// |
208 /// All writes will go to a temporary copy of the original file. Call |
395 /// All writes will go to a temporary copy of the original file. Call |
209 /// [`Self::close`] when you are done writing, and [`Self`] will rename |
396 /// [`Self::close`] when you are done writing, and [`Self`] will rename |
210 /// the temporary copy to the original name, making the changes |
397 /// the temporary copy to the original name, making the changes |
337 |
517 |
338 /// Abstracts over the VFS to allow for different implementations of the |
518 /// Abstracts over the VFS to allow for different implementations of the |
339 /// filesystem layer (like passing one from Python). |
519 /// filesystem layer (like passing one from Python). |
340 pub trait Vfs: Sync + Send + DynClone { |
520 pub trait Vfs: Sync + Send + DynClone { |
341 // TODO make `open` readonly and make `open_read` an `open_write` |
521 // TODO make `open` readonly and make `open_read` an `open_write` |
342 fn open(&self, filename: &Path) -> Result<std::fs::File, HgError>; |
522 fn open(&self, filename: &Path) -> Result<VfsFile, HgError>; |
343 fn open_read(&self, filename: &Path) -> Result<std::fs::File, HgError>; |
523 fn open_read(&self, filename: &Path) -> Result<VfsFile, HgError>; |
344 fn open_check_ambig( |
524 fn open_check_ambig(&self, filename: &Path) -> Result<VfsFile, HgError>; |
|
525 fn create( |
345 &self, |
526 &self, |
346 filename: &Path, |
527 filename: &Path, |
347 ) -> Result<std::fs::File, HgError>; |
528 check_ambig: bool, |
348 fn create(&self, filename: &Path) -> Result<std::fs::File, HgError>; |
529 ) -> Result<VfsFile, HgError>; |
349 /// Must truncate the new file if it exists |
530 /// Must truncate the new file if it exists |
350 fn create_atomic( |
531 fn create_atomic( |
351 &self, |
532 &self, |
352 filename: &Path, |
533 filename: &Path, |
353 check_ambig: bool, |
534 check_ambig: bool, |
354 ) -> Result<AtomicFile, HgError>; |
535 ) -> Result<VfsFile, HgError>; |
355 fn file_size(&self, file: &File) -> Result<u64, HgError>; |
536 fn file_size(&self, file: &VfsFile) -> Result<u64, HgError>; |
356 fn exists(&self, filename: &Path) -> bool; |
537 fn exists(&self, filename: &Path) -> bool; |
357 fn unlink(&self, filename: &Path) -> Result<(), HgError>; |
538 fn unlink(&self, filename: &Path) -> Result<(), HgError>; |
358 fn rename( |
539 fn rename( |
359 &self, |
540 &self, |
360 from: &Path, |
541 from: &Path, |
493 |
691 |
494 fn rename( |
692 fn rename( |
495 &self, |
693 &self, |
496 from: &Path, |
694 from: &Path, |
497 to: &Path, |
695 to: &Path, |
498 _check_ambig: bool, |
696 check_ambig: bool, |
499 ) -> Result<(), HgError> { |
697 ) -> Result<(), HgError> { |
500 if self.readonly { |
698 if self.readonly { |
501 return Err(HgError::abort( |
699 return Err(HgError::abort( |
502 "write access in a readonly vfs", |
700 "write access in a readonly vfs", |
503 exit_codes::ABORT, |
701 exit_codes::ABORT, |
504 None, |
702 None, |
505 )); |
703 )); |
506 } |
704 } |
507 // TODO checkambig |
705 let old_stat = if check_ambig { |
|
706 Some( |
|
707 from.metadata() |
|
708 .when_reading_file(from) |
|
709 .io_not_found_as_none()?, |
|
710 ) |
|
711 } else { |
|
712 None |
|
713 }; |
508 let from = self.base.join(from); |
714 let from = self.base.join(from); |
509 let to = self.base.join(to); |
715 let to = self.base.join(to); |
510 std::fs::rename(&from, &to) |
716 std::fs::rename(&from, &to).with_context(|| { |
511 .with_context(|| IoErrorContext::RenamingFile { from, to }) |
717 IoErrorContext::RenamingFile { |
|
718 from, |
|
719 to: to.to_owned(), |
|
720 } |
|
721 })?; |
|
722 if let Some(Some(old)) = old_stat { |
|
723 avoid_timestamp_ambiguity(&to, &old); |
|
724 } |
|
725 Ok(()) |
512 } |
726 } |
513 |
727 |
514 fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError> { |
728 fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError> { |
515 // TODO checkambig? |
|
516 let from = self.base.join(from); |
729 let from = self.base.join(from); |
517 let to = self.base.join(to); |
730 let to = self.base.join(to); |
518 std::fs::copy(&from, &to) |
731 std::fs::copy(&from, &to) |
519 .with_context(|| IoErrorContext::CopyingFile { from, to }) |
732 .with_context(|| IoErrorContext::CopyingFile { from, to }) |
520 .map(|_| ()) |
733 .map(|_| ()) |
596 Ok(()) |
809 Ok(()) |
597 } |
810 } |
598 } |
811 } |
599 |
812 |
600 impl Vfs for FnCacheVfs { |
813 impl Vfs for FnCacheVfs { |
601 fn open(&self, filename: &Path) -> Result<std::fs::File, HgError> { |
814 fn open(&self, filename: &Path) -> Result<VfsFile, HgError> { |
602 let encoded = path_encode(&get_bytes_from_path(filename)); |
815 let encoded = path_encode(&get_bytes_from_path(filename)); |
603 let encoded_path = get_path_from_bytes(&encoded); |
816 let encoded_path = get_path_from_bytes(&encoded); |
604 self.maybe_add_to_fncache(filename, encoded_path)?; |
817 self.maybe_add_to_fncache(filename, encoded_path)?; |
605 self.inner.open(encoded_path) |
818 self.inner.open(encoded_path) |
606 } |
819 } |
607 |
820 |
608 fn open_read(&self, filename: &Path) -> Result<std::fs::File, HgError> { |
821 fn open_read(&self, filename: &Path) -> Result<VfsFile, HgError> { |
609 let encoded = path_encode(&get_bytes_from_path(filename)); |
822 let encoded = path_encode(&get_bytes_from_path(filename)); |
610 let filename = get_path_from_bytes(&encoded); |
823 let filename = get_path_from_bytes(&encoded); |
611 self.inner.open_read(filename) |
824 self.inner.open_read(filename) |
612 } |
825 } |
613 |
826 |
614 fn open_check_ambig( |
827 fn open_check_ambig(&self, filename: &Path) -> Result<VfsFile, HgError> { |
615 &self, |
|
616 filename: &Path, |
|
617 ) -> Result<std::fs::File, HgError> { |
|
618 let encoded = path_encode(&get_bytes_from_path(filename)); |
828 let encoded = path_encode(&get_bytes_from_path(filename)); |
619 let filename = get_path_from_bytes(&encoded); |
829 let filename = get_path_from_bytes(&encoded); |
620 self.inner.open_check_ambig(filename) |
830 self.inner.open_check_ambig(filename) |
621 } |
831 } |
622 |
832 |
623 fn create(&self, filename: &Path) -> Result<std::fs::File, HgError> { |
833 fn create( |
|
834 &self, |
|
835 filename: &Path, |
|
836 check_ambig: bool, |
|
837 ) -> Result<VfsFile, HgError> { |
624 let encoded = path_encode(&get_bytes_from_path(filename)); |
838 let encoded = path_encode(&get_bytes_from_path(filename)); |
625 let encoded_path = get_path_from_bytes(&encoded); |
839 let encoded_path = get_path_from_bytes(&encoded); |
626 self.maybe_add_to_fncache(filename, encoded_path)?; |
840 self.maybe_add_to_fncache(filename, encoded_path)?; |
627 self.inner.create(encoded_path) |
841 self.inner.create(encoded_path, check_ambig) |
628 } |
842 } |
629 |
843 |
630 fn create_atomic( |
844 fn create_atomic( |
631 &self, |
845 &self, |
632 filename: &Path, |
846 filename: &Path, |
633 check_ambig: bool, |
847 check_ambig: bool, |
634 ) -> Result<AtomicFile, HgError> { |
848 ) -> Result<VfsFile, HgError> { |
635 let encoded = path_encode(&get_bytes_from_path(filename)); |
849 let encoded = path_encode(&get_bytes_from_path(filename)); |
636 let filename = get_path_from_bytes(&encoded); |
850 let filename = get_path_from_bytes(&encoded); |
637 self.inner.create_atomic(filename, check_ambig) |
851 self.inner.create_atomic(filename, check_ambig) |
638 } |
852 } |
639 |
853 |
640 fn file_size(&self, file: &File) -> Result<u64, HgError> { |
854 fn file_size(&self, file: &VfsFile) -> Result<u64, HgError> { |
641 self.inner.file_size(file) |
855 self.inner.file_size(file) |
642 } |
856 } |
643 |
857 |
644 fn exists(&self, filename: &Path) -> bool { |
858 fn exists(&self, filename: &Path) -> bool { |
645 let encoded = path_encode(&get_bytes_from_path(filename)); |
859 let encoded = path_encode(&get_bytes_from_path(filename)); |
812 std::fs::read_to_string(&target_path).unwrap() |
1026 std::fs::read_to_string(&target_path).unwrap() |
813 ); |
1027 ); |
814 assert!(target_path.exists()); |
1028 assert!(target_path.exists()); |
815 assert!(!temp_path.exists()); |
1029 assert!(!temp_path.exists()); |
816 } |
1030 } |
817 } |
1031 |
|
/// Checks that dropping a [`VfsFile::Normal`] carrying a recorded stat
/// advances the file's mtime by one second per ambiguous write.
#[test]
fn test_vfs_file_check_ambig() {
    // Keep the `TempDir` guard alive for the whole test so the directory
    // is removed when the test ends (`into_path` would leave it on disk).
    let dir = tempfile::tempdir().unwrap();
    let file_path = dir.path().join("file");

    /// Writes to `file_path` through a [`VfsFile::Normal`], recording the
    /// pre-write stat when `check_ambig` is true.
    fn vfs_file_write(file_path: &Path, check_ambig: bool) {
        let file = std::fs::OpenOptions::new()
            .write(true)
            .open(file_path)
            .unwrap();
        let old_stat = if check_ambig {
            Some(file.metadata().unwrap())
        } else {
            None
        };

        let mut vfs_file = VfsFile::Normal {
            file,
            path: file_path.to_owned(),
            check_ambig: old_stat,
        };
        vfs_file.write_all(b"contents").unwrap();
        // `vfs_file` is dropped here, which is what triggers the check.
    }

    // Make sure the file exists before the helper opens it without
    // `create`.
    std::fs::OpenOptions::new()
        .write(true)
        .create(true)
        .truncate(false)
        .open(&file_path)
        .unwrap();

    let number_of_writes = 3;

    // Try multiple times, because reproduction of an ambiguity depends
    // on "filesystem time"
    for _ in 0..5 {
        TIMESTAMP_FIXES_CALLS.store(0, Ordering::Relaxed);
        vfs_file_write(&file_path, false);
        let old_stat = file_path.metadata().unwrap();
        if old_stat.ctime() != old_stat.mtime() {
            // subsequent changing never causes ambiguity
            continue;
        }

        // Repeat the normal (non-atomic) write with `check_ambig == true`,
        // to examine whether the mtime is advanced multiple times as
        // expected
        for _ in 0..number_of_writes {
            vfs_file_write(&file_path, true);
        }
        let new_stat = file_path.metadata().unwrap();
        if !is_filetime_ambiguous(&new_stat, &old_stat) {
            // timestamp ambiguity was naturally avoided while repetition
            continue;
        }

        assert_eq!(
            TIMESTAMP_FIXES_CALLS.load(Ordering::Relaxed),
            number_of_writes
        );
        assert_eq!(
            old_stat.mtime() + number_of_writes as i64,
            file_path.metadata().unwrap().mtime()
        );
        break;
    }
    // If we've arrived here without breaking, we might not have
    // tested anything because the platform is too slow. This test will
    // still work on fast platforms.
}
|
1101 } |