changeset 52181:3d797007905d

rust: populate mmaps in a separate thread if possible Same rationale as b619ba39d10a.
author Raphaël Gomès <rgomes@octobus.net>
date Thu, 01 Aug 2024 11:27:20 +0200
parents 1032bb0ef365
children bd8081e9fd62
files rust/hg-core/src/revlog/mod.rs
diffstat 1 files changed, 53 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/mod.rs	Wed Jul 31 15:41:08 2024 +0200
+++ b/rust/hg-core/src/revlog/mod.rs	Thu Aug 01 11:27:20 2024 +0200
@@ -446,6 +446,48 @@
 
 type IndexData = Box<dyn Deref<Target = [u8]> + Send + Sync>;
 
+/// TODO We should check for Linux 5.14+ at runtime, but we should either add
+/// the `nix` dependency to get it efficiently, or vendor the code to read it,
+/// both of which are overkill for such a feature. If we need this dependency
+/// for more things later, we'll use it here too.
+#[cfg(target_os = "linux")]
+fn can_advise_populate_read() -> bool {
+    true
+}
+
+#[cfg(not(target_os = "linux"))]
+fn can_advise_populate_read() -> bool {
+    false
+}
+
+/// Call `madvise` on the mmap with `MADV_POPULATE_READ` in a separate thread
+/// to populate the mmap in the background for a small perf improvement.
+#[cfg(target_os = "linux")]
+fn advise_populate_read_mmap(mmap: &memmap2::Mmap) {
+    const MADV_POPULATE_READ: i32 = 22;
+
+    // Raw pointers are not `Send`, so carry the address as an integer. The
+    // detached thread may outlive the mmap; that is tolerated because the
+    // kernel will reject any wrong address.
+    let ptr = mmap.as_ptr() as usize;
+    let len = mmap.len();
+
+    // Fire and forget. The `JoinHandle` returned by `spawn` is dropped right
+    // after the call, the thread is thus detached. We don't care about success
+    // or failure here.
+    std::thread::spawn(move || unsafe {
+        // SAFETY: mmap's pointer is always page-aligned on Linux. In the case
+        // of file-based mmap (which is our use-case), the length should be
+        // correct. If not, it's not a safety concern as the kernel will just
+        // ignore unmapped pages and return ENOMEM, which we will promptly
+        // ignore, because we don't care about any errors.
+        libc::madvise(ptr as *mut libc::c_void, len, MADV_POPULATE_READ);
+    });
+}
+
+#[cfg(not(target_os = "linux"))]
+fn advise_populate_read_mmap(_mmap: &memmap2::Mmap) {}
+
 /// Open the revlog [`Index`] at `index_path`, through the `store_vfs` and the
 /// given `options`. This controls whether (and how) we `mmap` the index file,
 /// and returns an empty buffer if the index does not exist on disk.
@@ -465,8 +507,12 @@
                 if size >= threshold {
                     // TODO madvise populate read in a background thread
                     let mut mmap_options = MmapOptions::new();
-                    // This does nothing on platforms where it's not defined
-                    mmap_options.populate();
+                    if !can_advise_populate_read() {
+                        // Fall back to populating in the main thread if
+                        // post-creation advice is not supported.
+                        // Does nothing on platforms where it's not defined.
+                        mmap_options.populate();
+                    }
                     // Safety is "enforced" by locks and assuming other
                     // processes are well-behaved. If any misbehaving or
                     // malicious process does touch the index, it could lead
@@ -476,6 +522,11 @@
                     // TODO linux: set the immutable flag with `chattr(1)`?
                     let mmap = unsafe { mmap_options.map(&file) }
                         .when_reading_file(index_path)?;
+
+                    if can_advise_populate_read() {
+                        advise_populate_read_mmap(&mmap);
+                    }
+
                     Some(Box::new(mmap) as IndexData)
                 } else {
                     None