Mercurial > hg
changeset 51742:b619ba39d10a
mmap: populate mapping in a background thread
When possible, we populate the memory mapping in a second thread. Populating
the mmap does not only read the data from disk into memory. It also fills in
the mapping between the process's memory addresses and the physical memory
used by the file system cache that contains the mmap'ed data.
Doing so buys back the slowdown from pre-population when it matters. When most
of the data is accessed, only a few page faults will occur, while the background
thread fills in the memory mapping. When little data is accessed, the
non-blocking mmap won't have to wait for all the data to be populated.
Here are a few examples of the improvements seen in benchmarks around unbundle and push:
### data-env-vars.name = netbeans-2018-08-01-zstd-sparse-revlog
# benchmark.name = hg.command.unbundle
# benchmark.variants.issue6528 = disabled
# benchmark.variants.reuse-external-delta-parent = yes
# benchmark.variants.revs = any-100-extra-rev
before: 0.758101
after: 0.732129 (-3.43%, -0.03)
## data-env-vars.name = mozilla-try-2019-02-18-zstd-sparse-revlog
before: 1.519941
after: 1.503473 (-1.08%, -0.02)
### data-env-vars.name = mozilla-try-2019-02-18-zstd-sparse-revlog
# benchmark.name = hg.command.push
# bin-env-vars.hg.flavor = default
# benchmark.variants.issue6528 = disabled
# benchmark.variants.protocol = ssh
# benchmark.variants.reuse-external-delta-parent = yes
# benchmark.variants.revs = any-1-extra-rev
before: 4.801442
after: 4.695810 (-1.46%, -0.07)
# benchmark.variants.revs = any-100-extra-rev
before: 4.848596
after: 4.794075 (-1.12%, -0.05)
# bin-env-vars.hg.flavor = rust
# benchmark.variants.revs = any-1-extra-rev
before: 4.818410
after: 4.700053 (-2.46%, -0.12)
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 09 Jul 2024 20:08:48 +0200 |
parents | d748fd2647f8 |
children | 55677d115045 |
files | mercurial/cext/osutil.c mercurial/util.py |
diffstat | 2 files changed, 60 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/cext/osutil.c Thu Jul 25 14:40:38 2024 -0400 +++ b/mercurial/cext/osutil.c Tue Jul 09 20:08:48 2024 +0200 @@ -36,6 +36,12 @@ #endif #endif +#ifndef _WIN32 +#include <sys/mman.h> +#include <pthread.h> +#endif + + #ifdef __APPLE__ #include <sys/attr.h> #include <sys/vnode.h> @@ -1203,6 +1209,49 @@ } #endif +#ifdef MADV_POPULATE_READ + +typedef struct { + void * mmap_address; + size_t length; +} mmap_info; + /* Thread entry point: copy the address and length out of `info`, free it, + * then call madvise(MADV_POPULATE_READ) on that range. */ +static void _mmap_populate(mmap_info *info) { + /* We explicitly do not check the return value as we don't care about it. + * The madvise is here to help performance and we don't care if it fails + * (for example because the mapping is no longer valid). */ + void * mmap_address = info->mmap_address; + size_t length = info->length; + free(info); + madvise(mmap_address, length, MADV_POPULATE_READ); +} + /* Start a detached thread running _mmap_populate over the buffer exposed by + * `mmap`. Returns None, or NULL if the buffer cannot be obtained. */ +static PyObject *background_mmap_populate(PyObject *self, PyObject *mmap) { + Py_buffer b; + pthread_t thread_id; + mmap_info *info; + if (PyObject_GetBuffer(mmap, &b, PyBUF_CONTIG_RO | PyBUF_C_CONTIGUOUS) == -1) { + return NULL; + } + info = (mmap_info *)malloc(sizeof(mmap_info)); + info->mmap_address=b.buf; + info->length=b.len; + /* note: for very large maps, we could spin up multiple threads populating + * different areas. + * NOTE(review): the results of malloc() above and pthread_create() below + * are not checked, and _mmap_populate is passed through a (void *) cast + * although it is declared as void (mmap_info *) -- confirm this is + * intended. */ + pthread_create(&thread_id, NULL, (void *) &_mmap_populate, info); + /* We don't keep track of this thread as it is fine for it to die when we + * exit. */ + pthread_detach(thread_id); + /* We release the PyBuffer in the main thread to let the object be garbage + * collected as soon as possible. This might result in the memory map being + * closed while the background thread is working. That will result in an + * error in the background thread that we can ignore. */ + PyBuffer_Release(&b); + Py_RETURN_NONE; +} + +#endif + static char osutil_doc[] = "Native operating system services."; static PyMethodDef methods[] = { @@ -1237,6 +1286,10 @@ "Is a CoreGraphics session available?" 
}, #endif +#ifdef MADV_POPULATE_READ + {"background_mmap_populate", (PyCFunction)background_mmap_populate, METH_O, + "populate a mmap in the background\n"}, +#endif {NULL, NULL} };
--- a/mercurial/util.py Thu Jul 25 14:40:38 2024 -0400 +++ b/mercurial/util.py Tue Jul 09 20:08:48 2024 +0200 @@ -451,7 +451,9 @@ def has_mmap_populate(): - return hasattr(mmap, 'MAP_POPULATE') + return hasattr(osutil, "background_mmap_populate") or hasattr( + mmap, 'MAP_POPULATE' + ) def mmapread(fp, size=None, pre_populate=True): @@ -475,10 +477,13 @@ size = 0 fd = getattr(fp, 'fileno', lambda: fp)() flags = mmap.MAP_PRIVATE - if pre_populate: + bg_populate = hasattr(osutil, "background_mmap_populate") + if pre_populate and not bg_populate: flags |= getattr(mmap, 'MAP_POPULATE', 0) try: m = mmap.mmap(fd, size, flags=flags, prot=mmap.PROT_READ) + if pre_populate and bg_populate: + osutil.background_mmap_populate(m) return m except ValueError: # Empty files cannot be mmapped, but mmapread should still work. Check