hgdemandimport: apply lazy module loading to sys.meta_path finders
Python's `sys.meta_path` finders are the primary objects whose job it
is to find a module at import time. When `import` is called, Python
iterates objects in this list and calls `o.find_spec(...)` to find
a `ModuleSpec` (or None if the module couldn't be found by that
finder). If no meta path finder can find a module, import fails.
One of the default meta path finders is `PathFinder`. Its job is to
import modules from the filesystem and is probably the most important
importer. This finder looks at `sys.path` and `sys.path_hooks` to do
its job.
The `ModuleSpec` returned by `MetaPathImporter.find_spec()` has a
`loader` attribute, which defines the concrete module loader to use.
`sys.path_hooks` is a hook point for teaching `PathFinder` to
instantiate custom loader types.
Previously, we injected a custom `sys.path_hook` that told `PathFinder`
to wrap the default loaders with a loader that creates a module object
that is lazy.
This approach worked. But its main limitation was that it only applied
to the `PathFinder` meta path importer. There are other meta path
importers that are registered. And in the case of PyOxidizer loading
modules from memory, `PathFinder` doesn't come into play since
PyOxidizer's own meta path importer was handling all imports.
This commit changes our approach to lazy module loading by proxying
all meta path importers. Specifically, we overload the `find_spec()`
method to swap in a wrapped loader on the `ModuleSpec` before it
is returned. The end result of this is all meta path importers should
be lazy.
As much as I would have loved to utilize .__class__ manipulation to
achieve this, some meta path importers are implemented in C/Rust
in such a way that they cannot be monkeypatched. This is why we
use __getattribute__ to define a proxy.
Also, this change could theoretically open us up to regressions in
meta path importers whose loader is creating module objects which
can't be monkeypatched. But I'm not aware of any of these in the
wild. So I think we'll be safe.
According to hyperfine, this change yields a decent startup time win of
5-6ms:
```
Benchmark #1: ~/.pyenv/versions/3.6.10/bin/python ./hg version
Time (mean ± σ): 86.8 ms ± 0.5 ms [User: 78.0 ms, System: 8.7 ms]
Range (min … max): 86.0 ms … 89.1 ms 50 runs
Time (mean ± σ): 81.1 ms ± 2.7 ms [User: 74.5 ms, System: 6.5 ms]
Range (min … max): 77.8 ms … 90.5 ms 50 runs
Benchmark #2: ~/.pyenv/versions/3.7.6/bin/python ./hg version
Time (mean ± σ): 78.9 ms ± 0.6 ms [User: 70.2 ms, System: 8.7 ms]
Range (min … max): 78.1 ms … 81.2 ms 50 runs
Time (mean ± σ): 73.4 ms ± 0.6 ms [User: 65.3 ms, System: 8.0 ms]
Range (min … max): 72.4 ms … 75.7 ms 50 runs
Benchmark #3: ~/.pyenv/versions/3.8.1/bin/python ./hg version
Time (mean ± σ): 78.1 ms ± 0.6 ms [User: 70.2 ms, System: 7.9 ms]
Range (min … max): 77.4 ms … 80.9 ms 50 runs
Time (mean ± σ): 72.1 ms ± 0.4 ms [User: 64.4 ms, System: 7.6 ms]
Range (min … max): 71.4 ms … 74.1 ms 50 runs
```
Differential Revision: https://phab.mercurial-scm.org/D7954
/*
bdiff.c - efficient binary diff extension for Mercurial
Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
This software may be used and distributed according to the terms of
the GNU General Public License, incorporated herein by reference.
Based roughly on Python difflib
*/
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "bdiff.h"
#include "bitmanipulation.h"
#include "compat.h"
/* Hash implementation from diffutils */
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof(v) * CHAR_BIT - (n)))
#define HASH(h, c) ((c) + ROL(h, 7))
struct pos {
int pos, len;
};
int bdiff_splitlines(const char *a, ssize_t len, struct bdiff_line **lr)
{
unsigned hash;
int i;
const char *p, *b = a;
const char *const plast = a + len - 1;
struct bdiff_line *l;
/* count the lines */
i = 1; /* extra line for sentinel */
for (p = a; p < plast; p++) {
if (*p == '\n') {
i++;
}
}
if (p == plast) {
i++;
}
*lr = l = (struct bdiff_line *)calloc(i, sizeof(struct bdiff_line));
if (!l) {
return -1;
}
/* build the line array and calculate hashes */
hash = 0;
for (p = a; p < plast; p++) {
hash = HASH(hash, *p);
if (*p == '\n') {
l->hash = hash;
hash = 0;
l->len = p - b + 1;
l->l = b;
l->n = INT_MAX;
l++;
b = p + 1;
}
}
if (p == plast) {
hash = HASH(hash, *p);
l->hash = hash;
l->len = p - b + 1;
l->l = b;
l->n = INT_MAX;
l++;
}
/* set up a sentinel */
l->hash = 0;
l->len = 0;
l->l = a + len;
return i - 1;
}
static inline int cmp(struct bdiff_line *a, struct bdiff_line *b)
{
return a->hash != b->hash || a->len != b->len ||
memcmp(a->l, b->l, a->len);
}
static int equatelines(struct bdiff_line *a, int an, struct bdiff_line *b,
int bn)
{
int i, j, buckets = 1, t, scale;
struct pos *h = NULL;
/* build a hash table of the next highest power of 2 */
while (buckets < bn + 1) {
buckets *= 2;
}
/* try to allocate a large hash table to avoid collisions */
for (scale = 4; scale; scale /= 2) {
h = (struct pos *)calloc(buckets, scale * sizeof(struct pos));
if (h) {
break;
}
}
if (!h) {
return 0;
}
buckets = buckets * scale - 1;
/* clear the hash table */
for (i = 0; i <= buckets; i++) {
h[i].pos = -1;
h[i].len = 0;
}
/* add lines to the hash table chains */
for (i = 0; i < bn; i++) {
/* find the equivalence class */
for (j = b[i].hash & buckets; h[j].pos != -1;
j = (j + 1) & buckets) {
if (!cmp(b + i, b + h[j].pos)) {
break;
}
}
/* add to the head of the equivalence class */
b[i].n = h[j].pos;
b[i].e = j;
h[j].pos = i;
h[j].len++; /* keep track of popularity */
}
/* compute popularity threshold */
t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
/* match items in a to their equivalence class in b */
for (i = 0; i < an; i++) {
/* find the equivalence class */
for (j = a[i].hash & buckets; h[j].pos != -1;
j = (j + 1) & buckets) {
if (!cmp(a + i, b + h[j].pos)) {
break;
}
}
a[i].e = j; /* use equivalence class for quick compare */
if (h[j].len <= t) {
a[i].n = h[j].pos; /* point to head of match list */
} else {
a[i].n = -1; /* too popular */
}
}
/* discard hash tables */
free(h);
return 1;
}
static int longest_match(struct bdiff_line *a, struct bdiff_line *b,
struct pos *pos, int a1, int a2, int b1, int b2,
int *omi, int *omj)
{
int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf;
/* window our search on large regions to better bound
worst-case performance. by choosing a window at the end, we
reduce skipping overhead on the b chains. */
if (a2 - a1 > 30000) {
a1 = a2 - 30000;
}
half = (a1 + a2 - 1) / 2;
bhalf = (b1 + b2 - 1) / 2;
for (i = a1; i < a2; i++) {
/* skip all lines in b after the current block */
for (j = a[i].n; j >= b2; j = b[j].n) {
;
}
/* loop through all lines match a[i] in b */
for (; j >= b1; j = b[j].n) {
/* does this extend an earlier match? */
for (k = 1; j - k >= b1 && i - k >= a1; k++) {
/* reached an earlier match? */
if (pos[j - k].pos == i - k) {
k += pos[j - k].len;
break;
}
/* previous line mismatch? */
if (a[i - k].e != b[j - k].e) {
break;
}
}
pos[j].pos = i;
pos[j].len = k;
/* best match so far? we prefer matches closer
to the middle to balance recursion */
if (k > mk) {
/* a longer match */
mi = i;
mj = j;
mk = k;
} else if (k == mk) {
if (i > mi && i <= half && j > b1) {
/* same match but closer to half */
mi = i;
mj = j;
} else if (i == mi && (mj > bhalf || i == a1)) {
/* same i but best earlier j */
mj = j;
}
}
}
}
if (mk) {
mi = mi - mk + 1;
mj = mj - mk + 1;
}
/* expand match to include subsequent popular lines */
while (mi + mk < a2 && mj + mk < b2 && a[mi + mk].e == b[mj + mk].e) {
mk++;
}
*omi = mi;
*omj = mj;
return mk;
}
static struct bdiff_hunk *recurse(struct bdiff_line *a, struct bdiff_line *b,
struct pos *pos, int a1, int a2, int b1,
int b2, struct bdiff_hunk *l)
{
int i, j, k;
while (1) {
/* find the longest match in this chunk */
k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
if (!k) {
return l;
}
/* and recurse on the remaining chunks on either side */
l = recurse(a, b, pos, a1, i, b1, j, l);
if (!l) {
return NULL;
}
l->next =
(struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
if (!l->next) {
return NULL;
}
l = l->next;
l->a1 = i;
l->a2 = i + k;
l->b1 = j;
l->b2 = j + k;
l->next = NULL;
/* tail-recursion didn't happen, so do equivalent iteration */
a1 = i + k;
b1 = j + k;
}
}
int bdiff_diff(struct bdiff_line *a, int an, struct bdiff_line *b, int bn,
struct bdiff_hunk *base)
{
struct bdiff_hunk *curr;
struct pos *pos;
int t, count = 0;
/* allocate and fill arrays */
t = equatelines(a, an, b, bn);
pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
if (pos && t) {
/* generate the matching block list */
curr = recurse(a, b, pos, 0, an, 0, bn, base);
if (!curr) {
return -1;
}
/* sentinel end hunk */
curr->next =
(struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
if (!curr->next) {
return -1;
}
curr = curr->next;
curr->a1 = curr->a2 = an;
curr->b1 = curr->b2 = bn;
curr->next = NULL;
}
free(pos);
/* normalize the hunk list, try to push each hunk towards the end */
for (curr = base->next; curr; curr = curr->next) {
struct bdiff_hunk *next = curr->next;
if (!next) {
break;
}
if (curr->a2 == next->a1 || curr->b2 == next->b1) {
while (curr->a2 < an && curr->b2 < bn &&
next->a1 < next->a2 && next->b1 < next->b2 &&
!cmp(a + curr->a2, b + curr->b2)) {
curr->a2++;
next->a1++;
curr->b2++;
next->b1++;
}
}
}
for (curr = base->next; curr; curr = curr->next) {
count++;
}
return count;
}
/* deallocate list of hunks; l may be NULL */
void bdiff_freehunks(struct bdiff_hunk *l)
{
struct bdiff_hunk *n;
for (; l; l = n) {
n = l->next;
free(l);
}
}