stream-clone: avoid opening a revlog in case we do not need it
Opening an revlog has a cost, especially if it is inline as we have to scan the
file and construct an index.
To prevent the associated slowdown, we just do a minimal scan to check that an
inline file is still inline, and simply stream the file without creating a
revlog when we can.
This provides a big boost compared to the previous changeset, even if the full
generation is still penalized by the initial gathering of information.
All benchmarks are run on linux with Python 3.10.7.
# benchmark.name = hg.exchange.stream.generate
# benchmark.variants.version = v2
### Compared to the previous changesets
We get a large win all across the board!
# mercurial-2018-08-01-zstd-sparse-revlog
before: 0.250694 seconds
after: 0.105986 seconds (-57.72%)
# pypy-2018-08-01-zstd-sparse-revlog
before: 3.885657 seconds
after: 1.709748 seconds (-56.00%)
# netbeans-2018-08-01-zstd-sparse-revlog
before: 16.679371 seconds
after: 7.687469 seconds (-53.91%)
# mozilla-central-2018-08-01-zstd-sparse-revlog
before: 38.575482 seconds
after: 17.520316 seconds (-54.58%)
# mozilla-try-2019-02-18-zstd-sparse-revlog
before: 81.160994 seconds
after: 37.073753 seconds (-54.32%)
### Compared to 6.4.3
We are still significantly slower than 6.4.3, the extra time is usually twice
slower than the extra time we observe on the locked section, which is a quite
interesting information.
Except for mercurial-central that is much faster. That discrepancy is not really
explained yet.
# mercurial-2018-08-01-zstd-sparse-revlog
6.4.3: 0.072560 seconds
after: 0.105986 seconds (+46.07%) (- 0.03 seconds)
# pypy-2018-08-01-zstd-sparse-revlog
6.4.3: 1.211193 seconds
after: 1.709748 seconds (+41.16%) (-0.45 seconds)
# netbeans-2018-08-01-zstd-sparse-revlog
6.4.3: 4.932843 seconds
after: 7.687469 seconds (+55.84%) (-2.75 seconds)
# mozilla-central-2018-08-01-zstd-sparse-revlog
6.4.3: 34.012226 seconds
after: 17.520316 seconds (-48.49%) (-16.49 seconds)
# mozilla-try-2019-02-18-zstd-sparse-revlog
6.4.3: 23.850555 seconds
after: 37.073753 seconds (+55.44%) (+13.22 seconds)
import _lsprof
import sys
from .pycompat import getattr
Profiler = _lsprof.Profiler
# PyPy doesn't expose profiler_entry from the module.
profiler_entry = getattr(_lsprof, 'profiler_entry', None)
__all__ = [b'profile', b'Stats']
def profile(f, *args, **kwds):
"""XXX docstring"""
p = Profiler()
p.enable(subcalls=True, builtins=True)
try:
f(*args, **kwds)
finally:
p.disable()
return Stats(p.getstats())
class Stats:
"""XXX docstring"""
def __init__(self, data):
self.data = data
def sort(self, crit="inlinetime"):
"""XXX docstring"""
# profiler_entries isn't defined when running under PyPy.
if profiler_entry:
if crit not in profiler_entry.__dict__:
raise ValueError(b"Can't sort by %s" % crit)
elif self.data and not getattr(self.data[0], crit, None):
raise ValueError(b"Can't sort by %s" % crit)
self.data.sort(key=lambda x: getattr(x, crit), reverse=True)
for e in self.data:
if e.calls:
e.calls.sort(key=lambda x: getattr(x, crit), reverse=True)
def pprint(self, top=None, file=None, limit=None, climit=None):
"""XXX docstring"""
if file is None:
file = sys.stdout
d = self.data
if top is not None:
d = d[:top]
cols = b"% 12d %12d %11.4f %11.4f %s\n"
hcols = b"% 12s %12s %12s %12s %s\n"
file.write(
hcols
% (
b"CallCount",
b"Recursive",
b"Total(s)",
b"Inline(s)",
b"module:lineno(function)",
)
)
count = 0
for e in d:
file.write(
cols
% (
e.callcount,
e.reccallcount,
e.totaltime,
e.inlinetime,
label(e.code),
)
)
count += 1
if limit is not None and count == limit:
return
ccount = 0
if climit and e.calls:
for se in e.calls:
file.write(
cols
% (
se.callcount,
se.reccallcount,
se.totaltime,
se.inlinetime,
b" %s" % label(se.code),
)
)
count += 1
ccount += 1
if limit is not None and count == limit:
return
if climit is not None and ccount == climit:
break
def freeze(self):
"""Replace all references to code objects with string
descriptions; this makes it possible to pickle the instance."""
# this code is probably rather ickier than it needs to be!
for i in range(len(self.data)):
e = self.data[i]
if not isinstance(e.code, str):
self.data[i] = type(e)((label(e.code),) + e[1:])
if e.calls:
for j in range(len(e.calls)):
se = e.calls[j]
if not isinstance(se.code, str):
e.calls[j] = type(se)((label(se.code),) + se[1:])
_fn2mod = {}
def label(code):
if isinstance(code, str):
return code.encode('latin-1')
try:
mname = _fn2mod[code.co_filename]
except KeyError:
for k, v in list(sys.modules.items()):
if v is None:
continue
if not isinstance(getattr(v, '__file__', None), str):
continue
if v.__file__.startswith(code.co_filename):
mname = _fn2mod[code.co_filename] = k
break
else:
mname = _fn2mod[code.co_filename] = '<%s>' % code.co_filename
res = '%s:%d(%s)' % (mname, code.co_firstlineno, code.co_name)
return res.encode('latin-1')