Mercurial > hg
view hgext/blackbox.py @ 44363:f7459da77f23
nodemap: introduce an option to use mmap to read the nodemap mapping
The performance and memory benefit is much greater if we don't have to copy all
the data in memory for each information. So we introduce an option (on by
default) to read the data using mmap.
This changeset is the last one defining the API for index support of nodemap
data. (They have to be able to use the mmapping.)
Below are some benchmarks comparing the best we currently have in 5.3 with the
final step of this series (using the persistent nodemap implementation in
Rust). The benchmarks run `hg perfindex` with various revsets and the following
variants:
Before:
* do not use the persistent nodemap
* use the CPython implementation of the index for nodemap
* use mmapping of the changelog index
After:
* use the MixedIndex Rust code, with the NodeTree object for nodemap access
(still in review)
* use the persistent nodemap data from disk
* access the persistent nodemap data through mmap
* use mmapping of the changelog index
The persistent nodemap greatly speeds up most operations on very large
repositories. Some of the previously very fast lookups end up a bit slower because
the persistent nodemap has to be set up. However, the absolute slowdown is very
small and won't matter in the big picture.
Here are some numbers (in seconds) for the reference copy of mozilla-try:
Revset Before After abs-change speedup
-10000: 0.004622 0.005532 0.000910 × 0.83
-10: 0.000050 0.000132 0.000082 × 0.37
tip 0.000052 0.000085 0.000033 × 0.61
0 + (-10000:) 0.028222 0.005337 -0.022885 × 5.29
0 0.023521 0.000084 -0.023437 × 280.01
(-10000:) + 0 0.235539 0.005308 -0.230231 × 44.37
(-10:) + :9 0.232883 0.000180 -0.232703 ×1293.79
(-10000:) + (:99) 0.238735 0.005358 -0.233377 × 44.55
:99 + (-10000:) 0.317942 0.005593 -0.312349 × 56.84
:9 + (-10:) 0.313372 0.000179 -0.313193 ×1750.68
:9 0.316450 0.000143 -0.316307 ×2212.93
On smaller repositories, the cost of nodemap-related operations is not as big, so
the win is much more modest. Yet it helps shave a handful of milliseconds here
and there.
Here are some numbers (in seconds) for the reference copy of mercurial:
Revset Before After abs-change speedup
-10: 0.000065 0.000097 0.000032 × 0.67
tip 0.000063 0.000078 0.000015 × 0.80
0 0.000561 0.000079 -0.000482 × 7.10
-10000: 0.004609 0.003648 -0.000961 × 1.26
0 + (-10000:) 0.005023 0.003715 -0.001307 × 1.35
(-10:) + :9 0.002187 0.000108 -0.002079 ×20.25
(-10000:) + 0 0.006252 0.003716 -0.002536 × 1.68
(-10000:) + (:99) 0.006367 0.003707 -0.002660 × 1.71
:9 + (-10:) 0.003846 0.000110 -0.003736 ×34.96
:9 0.003854 0.000099 -0.003755 ×38.92
:99 + (-10000:) 0.007644 0.003778 -0.003866 × 2.02
Differential Revision: https://phab.mercurial-scm.org/D7894
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 11 Feb 2020 11:18:52 +0100 |
parents | 9f70512ae2cf |
children | 89a2afe31e82 |
line wrap: on
line source
# blackbox.py - log repository events to a file for post-mortem debugging
#
# Copyright 2010 Nicolas Dumazet
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""log repository events to a blackbox for debugging

Logs event information to .hg/blackbox.log to help debug and diagnose problems.
The events that get logged can be configured via the blackbox.track and
blackbox.ignore config keys.

Examples::

  [blackbox]
  track = *
  ignore = pythonhook
  # dirty is *EXPENSIVE* (slow);
  # each log entry indicates `+` if the repository is dirty, like :hg:`id`.
  dirty = True
  # record the source of log messages
  logsource = True

  [blackbox]
  track = command, commandfinish, commandexception, exthook, pythonhook

  [blackbox]
  track = incoming

  [blackbox]
  # limit the size of a log file
  maxsize = 1.5 MB
  # rotate up to N log files when the current one gets too big
  maxfiles = 3

  [blackbox]
  # Include nanoseconds in log entries with %f (see Python function
  # datetime.datetime.strftime)
  date-format = '%Y-%m-%d @ %H:%M:%S.%f'

"""

from __future__ import absolute_import

import re

from mercurial.i18n import _
from mercurial.node import hex

from mercurial import (
    encoding,
    loggingutil,
    registrar,
)
from mercurial.utils import (
    dateutil,
    procutil,
)

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'

cmdtable = {}
command = registrar.command(cmdtable)

configtable = {}
configitem = registrar.configitem(configtable)

configitem(
    b'blackbox', b'dirty', default=False,
)
configitem(
    b'blackbox', b'maxsize', default=b'1 MB',
)
configitem(
    b'blackbox', b'logsource', default=False,
)
configitem(
    b'blackbox', b'maxfiles', default=7,
)
configitem(
    b'blackbox', b'track', default=lambda: [b'*'],
)
configitem(
    b'blackbox',
    b'ignore',
    default=lambda: [b'chgserver', b'cmdserver', b'extension'],
)
configitem(
    b'blackbox', b'date-format', default=b'%Y/%m/%d %H:%M:%S',
)

# Module-level proxy registered on every ui by uipopulate(). Its ``logger``
# attribute is pointed at a blackboxlogger by reposetup() and by a successful
# blackboxlogger._log() call.
_lastlogger = loggingutil.proxylogger()


class blackboxlogger(object):
    """Logger that appends event records to ``blackbox.log`` in a repo's vfs.

    The tracked/ignored event sets and the rotation limits (``maxfiles``,
    ``maxsize``) are read from the ``[blackbox]`` config section once, at
    construction time.
    """

    def __init__(self, ui, repo):
        self._repo = repo
        self._trackedevents = set(ui.configlist(b'blackbox', b'track'))
        self._ignoredevents = set(ui.configlist(b'blackbox', b'ignore'))
        self._maxfiles = ui.configint(b'blackbox', b'maxfiles')
        self._maxsize = ui.configbytes(b'blackbox', b'maxsize')
        # Re-entrancy guard, see log().
        self._inlog = False

    def tracked(self, event):
        """Return whether ``event`` should be written to the blackbox.

        An event is tracked when it is listed explicitly in ``track``, or
        when ``track`` contains ``*`` and the event is not in ``ignore``.
        """
        return (
            b'*' in self._trackedevents and event not in self._ignoredevents
        ) or event in self._trackedevents

    def log(self, ui, event, msg, opts):
        """Write one event record, dropping re-entrant calls."""
        # self._log() -> ctx.dirty() may create new subrepo instance, which
        # ui is derived from baseui. So the recursion guard in ui.log()
        # doesn't work as it's local to the ui instance.
        if self._inlog:
            return
        self._inlog = True
        try:
            self._log(ui, event, msg, opts)
        finally:
            self._inlog = False

    def _log(self, ui, event, msg, opts):
        """Format and append a single record to ``blackbox.log``.

        The record is ``<date> <user> @<parents>[+] (<pid>)[ [<event>]]> <msg>``
        where ``+`` is added only when ``blackbox.dirty`` is set and the
        working directory is dirty, and the ``[<event>]`` part only when
        ``blackbox.logsource`` is set.

        On IOError/OSError the tracked-event set is cleared, which disables
        further logging through this instance, and a debug message is
        emitted instead of raising.
        """
        default = ui.configdate(b'devel', b'default-date')
        date = dateutil.datestr(default, ui.config(b'blackbox', b'date-format'))
        user = procutil.getuser()
        pid = b'%d' % procutil.getpid()
        changed = b''
        ctx = self._repo[None]
        parents = ctx.parents()
        rev = b'+'.join([hex(p.node()) for p in parents])
        if ui.configbool(b'blackbox', b'dirty') and ctx.dirty(
            missing=True, merge=False, branch=False
        ):
            changed = b'+'
        if ui.configbool(b'blackbox', b'logsource'):
            src = b' [%s]' % event
        else:
            src = b''
        try:
            fmt = b'%s %s @%s%s (%s)%s> %s'
            args = (date, user, rev, changed, pid, src, msg)
            with loggingutil.openlogfile(
                ui,
                self._repo.vfs,
                name=b'blackbox.log',
                maxfiles=self._maxfiles,
                maxsize=self._maxsize,
            ) as fp:
                fp.write(fmt % args)
        except (IOError, OSError) as err:
            # deactivate this to avoid failed logging again
            self._trackedevents.clear()
            ui.debug(
                b'warning: cannot write to blackbox.log: %s\n'
                % encoding.strtolocal(err.strerror)
            )
            return
        # Only a logger that just wrote successfully becomes the fallback.
        _lastlogger.logger = self


def uipopulate(ui):
    """Register the shared proxy logger on ``ui`` under the name blackbox."""
    ui.setlogger(b'blackbox', _lastlogger)


def reposetup(ui, repo):
    """Install a per-repository blackbox logger on ``ui`` for local repos."""
    # During 'hg pull' a httppeer repo is created to represent the remote repo.
    # It doesn't have a .hg directory to put a blackbox in, so we don't do
    # the blackbox setup for it.
    if not repo.local():
        return

    # Since blackbox.log is stored in the repo directory, the logger should be
    # instantiated per repository.
    logger = blackboxlogger(ui, repo)
    ui.setlogger(b'blackbox', logger)

    # Set _lastlogger even if ui.log is not called. This gives blackbox a
    # fallback place to log
    if _lastlogger.logger is None:
        _lastlogger.logger = logger

    repo._wlockfreeprefix.add(b'blackbox.log')


@command(
    b'blackbox',
    [(b'l', b'limit', 10, _(b'the number of events to show')),],
    _(b'hg blackbox [OPTION]...'),
    helpcategory=command.CATEGORY_MAINTENANCE,
    helpbasic=True,
)
def blackbox(ui, repo, *revs, **opts):
    '''view the recent repository events
    '''
    # The docstring above is the user-visible help text, so implementation
    # notes live in comments instead.
    #
    # Prints the tail of blackbox.log containing the last ``limit`` event
    # records. Does nothing when the log file does not exist.

    if not repo.vfs.exists(b'blackbox.log'):
        return

    limit = opts.get('limit')
    # Close the log file deterministically instead of leaking the handle
    # until garbage collection.
    with repo.vfs(b'blackbox.log', b'r') as fp:
        lines = fp.read().split(b'\n')

    # count the commands by matching lines like: 2013/01/23 19:13:36 root>
    # NOTE: this pattern matches the default ``blackbox.date-format`` only;
    # lines written with a different format do not increment the count.
    entrystart = re.compile(br'^\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2} .*> .*')

    count = 0
    output = []
    # Walk backwards from the newest line. Lines that do not start a record
    # are kept but not counted.
    for line in reversed(lines):
        if count >= limit:
            break

        if entrystart.match(line):
            count += 1
        output.append(line)

    ui.status(b'\n'.join(reversed(output)))