view hgext/blackbox.py @ 42377:0546ead39a7e stable

manifest: avoid corruption by dropping removed files with pure (issue5801) Previously, removed files would simply be marked by overwriting the first byte with NUL and dropping their entry in `self.position`. But no effort was made to ignore them when compacting the dictionary into text form. This allowed them to slip into the manifest revision, since the code seems to be trying to minimize the string operations by copying as large a chunk as possible. As part of this, compact() walks the existing text based on entries in the `positions` list, and consumed everything up to the next position entry. This typically resulted in a ValueError complaining about unsorted manifest entries. Sometimes it seems that files do get dropped in large repos- it seems to correspond to there being a new entry that would take the same slot. A much more trivial problem is that if the only changes were removals, `_compact()` didn't even run because `__delitem__` doesn't add anything to `self.extradata`. Now there's an explicit variable to flag this, both to allow `_compact()` to run, and to avoid searching the manifest in cases where there are no removals. In practice, this behavior was mostly obscured by the check in fastdelta() which takes a different path that explicitly drops removed files if there are fewer than 1000 changes. However, timeless has a repo where after rebasing tens of commits, a totally different path[1] is taken that bypasses the change count check and hits this problem. [1] https://www.mercurial-scm.org/repo/hg/file/2338bdea4474/mercurial/manifest.py#l1511
author Matt Harbison <matt_harbison@yahoo.com>
date Thu, 23 May 2019 21:54:24 -0400
parents bd3f03d8cc9f
children 56132ebd14c6
line wrap: on
line source

# blackbox.py - log repository events to a file for post-mortem debugging
#
# Copyright 2010 Nicolas Dumazet
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""log repository events to a blackbox for debugging

Logs event information to .hg/blackbox.log to help debug and diagnose problems.
The events that get logged can be configured via the blackbox.track config key.

Examples::

  [blackbox]
  track = *
  # dirty is *EXPENSIVE* (slow);
  # each log entry indicates `+` if the repository is dirty, like :hg:`id`.
  dirty = True
  # record the source of log messages
  logsource = True

  [blackbox]
  track = command, commandfinish, commandexception, exthook, pythonhook

  [blackbox]
  track = incoming

  [blackbox]
  # limit the size of a log file
  maxsize = 1.5 MB
  # rotate up to N log files when the current one gets too big
  maxfiles = 3

  [blackbox]
  # Include nanoseconds in log entries with %f (see Python function
  # datetime.datetime.strftime)
  date-format = '%Y-%m-%d @ %H:%M:%S.%f'

"""

from __future__ import absolute_import

import re

from mercurial.i18n import _
from mercurial.node import hex

from mercurial import (
    encoding,
    loggingutil,
    registrar,
)
from mercurial.utils import (
    dateutil,
    procutil,
)

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

cmdtable = {}
command = registrar.command(cmdtable)

configtable = {}
configitem = registrar.configitem(configtable)

configitem('blackbox', 'dirty',
    default=False,
)
configitem('blackbox', 'maxsize',
    default='1 MB',
)
configitem('blackbox', 'logsource',
    default=False,
)
configitem('blackbox', 'maxfiles',
    default=7,
)
configitem('blackbox', 'track',
    default=lambda: ['*'],
)
configitem('blackbox', 'date-format',
    default='%Y/%m/%d %H:%M:%S',
)

_lastlogger = loggingutil.proxylogger()

class blackboxlogger(object):
    def __init__(self, ui, repo):
        self._repo = repo
        self._trackedevents = set(ui.configlist('blackbox', 'track'))
        self._maxfiles = ui.configint('blackbox', 'maxfiles')
        self._maxsize = ui.configbytes('blackbox', 'maxsize')
        self._inlog = False

    def tracked(self, event):
        return b'*' in self._trackedevents or event in self._trackedevents

    def log(self, ui, event, msg, opts):
        # self._log() -> ctx.dirty() may create new subrepo instance, which
        # ui is derived from baseui. So the recursion guard in ui.log()
        # doesn't work as it's local to the ui instance.
        if self._inlog:
            return
        self._inlog = True
        try:
            self._log(ui, event, msg, opts)
        finally:
            self._inlog = False

    def _log(self, ui, event, msg, opts):
        default = ui.configdate('devel', 'default-date')
        date = dateutil.datestr(default, ui.config('blackbox', 'date-format'))
        user = procutil.getuser()
        pid = '%d' % procutil.getpid()
        changed = ''
        ctx = self._repo[None]
        parents = ctx.parents()
        rev = ('+'.join([hex(p.node()) for p in parents]))
        if (ui.configbool('blackbox', 'dirty') and
            ctx.dirty(missing=True, merge=False, branch=False)):
            changed = '+'
        if ui.configbool('blackbox', 'logsource'):
            src = ' [%s]' % event
        else:
            src = ''
        try:
            fmt = '%s %s @%s%s (%s)%s> %s'
            args = (date, user, rev, changed, pid, src, msg)
            with loggingutil.openlogfile(
                    ui, self._repo.vfs, name='blackbox.log',
                    maxfiles=self._maxfiles, maxsize=self._maxsize) as fp:
                fp.write(fmt % args)
        except (IOError, OSError) as err:
            # deactivate this to avoid failed logging again
            self._trackedevents.clear()
            ui.debug('warning: cannot write to blackbox.log: %s\n' %
                     encoding.strtolocal(err.strerror))
            return
        _lastlogger.logger = self

def uipopulate(ui):
    ui.setlogger(b'blackbox', _lastlogger)

def reposetup(ui, repo):
    # During 'hg pull' a httppeer repo is created to represent the remote repo.
    # It doesn't have a .hg directory to put a blackbox in, so we don't do
    # the blackbox setup for it.
    if not repo.local():
        return

    # Since blackbox.log is stored in the repo directory, the logger should be
    # instantiated per repository.
    logger = blackboxlogger(ui, repo)
    ui.setlogger(b'blackbox', logger)

    # Set _lastlogger even if ui.log is not called. This gives blackbox a
    # fallback place to log
    if _lastlogger.logger is None:
        _lastlogger.logger = logger

    repo._wlockfreeprefix.add('blackbox.log')

@command('blackbox',
    [('l', 'limit', 10, _('the number of events to show')),
    ],
    _('hg blackbox [OPTION]...'),
    helpcategory=command.CATEGORY_MAINTENANCE,
    helpbasic=True)
def blackbox(ui, repo, *revs, **opts):
    '''view the recent repository events
    '''

    if not repo.vfs.exists('blackbox.log'):
        return

    limit = opts.get(r'limit')
    fp = repo.vfs('blackbox.log', 'r')
    lines = fp.read().split('\n')

    count = 0
    output = []
    for line in reversed(lines):
        if count >= limit:
            break

        # count the commands by matching lines like: 2013/01/23 19:13:36 root>
        if re.match(br'^\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2} .*> .*', line):
            count += 1
        output.append(line)

    ui.status('\n'.join(reversed(output)))