view hgext/churn.py @ 20742:3681de20b0a7

parsers: fail fast if Python has wrong minor version (issue4110) This change causes an informative ImportError to be raised when importing the parsers extension module if the minor version of the currently-running Python interpreter doesn't match that of the Python used when compiling the extension module. This change also exposes a parsers.versionerrortext constant in the C implementation of the module. Its presence can be used to determine whether this behavior is present in a version of the module. The value of the constant is the leading text of the ImportError raised and is set to "Python minor version mismatch". Here is an example of what the new error looks like: Traceback (most recent call last): File "test.py", line 1, in <module> import mercurial.parsers ImportError: Python minor version mismatch: The Mercurial extension modules were compiled with Python 2.7.6, but Mercurial is currently using Python with sys.hexversion=33883888: Python 2.5.6 (r256:88840, Nov 18 2012, 05:37:10) [GCC 4.2.1 Compatible Apple Clang 4.1 ((tags/Apple/clang-421.11.66))] at: /opt/local/Library/Frameworks/Python.framework/Versions/2.5/Resources/ Python.app/Contents/MacOS/Python The reason for raising an error in this scenario is that Python's C API is known not to be compatible from minor version to minor version, even if sys.api_version is the same. See for example this Python bug report about incompatibilities between 2.5 and 2.6+: http://bugs.python.org/issue8118 These incompatibilities can cause Mercurial to break in mysterious, unforeseen ways. For example, when Mercurial compiled with Python 2.7 was run with 2.5, the following crash occurred when running "hg status": http://bz.selenic.com/show_bug.cgi?id=4110 After this crash was fixed, running with Python 2.5 no longer crashes, but the following puzzling behavior still occurs: $ hg status ... File ".../mercurial/changelog.py", line 123, in __init__ revlog.revlog.__init__(self, opener, "00changelog.i") File ".../mercurial/revlog.py", line 251, in __init__ d = self._io.parseindex(i, self._inline) File ".../mercurial/revlog.py", line 158, in parseindex index, cache = parsers.parse_index2(data, inline) TypeError: data is not a string which can be reproduced more simply with: import mercurial.parsers as parsers parsers.parse_index2("", True) Both the crash and the TypeError occurred because the Python C API's PyString_Check() returns the wrong value when the C header files from Python 2.7 are run with Python 2.5. This is an example of an incompatibility of the sort mentioned in the Python bug report above. Failing fast with an informative error message results in a better user experience in cases like the above. The information in the ImportError also simplifies troubleshooting for those on Mercurial mailing lists, the bug tracker, etc. This patch only adds the version check to parsers.c, which is sufficient to affect command-line commands like "hg status" and "hg summary". An idea for a future improvement is to move the version-checking C code to a more central location, and have it run when importing all Mercurial extension modules and not just parsers.c.
author Chris Jerdonek <chris.jerdonek@gmail.com>
date Wed, 04 Dec 2013 20:38:27 -0800
parents e96e9f805c19
children 9846b40d01e7
line wrap: on
line source

# churn.py - create a graph of revisions count grouped by template
#
# Copyright 2006 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
# Copyright 2008 Alexander Solovyov <piranha@piranha.org.ua>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''command to display statistics about repository history'''

from mercurial.i18n import _
from mercurial import patch, cmdutil, scmutil, util, templater, commands
import os
import time, datetime

testedwith = 'internal'

def maketemplater(ui, repo, tmpl):
    tmpl = templater.parsestring(tmpl, quoted=False)
    try:
        t = cmdutil.changeset_templater(ui, repo, False, None, tmpl,
                                        None, False)
    except SyntaxError, inst:
        raise util.Abort(inst.args[0])
    return t

def changedlines(ui, repo, ctx1, ctx2, fns):
    added, removed = 0, 0
    fmatch = scmutil.matchfiles(repo, fns)
    diff = ''.join(patch.diff(repo, ctx1.node(), ctx2.node(), fmatch))
    for l in diff.split('\n'):
        if l.startswith("+") and not l.startswith("+++ "):
            added += 1
        elif l.startswith("-") and not l.startswith("--- "):
            removed += 1
    return (added, removed)

def countrate(ui, repo, amap, *pats, **opts):
    """Calculate stats"""
    if opts.get('dateformat'):
        def getkey(ctx):
            t, tz = ctx.date()
            date = datetime.datetime(*time.gmtime(float(t) - tz)[:6])
            return date.strftime(opts['dateformat'])
    else:
        tmpl = opts.get('template', '{author|email}')
        tmpl = maketemplater(ui, repo, tmpl)
        def getkey(ctx):
            ui.pushbuffer()
            tmpl.show(ctx)
            return ui.popbuffer()

    state = {'count': 0}
    rate = {}
    df = False
    if opts.get('date'):
        df = util.matchdate(opts['date'])

    m = scmutil.match(repo[None], pats, opts)
    def prep(ctx, fns):
        rev = ctx.rev()
        if df and not df(ctx.date()[0]): # doesn't match date format
            return

        key = getkey(ctx).strip()
        key = amap.get(key, key) # alias remap
        if opts.get('changesets'):
            rate[key] = (rate.get(key, (0,))[0] + 1, 0)
        else:
            parents = ctx.parents()
            if len(parents) > 1:
                ui.note(_('revision %d is a merge, ignoring...\n') % (rev,))
                return

            ctx1 = parents[0]
            lines = changedlines(ui, repo, ctx1, ctx, fns)
            rate[key] = [r + l for r, l in zip(rate.get(key, (0, 0)), lines)]

        state['count'] += 1
        ui.progress(_('analyzing'), state['count'], total=len(repo))

    for ctx in cmdutil.walkchangerevs(repo, m, opts, prep):
        continue

    ui.progress(_('analyzing'), None)

    return rate


def churn(ui, repo, *pats, **opts):
    '''histogram of changes to the repository

    This command will display a histogram representing the number
    of changed lines or revisions, grouped according to the given
    template. The default template will group changes by author.
    The --dateformat option may be used to group the results by
    date instead.

    Statistics are based on the number of changed lines, or
    alternatively the number of matching revisions if the
    --changesets option is specified.

    Examples::

      # display count of changed lines for every committer
      hg churn -t "{author|email}"

      # display daily activity graph
      hg churn -f "%H" -s -c

      # display activity of developers by month
      hg churn -f "%Y-%m" -s -c

      # display count of lines changed in every year
      hg churn -f "%Y" -s

    It is possible to map alternate email addresses to a main address
    by providing a file using the following format::

      <alias email> = <actual email>

    Such a file may be specified with the --aliases option, otherwise
    a .hgchurn file will be looked for in the working directory root.
    Aliases will be split from the rightmost "=".
    '''
    def pad(s, l):
        return (s + " " * l)[:l]

    amap = {}
    aliases = opts.get('aliases')
    if not aliases and os.path.exists(repo.wjoin('.hgchurn')):
        aliases = repo.wjoin('.hgchurn')
    if aliases:
        for l in open(aliases, "r"):
            try:
                alias, actual = l.rsplit('=' in l and '=' or None, 1)
                amap[alias.strip()] = actual.strip()
            except ValueError:
                l = l.strip()
                if l:
                    ui.warn(_("skipping malformed alias: %s\n") % l)
                continue

    rate = countrate(ui, repo, amap, *pats, **opts).items()
    if not rate:
        return

    if opts.get('sort'):
        rate.sort()
    else:
        rate.sort(key=lambda x: (-sum(x[1]), x))

    # Be careful not to have a zero maxcount (issue833)
    maxcount = float(max(sum(v) for k, v in rate)) or 1.0
    maxname = max(len(k) for k, v in rate)

    ttywidth = ui.termwidth()
    ui.debug("assuming %i character terminal\n" % ttywidth)
    width = ttywidth - maxname - 2 - 2 - 2

    if opts.get('diffstat'):
        width -= 15
        def format(name, diffstat):
            added, removed = diffstat
            return "%s %15s %s%s\n" % (pad(name, maxname),
                                       '+%d/-%d' % (added, removed),
                                       ui.label('+' * charnum(added),
                                                'diffstat.inserted'),
                                       ui.label('-' * charnum(removed),
                                                'diffstat.deleted'))
    else:
        width -= 6
        def format(name, count):
            return "%s %6d %s\n" % (pad(name, maxname), sum(count),
                                    '*' * charnum(sum(count)))

    def charnum(count):
        return int(round(count * width / maxcount))

    for name, count in rate:
        ui.write(format(name, count))


cmdtable = {
    "churn":
        (churn,
         [('r', 'rev', [],
           _('count rate for the specified revision or range'), _('REV')),
          ('d', 'date', '',
           _('count rate for revisions matching date spec'), _('DATE')),
          ('t', 'template', '{author|email}',
           _('template to group changesets'), _('TEMPLATE')),
          ('f', 'dateformat', '',
           _('strftime-compatible format for grouping by date'), _('FORMAT')),
          ('c', 'changesets', False, _('count rate by number of changesets')),
          ('s', 'sort', False, _('sort by key (default: sort by count)')),
          ('', 'diffstat', False, _('display added/removed lines separately')),
          ('', 'aliases', '',
           _('file with email aliases'), _('FILE')),
          ] + commands.walkopts,
         _("hg churn [-d DATE] [-r REV] [--aliases FILE] [FILE]")),
}

commands.inferrepo += " churn"