view hgext/churn.py @ 46527:018d622e814d

test-copies: reinstall initial identical (empty) files for chained copied This effectively back out changeset deeb215be337. Changeset deeb215be33 does not really include a justification for its change and make mes uncomfortable. I have been thinking about it and they are two options: - either having empty/full files does not make a difference, and deeb215be337 is a gratuitous changes. - either having empty/full files do make a difference and deeb215be33 silently change the test coverage. In such situation if we want the "not empty" case to be tested, we should add new cases to cover them In practice, we know that the "file content did not change, but merge still need to create a new filenode" case exists (for example if merging result in similar content but both parent of the file need to be recorded), and that such case are easy to miss/mess-up in the tests. Having all the file using the same (empty) content was done on purpose to increase the coverage of such corner case. As a result I am reinstalling the previous test situation. To increase the coverage of some case involving content-merge in test-copies-chain-merge.t, we will add a new, dedicated, cases later in this series, once various cleanup and test improvement have been set in place. This changeset starts with reinstalling the previous situation as (1) it is more fragile, so I am more confided getting it back in the initial situation, (2) I have specific test further down the line that are base on these one. The next changeset will slightly alter the test to use non-empty files for these tests (with identical content). It should help to make the initial intent "merge file with identical content" clearer. I am still using a two steps (backout, then change content) approach to facilitate careful validation of the output change. Doing so has a large impact on the output of the "copy info in changeset extra" variant added in 5e72827dae1e (2 changesets after deeb215be33). It seems to highlight various breakage when merge without content change are involved, this is a good example of why we want to explicitly test theses cases. Because the different -do- matters a lot. Fixing the "copy info in changeset extra" is not a priority here. Because (1) this changeset does not break anything, it only highlight that they were always broken. (2) the only people using "copy info in changeset extra" do not have merge. Differential Revision: https://phab.mercurial-scm.org/D9587
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Thu, 10 Dec 2020 14:25:36 +0100
parents b84c3d43ff2e
children 6000f5b25c9b
line wrap: on
line source

# churn.py - create a graph of revisions count grouped by template
#
# Copyright 2006 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
# Copyright 2008 Alexander Solovyov <piranha@piranha.org.ua>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''command to display statistics about repository history'''

from __future__ import absolute_import, division

import datetime
import os
import time

from mercurial.i18n import _
from mercurial.pycompat import open
from mercurial import (
    cmdutil,
    encoding,
    logcmdutil,
    patch,
    pycompat,
    registrar,
    scmutil,
)

cmdtable = {}
command = registrar.command(cmdtable)
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'


def changedlines(ui, repo, ctx1, ctx2, fmatch):
    added, removed = 0, 0
    diff = b''.join(patch.diff(repo, ctx1.node(), ctx2.node(), fmatch))
    inhunk = False
    for l in diff.split(b'\n'):
        if inhunk and l.startswith(b"+"):
            added += 1
        elif inhunk and l.startswith(b"-"):
            removed += 1
        elif l.startswith(b"@"):
            inhunk = True
        elif l.startswith(b"d"):
            inhunk = False
    return (added, removed)


def countrate(ui, repo, amap, *pats, **opts):
    """Calculate stats"""
    opts = pycompat.byteskwargs(opts)
    if opts.get(b'dateformat'):

        def getkey(ctx):
            t, tz = ctx.date()
            date = datetime.datetime(*time.gmtime(float(t) - tz)[:6])
            return encoding.strtolocal(
                date.strftime(encoding.strfromlocal(opts[b'dateformat']))
            )

    else:
        tmpl = opts.get(b'oldtemplate') or opts.get(b'template')
        tmpl = logcmdutil.maketemplater(ui, repo, tmpl)

        def getkey(ctx):
            ui.pushbuffer()
            tmpl.show(ctx)
            return ui.popbuffer()

    progress = ui.makeprogress(
        _(b'analyzing'), unit=_(b'revisions'), total=len(repo)
    )
    rate = {}

    def prep(ctx, fmatch):
        rev = ctx.rev()
        key = getkey(ctx).strip()
        key = amap.get(key, key)  # alias remap
        if opts.get(b'changesets'):
            rate[key] = (rate.get(key, (0,))[0] + 1, 0)
        else:
            parents = ctx.parents()
            if len(parents) > 1:
                ui.note(_(b'revision %d is a merge, ignoring...\n') % (rev,))
                return

            ctx1 = parents[0]
            lines = changedlines(ui, repo, ctx1, ctx, fmatch)
            rate[key] = [r + l for r, l in zip(rate.get(key, (0, 0)), lines)]

        progress.increment()

    wopts = logcmdutil.walkopts(
        pats=pats,
        opts=opts,
        revspec=opts[b'rev'],
        date=opts[b'date'],
        include_pats=opts[b'include'],
        exclude_pats=opts[b'exclude'],
    )
    revs, makefilematcher = logcmdutil.makewalker(repo, wopts)
    for ctx in scmutil.walkchangerevs(repo, revs, makefilematcher, prep):
        continue

    progress.complete()

    return rate


@command(
    b'churn',
    [
        (
            b'r',
            b'rev',
            [],
            _(b'count rate for the specified revision or revset'),
            _(b'REV'),
        ),
        (
            b'd',
            b'date',
            b'',
            _(b'count rate for revisions matching date spec'),
            _(b'DATE'),
        ),
        (
            b't',
            b'oldtemplate',
            b'',
            _(b'template to group changesets (DEPRECATED)'),
            _(b'TEMPLATE'),
        ),
        (
            b'T',
            b'template',
            b'{author|email}',
            _(b'template to group changesets'),
            _(b'TEMPLATE'),
        ),
        (
            b'f',
            b'dateformat',
            b'',
            _(b'strftime-compatible format for grouping by date'),
            _(b'FORMAT'),
        ),
        (b'c', b'changesets', False, _(b'count rate by number of changesets')),
        (b's', b'sort', False, _(b'sort by key (default: sort by count)')),
        (b'', b'diffstat', False, _(b'display added/removed lines separately')),
        (b'', b'aliases', b'', _(b'file with email aliases'), _(b'FILE')),
    ]
    + cmdutil.walkopts,
    _(b"hg churn [-d DATE] [-r REV] [--aliases FILE] [FILE]"),
    helpcategory=command.CATEGORY_MAINTENANCE,
    inferrepo=True,
)
def churn(ui, repo, *pats, **opts):
    """histogram of changes to the repository

    This command will display a histogram representing the number
    of changed lines or revisions, grouped according to the given
    template. The default template will group changes by author.
    The --dateformat option may be used to group the results by
    date instead.

    Statistics are based on the number of changed lines, or
    alternatively the number of matching revisions if the
    --changesets option is specified.

    Examples::

      # display count of changed lines for every committer
      hg churn -T "{author|email}"

      # display daily activity graph
      hg churn -f "%H" -s -c

      # display activity of developers by month
      hg churn -f "%Y-%m" -s -c

      # display count of lines changed in every year
      hg churn -f "%Y" -s

      # display count of lines changed in a time range
      hg churn -d "2020-04 to 2020-09"

    It is possible to map alternate email addresses to a main address
    by providing a file using the following format::

      <alias email> = <actual email>

    Such a file may be specified with the --aliases option, otherwise
    a .hgchurn file will be looked for in the working directory root.
    Aliases will be split from the rightmost "=".
    """

    def pad(s, l):
        return s + b" " * (l - encoding.colwidth(s))

    amap = {}
    aliases = opts.get('aliases')
    if not aliases and os.path.exists(repo.wjoin(b'.hgchurn')):
        aliases = repo.wjoin(b'.hgchurn')
    if aliases:
        for l in open(aliases, b"rb"):
            try:
                alias, actual = l.rsplit(b'=' in l and b'=' or None, 1)
                amap[alias.strip()] = actual.strip()
            except ValueError:
                l = l.strip()
                if l:
                    ui.warn(_(b"skipping malformed alias: %s\n") % l)
                continue

    rate = list(countrate(ui, repo, amap, *pats, **opts).items())
    if not rate:
        return

    if opts.get('sort'):
        rate.sort()
    else:
        rate.sort(key=lambda x: (-sum(x[1]), x))

    # Be careful not to have a zero maxcount (issue833)
    maxcount = float(max(sum(v) for k, v in rate)) or 1.0
    maxname = max(len(k) for k, v in rate)

    ttywidth = ui.termwidth()
    ui.debug(b"assuming %i character terminal\n" % ttywidth)
    width = ttywidth - maxname - 2 - 2 - 2

    if opts.get('diffstat'):
        width -= 15

        def format(name, diffstat):
            added, removed = diffstat
            return b"%s %15s %s%s\n" % (
                pad(name, maxname),
                b'+%d/-%d' % (added, removed),
                ui.label(b'+' * charnum(added), b'diffstat.inserted'),
                ui.label(b'-' * charnum(removed), b'diffstat.deleted'),
            )

    else:
        width -= 6

        def format(name, count):
            return b"%s %6d %s\n" % (
                pad(name, maxname),
                sum(count),
                b'*' * charnum(sum(count)),
            )

    def charnum(count):
        return int(count * width // maxcount)

    for name, count in rate:
        ui.write(format(name, count))