tests/autodiff.py
author Matt Mackall <mpm@selenic.com>
Thu, 21 Apr 2016 22:04:11 -0500
branchstable
changeset 29014 f1ca249696ed
parent 27281 3b517f2a3989
child 32376 46ba2cdda476
permissions -rw-r--r--
bdiff: balance recursion to avoid quadratic behavior (issue4704) For highly structured files like JSON or XML dumps with large numbers of duplicate lines (eg braces) and isolated matching lines, bdiff could find large numbers of equally good spans. Because it prefers earlier matches, this would result in pathologically unbalance recursion that resulted in quadratic performance. This patch makes it prefer matches closer to the middle that tend to balance recursion. This change improves the speed of a pathological test case from 1100s to 9s. Included is a smaller test that has a roughly 50x safety margin on the performance it accepts. It's likely to fail on pure builds because difflib also has a recursion-balancing problem.

# Extension dedicated to test patch.diff() upgrade modes

from __future__ import absolute_import

from mercurial import (
    cmdutil,
    error,
    patch,
    scmutil,
)

cmdtable = {}
command = cmdutil.command(cmdtable)

@command('autodiff',
    [('', 'git', '', 'git upgrade mode (yes/no/auto/warn/abort)')],
    '[OPTION]... [FILE]...')
def autodiff(ui, repo, *pats, **opts):
    diffopts = patch.difffeatureopts(ui, opts)
    git = opts.get('git', 'no')
    brokenfiles = set()
    losedatafn = None
    if git in ('yes', 'no'):
        diffopts.git = git == 'yes'
        diffopts.upgrade = False
    elif git == 'auto':
        diffopts.git = False
        diffopts.upgrade = True
    elif git == 'warn':
        diffopts.git = False
        diffopts.upgrade = True
        def losedatafn(fn=None, **kwargs):
            brokenfiles.add(fn)
            return True
    elif git == 'abort':
        diffopts.git = False
        diffopts.upgrade = True
        def losedatafn(fn=None, **kwargs):
            raise error.Abort('losing data for %s' % fn)
    else:
        raise error.Abort('--git must be yes, no or auto')

    node1, node2 = scmutil.revpair(repo, [])
    m = scmutil.match(repo[node2], pats, opts)
    it = patch.diff(repo, node1, node2, match=m, opts=diffopts,
                    losedatafn=losedatafn)
    for chunk in it:
        ui.write(chunk)
    for fn in sorted(brokenfiles):
        ui.write(('data lost for: %s\n' % fn))