Mercurial > hg
view mercurial/lsprof.py @ 29322:66dbdd3cc2b9 stable
bdiff: extend matches across popular lines
For very large diffs that have large numbers of identical lines (JSON
dumps) that also have large blocks of identical text, bdiff could become
confused about which block matches which because it can only match
very limited regions. The result is very large diffs for small sets of edits.
The earlier recursion rebalancing fix made this behavior more frequent because
it's now more prone to match block 1 to block 2. One frequent user of
large JSON files reported being unable to pass the resulting diffs
through their code review system.
Prior to this change, bdiff would calculate the length of a match at
(i, j) as 1 + length found at (i-1, j-1). With a large number of popular
(ignored) lines, this often meant matches couldn't be extended
backwards at all and thus all matching regions were very small.
Disabling the popularity threshold is not an option because it brings
back quadratic behavior.
Instead, we extend a match backwards until we either find a previously
discovered match or we find a mismatching line. This thus successfully
bridges over any popular lines inside and before a matching region.
The larger regions then significantly reduce the probability of confusion.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Thu, 02 Jun 2016 17:09:06 -0500 |
parents | b1a59b80e1a3 |
children | d4e5b2653693 |
line wrap: on
line source
from __future__ import absolute_import, print_function

import _lsprof
import sys

Profiler = _lsprof.Profiler

# PyPy doesn't expose profiler_entry from the module.
profiler_entry = getattr(_lsprof, 'profiler_entry', None)

__all__ = ['profile', 'Stats']


def profile(f, *args, **kwds):
    """Call ``f(*args, **kwds)`` under the profiler and return a Stats.

    Profiling is enabled with sub-call and builtin tracking.  The return
    value of ``f`` is discarded.  The profiler is always disabled again,
    even when ``f`` raises; in that case the exception propagates and no
    Stats object is returned.
    """
    p = Profiler()
    p.enable(subcalls=True, builtins=True)
    try:
        f(*args, **kwds)
    finally:
        p.disable()
    return Stats(p.getstats())


class Stats(object):
    """Wrapper around the list of entries returned by ``getstats()``.

    ``data`` is kept by reference; ``sort`` and ``freeze`` modify it in
    place.
    """

    def __init__(self, data):
        self.data = data

    def sort(self, crit="inlinetime"):
        """Sort entries, and each entry's sub-calls, by ``crit``, descending.

        ``crit`` is the name of an attribute of the profiler entries.
        Raises ValueError when ``crit`` is not a known attribute.
        """
        # profiler_entries isn't defined when running under PyPy.
        if profiler_entry:
            if crit not in profiler_entry.__dict__:
                raise ValueError("Can't sort by %s" % crit)
        elif self.data and not hasattr(self.data[0], crit):
            # Test for presence rather than truthiness: a legitimate field
            # whose first value is 0 (e.g. reccallcount) must stay sortable.
            raise ValueError("Can't sort by %s" % crit)

        def sortkey(entry):
            return getattr(entry, crit)

        self.data.sort(key=sortkey, reverse=True)
        for e in self.data:
            if e.calls:
                e.calls.sort(key=sortkey, reverse=True)

    def pprint(self, top=None, file=None, limit=None, climit=None):
        """Write a formatted table of the profile data.

        top    -- when not None, only the first ``top`` entries are shown
        file   -- object with a ``write`` method; defaults to sys.stdout
        limit  -- when not None, stop once this many rows (entries plus
                  sub-call rows, header excluded) have been written
        climit -- sub-call rows are written only when this is truthy; at
                  most ``climit`` of them are written per entry
        """
        if file is None:
            file = sys.stdout
        d = self.data
        if top is not None:
            d = d[:top]
        cols = "% 12s %12s %11.4f %11.4f %s\n"
        hcols = "% 12s %12s %12s %12s %s\n"
        file.write(hcols % ("CallCount", "Recursive", "Total(s)",
                            "Inline(s)", "module:lineno(function)"))
        count = 0
        for e in d:
            file.write(cols % (e.callcount, e.reccallcount, e.totaltime,
                               e.inlinetime, label(e.code)))
            count += 1
            if limit is not None and count == limit:
                return
            ccount = 0
            if climit and e.calls:
                for se in e.calls:
                    file.write(cols % (se.callcount, se.reccallcount,
                                       se.totaltime, se.inlinetime,
                                       " %s" % label(se.code)))
                    count += 1
                    ccount += 1
                    if limit is not None and count == limit:
                        return
                    if climit is not None and ccount == climit:
                        break

    def freeze(self):
        """Replace all references to code objects with string
        descriptions; this makes it possible to pickle the instance."""
        for i, e in enumerate(self.data):
            if not isinstance(e.code, str):
                # Rebuild the entry with the code object swapped for its
                # label.  e[1:] carries over the same ``calls`` list object,
                # so the in-place sub-call updates below are seen by the
                # rebuilt entry as well.
                self.data[i] = type(e)((label(e.code),) + e[1:])
            if e.calls:
                for j, se in enumerate(e.calls):
                    if not isinstance(se.code, str):
                        e.calls[j] = type(se)((label(se.code),) + se[1:])


# Cache mapping a code object's co_filename to the module label used for it.
_fn2mod = {}


def label(code):
    """Return a ``module:lineno(function)`` description of ``code``.

    Strings are returned unchanged.  For a code object, the module name is
    the first entry of ``sys.modules`` whose ``__file__`` starts with the
    code's ``co_filename``; when none matches, ``<co_filename>`` is used.
    The result of that lookup is cached per filename in ``_fn2mod``.
    """
    if isinstance(code, str):
        return code
    try:
        mname = _fn2mod[code.co_filename]
    except KeyError:
        # items() exists on both Python 2 and 3 (iteritems() is 2-only);
        # the list() snapshot guards against sys.modules changing while
        # we iterate.
        for k, v in list(sys.modules.items()):
            if v is None:
                continue
            if not isinstance(getattr(v, '__file__', None), str):
                continue
            if v.__file__.startswith(code.co_filename):
                mname = _fn2mod[code.co_filename] = k
                break
        else:
            mname = _fn2mod[code.co_filename] = '<%s>' % code.co_filename
    return '%s:%d(%s)' % (mname, code.co_firstlineno, code.co_name)


def _runscript(path, globs, locs):
    """Compile the file at ``path`` and execute it in the given namespaces.

    Portable replacement for the ``execfile`` builtin, which only exists
    on Python 2.
    """
    with open(path, 'rb') as fp:
        source = fp.read()
    # dont_inherit=True: do not apply this module's __future__ imports to
    # the script being profiled.
    code = compile(source, path, 'exec', 0, True)
    exec(code, globs, locs)


if __name__ == '__main__':
    import os
    sys.argv = sys.argv[1:]
    if not sys.argv:
        print("usage: lsprof.py <script> <arguments...>", file=sys.stderr)
        sys.exit(2)
    sys.path.insert(0, os.path.abspath(os.path.dirname(sys.argv[0])))
    stats = profile(_runscript, sys.argv[0], globals(), locals())
    stats.sort()
    stats.pprint()