# HG changeset patch # User Brodie Rao # Date 1316473083 25200 # Node ID 16dc9a32ca04510e5796fece99393fdfc96a5932 # Parent 353a1ba928f682a0a3e29bf54b90c7cdc2f98fef mdiff: speed up showfunc for large diffs This addresses the following issues with showfunc: - Silly usage of regular expressions. - Doing str.rstrip() needlessly in an inner loop. - Doing catastrophic backtracking when trying to find a function line. Finding function text is now at worst O(n lines in the old file), and at best close to O(n hunks). Given a diff like this[1]: src/main/antlr3/uk/ac/cam/ch/wwmm/pregenerated/ChemicalChunker.g | 4 +- src/main/java/uk/ac/cam/ch/wwmm/pregenerated/ChemicalChunkerLexer.java | 2 +- src/main/java/uk/ac/cam/ch/wwmm/pregenerated/ChemicalChunkerParser.java | 29189 +++++---- 3 files changed, 14741 insertions(+), 14454 deletions(-) [1]: https://bitbucket.org/wwmm/chemicaltagger/changeset/d2bfbaecd4fc/raw Without this change, hg log --stat --config diff.showfunc=1 takes an absurdly long time to complete: CallCount Recursive Total(ms) Inline(ms) module:lineno(function) 32813 0 80.3546 40.6086 mercurial.mdiff:160(yieldhunk) +65062746 0 25.7227 25.7227 + +65062746 0 14.0221 14.0221 + +1809 0 0.0009 0.0009 +mercurial.mdiff:148(contextend) +1809 0 0.0003 0.0003 + 65062746 0 25.7227 25.7227 65062763 0 14.0221 14.0221 543 0 0.1631 0.1631 3 0 0.0505 0.0505 31007 0 80.4564 0.0477 mercurial.mdiff:147(_unidiff) +32813 0 80.3546 40.6086 +mercurial.mdiff:160(yieldhunk) +3 0 0.0505 0.0505 + +3618 0 0.0022 0.0022 +mercurial.mdiff:154(contextstart) +5427 0 0.0013 0.0013 + +3 0 0.0001 0.0000 +re:188(compile) 1 0 80.8381 0.0322 mercurial.patch:1777(diffstatdata) +107499 0 0.0235 0.0235 + +31014 0 80.7820 0.0071 +mercurial.util:1284(iterlines) +3 0 0.0000 0.0000 + +4 0 0.0000 0.0000 +mercurial.patch:1783(addresult) +3 0 0.0000 0.0000 + 6 0 0.0444 0.0283 mercurial.mdiff:12(splitnewlines) +6 0 0.0160 0.0160 + 32 0 0.0246 0.0246 11 0 0.0236 0.0236 Time: real 80.880 secs (user 80.200+0.000 sys 0.380+0.000) With this change, it's almost as fast as not using showfunc at all: CallCount Recursive Total(ms) Inline(ms) module:lineno(function) 543 0 0.1699 0.1699 3 0 0.0501 0.0501 32813 0 0.0415 0.0348 mercurial.mdiff:161(yieldhunk) +70837 0 0.0058 0.0058 + +1809 0 0.0006 0.0006 +mercurial.mdiff:148(contextend) +1809 0 0.0002 0.0002 + 1 0 0.4879 0.0310 mercurial.patch:1777(diffstatdata) +107499 0 0.0230 0.0230 + +31014 0 0.4335 0.0065 +mercurial.util:1284(iterlines) +3 0 0.0000 0.0000 + +4 0 0.0000 0.0000 +mercurial.patch:1783(addresult) +1 0 0.0004 0.0000 +re:188(compile) 32 0 0.0293 0.0293 6 0 0.0427 0.0279 mercurial.mdiff:12(splitnewlines) +6 0 0.0147 0.0147 + 31007 0 0.1169 0.0235 mercurial.mdiff:147(_unidiff) +3 0 0.0501 0.0501 + +32813 0 0.0415 0.0348 +mercurial.mdiff:161(yieldhunk) +3618 0 0.0012 0.0012 +mercurial.mdiff:154(contextstart) +5427 0 0.0006 0.0006 + 107597 0 0.0230 0.0230 16 0 0.0213 0.0213 194 0 0.0149 0.0149 Time: real 0.530 secs (user 0.450+0.000 sys 0.070+0.000) diff -r 353a1ba928f6 -r 16dc9a32ca04 mercurial/mdiff.py --- a/mercurial/mdiff.py Mon Sep 19 16:28:44 2011 -0500 +++ b/mercurial/mdiff.py Mon Sep 19 15:58:03 2011 -0700 @@ -157,6 +157,7 @@ return 0 return ret + lastfunc = [0, ''] def yieldhunk(hunk): (astart, a2, bstart, b2, delta) = hunk aend = contextend(a2, len(l1)) @@ -165,13 +166,19 @@ func = "" if opts.showfunc: - # walk backwards from the start of the context - # to find a line starting with an alphanumeric char. - for x in xrange(astart - 1, -1, -1): - t = l1[x].rstrip() - if funcre.match(t): - func = ' ' + t[:40] + lastpos, func = lastfunc + # walk backwards from the start of the context up to the start of + # the previous hunk context until we find a line starting with an + # alphanumeric char. + for i in xrange(astart - 1, lastpos - 1, -1): + if l1[i][0].isalnum(): + func = ' ' + l1[i].rstrip()[:40] + lastfunc[1] = func break + # by recording this hunk's starting point as the next place to + # start looking for function lines, we avoid reading any line in + # the file more than once. + lastfunc[0] = astart yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen, bstart + 1, blen, func) @@ -180,9 +187,6 @@ for x in xrange(a2, aend): yield ' ' + l1[x] - if opts.showfunc: - funcre = re.compile('\w') - # bdiff.blocks gives us the matching sequences in the files. The loop # below finds the spaces between those matching sequences and translates # them into diff output.