Mercurial > hg
view contrib/hgdiff @ 10262:eb243551cbd8 stable
copies: speed up copy detection
On some large repos, copy detection could spend > 10min using
fctx.ancestor() to determine if file revisions were actually related.
Because ancestor must traverse history to the root to determine the
GCA, it was doing a lot more work than necessary. With this
replacement, same status -r a:b takes ~3 seconds.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Tue, 19 Jan 2010 22:20:05 -0600 |
parents | 31177742f54a |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Print a unified diff of two files or of two directory trees.

Usage: hgdiff [options] file1 file2

When both arguments are regular files they are diffed directly.  When
both are directories, every regular file found under either tree is
compared by its path relative to the tree root; files present on only
one side are shown as wholly added or wholly removed.
"""

import os
import sys
import struct
import stat
import difflib
import re
from optparse import OptionParser
from mercurial.bdiff import bdiff, blocks
from mercurial.mdiff import bunidiff, diffopts

VERSION = "0.3"
usage = "usage: %prog [options] file1 file2"
parser = OptionParser(usage=usage)

parser.add_option("-d", "--difflib", action="store_true", default=False)
parser.add_option('-x', '--count', default=1)
parser.add_option('-c', '--context', type="int", default=3)
parser.add_option('-p', '--show-c-function', action="store_true",
                  default=False)
parser.add_option('-w', '--ignore-all-space', action="store_true",
                  default=False)

(options, args) = parser.parse_args()

# Two positional arguments are required; with only one, indexing args[1]
# further down would die with an IndexError instead of a usage message.
if len(args) < 2:
    parser.print_help()
    sys.exit(1)


def buildlist(names, top):
    """Record every regular file below ``top`` in the dict ``names``.

    Keys are paths relative to ``top``; values are ``(st_dev, st_ino)``
    tuples taken from ``os.lstat``, which the caller uses to detect hard
    links between the two trees.  Entries that are not regular files
    (symlinks, devices, ...) are skipped.
    """
    tlen = len(top)
    seps = os.sep + (os.altsep or "")
    for root, dirs, files in os.walk(top):
        # Drop the ``top`` prefix, then any separator left in front.
        # Slicing at ``tlen + 1`` instead would eat the first character
        # of the sub-directory name whenever ``top`` already ends with a
        # separator (e.g. "dir/" or "/").
        rel = root[tlen:].lstrip(seps)
        for x in files:
            p = os.path.join(root, x)
            st = os.lstat(p)
            if stat.S_ISREG(st.st_mode):
                names[os.path.join(rel, x)] = (st.st_dev, st.st_ino)


def _readfile(path):
    """Return the full contents of ``path``, closing the file afterwards."""
    fp = open(path)
    try:
        return fp.read()
    finally:
        fp.close()


def diff_files(file1, file2):
    """Write a unified diff between ``file1`` and ``file2`` to stdout.

    Either argument may be ``None`` to mean "file does not exist on this
    side": the other file is then printed as a single hunk of added
    (``file1 is None``) or removed (``file2 is None``) lines, with the
    existing file's name used in both header lines.  Otherwise the diff
    is produced by ``difflib.unified_diff`` when ``--difflib`` was given,
    or by ``mercurial.mdiff.bunidiff`` with the context / show-function /
    ignore-whitespace options from the command line.
    """
    if file1 is None:
        b = _readfile(file2).splitlines(True)
        l1 = "--- %s\n" % (file2)
        l2 = "+++ %s\n" % (file2)
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif file2 is None:
        a = _readfile(file1).splitlines(True)
        l1 = "--- %s\n" % (file1)
        l2 = "+++ %s\n" % (file1)
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        t1 = _readfile(file1)
        t2 = _readfile(file2)
        l1 = t1.splitlines(True)
        l2 = t2.splitlines(True)
        if options.difflib:
            l = difflib.unified_diff(l1, l2, file1, file2)
        else:
            l = bunidiff(t1, t2, l1, l2, file1, file2,
                         diffopts(context=options.context,
                                  showfunc=options.show_c_function,
                                  ignorews=options.ignore_all_space))

    for x in l:
        # A final line lacking its terminator gets the conventional
        # marker appended so the output stays line-oriented.
        if not x.endswith('\n'):
            x += "\n\\ No newline at end of file\n"
        sys.stdout.write(x)


file1 = args[0]
file2 = args[1]

if os.path.isfile(file1) and os.path.isfile(file2):
    diff_files(file1, file2)
elif os.path.isdir(file1) and os.path.isdir(file2):
    d1 = {}
    d2 = {}

    buildlist(d1, file1)
    buildlist(d2, file2)

    # Files in the first tree: changed, removed, or hard-linked.
    for x in sorted(d1):
        if x not in d2:
            f2 = None
        else:
            f2 = os.path.join(file2, x)
            st1 = d1[x]
            st2 = d2[x]
            # Whatever remains in d2 after this loop exists only in the
            # second tree.
            del d2[x]
            # Same device and inode: both names refer to one file, so
            # there is nothing to diff.
            if st1[0] == st2[0] and st1[1] == st2[1]:
                sys.stderr.write("%s is a hard link\n" % x)
                continue
        x = os.path.join(file1, x)
        diff_files(x, f2)

    # Files that exist only in the second tree are reported as added.
    for x in sorted(d2):
        f1 = None
        x = os.path.join(file2, x)
        diff_files(f1, x)
else:
    # One argument is a file and the other a directory, or one of them
    # is missing or not a regular file/directory.  Fail loudly rather
    # than exiting successfully with no output.
    sys.stderr.write("file types don't match\n")
    sys.exit(1)