Mercurial > hg
changeset 1636:7da32bb3d1d3
contrib: add Chris Mason's stand-alone diff tool
This uses Mercurial's diff algorithm to generate unidiffs like the traditional diff tool.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Tue, 24 Jan 2006 14:49:19 +1300 |
parents | ae61937c61c5 |
children | 3b1b44b917f4 |
files | contrib/hgdiff |
diffstat | 1 files changed, 224 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""hgdiff - stand-alone unified diff tool (contrib/hgdiff).

usage: hgdiff [options] file1 file2

file1 and file2 must both be regular files or both be directories.  For
directories, every regular file found below them is compared by relative
path; files present on only one side are shown as fully added or removed.

Matching blocks come from Mercurial's bdiff when it can be imported, and
from difflib.SequenceMatcher otherwise.
"""

import os
import sys
import struct
import stat
import difflib
import re
from optparse import OptionParser

try:
    from mercurial.bdiff import bdiff, blocks
except ImportError:
    # bunidiff() falls back to a difflib-based matcher when these are None.
    bdiff = None
    blocks = None

VERSION = "0.2"
usage = "usage: %prog [options] file1 file2"
parser = OptionParser(usage=usage)

parser.add_option("-d", "--difflib", action="store_true", default=False)
parser.add_option('-x', '--count', default=1)
parser.add_option('-c', '--context', type="int", default=3)
parser.add_option('-p', '--show-c-function', action="store_true",
                  default=False)
parser.add_option('-w', '--ignore-all-space', action="store_true",
                  default=False)

# Start from the option defaults so diff_files() is usable before main()
# has parsed a command line; main() replaces this with the parsed values.
options = parser.get_default_values()


def _difflib_blocks(l1, l2):
    """Return matching line ranges of l1/l2 as (a1, a2, b1, b2) tuples.

    The final entry is difflib's zero-length sentinel, which becomes
    (len(l1), len(l1), len(l2), len(l2)); bunidiff() needs it to see
    changes after the last real match.
    """
    matcher = difflib.SequenceMatcher(None, l1, l2)
    return [(i, i + n, j, j + n)
            for i, j, n in matcher.get_matching_blocks()]


def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,
             ignorews=False):
    """Generate the lines of a unified diff between two texts.

    A somewhat self contained replacement for difflib.unified_diff.

    t1, t2           -- the texts to be diffed
    l1, l2           -- the same texts broken up into lines (line endings
                        kept)
    header1, header2 -- the file names for the '---' / '+++' lines
    context          -- number of context lines around each change
    showfunc         -- enable diff -p style output: append the nearest
                        preceding line that starts with a word character
                        to each hunk header
    ignorews         -- skip changes that differ only in spaces and tabs

    Nothing at all is yielded (not even the file header) when no
    difference is found.
    """
    funcre = re.compile(r'\w')
    wsre = re.compile('[ \t]')

    def contextend(l, length):
        return min(l + context, length)

    def contextstart(l):
        return max(l - context, 0)

    def rangestart(start, length):
        # Unified diff numbers lines from 1, but an empty range is
        # reported at the line just before it (e.g. "-0,0").
        if length:
            return start + 1
        return start

    def yieldhunk(hunk, header):
        if header:
            for x in header:
                yield x
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if showfunc:
            # walk backwards from the start of the context to find a
            # line starting with an alphanumeric char.  The start is
            # clamped because astart may equal len(l1) (empty file, or
            # an append at end of file with no context).
            for x in range(min(astart, len(l1) - 1), -1, -1):
                t = l1[x]
                if funcre.match(t):
                    # drop the line ending, otherwise it would split the
                    # hunk header over two lines
                    func = ' ' + t[:40].rstrip()
                    break

        yield "@@ -%d,%d +%d,%d @@%s\n" % (rangestart(astart, alen), alen,
                                           rangestart(bstart, blen), blen,
                                           func)
        for x in delta:
            yield x
        # trailing context
        for x in l1[a2:aend]:
            yield ' ' + x

    header = ["--- %s\t\n" % header1, "+++ %s\t\n" % header2]

    # The matcher gives us the matching sequences in the files.  The loop
    # below finds the gaps between those matching sequences and
    # translates them into diff output.
    if blocks is not None:
        matches = blocks(t1, t2)
    else:
        matches = _difflib_blocks(l1, l2)

    hunk = None
    # Treating "nothing matched yet" as an empty match at 0 makes the
    # first gap cover any text before the first real match.
    last = (0, 0, 0, 0)
    for match in matches:
        a1, b1 = last[1], last[3]
        a2, b2 = match[0], match[2]
        last = match
        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats
        # them, and also skips the empty gap before a match at line 0
        if not old and not new:
            continue

        if ignorews:
            if wsre.sub('', "".join(old)) == wsre.sub('', "".join(new)):
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        if hunk is not None and astart < hunk[1] + context + 1:
            # the change falls inside the previous hunk's context: join
            # it, continuing from where that hunk ended
            astart = hunk[1]
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            if hunk is not None:
                for x in yieldhunk(hunk, header):
                    yield x
                # we only want to yield the header if the files differ,
                # and we only want to yield it once.
                header = None
            delta = []
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend(' ' + x for x in l1[astart:a1])
        delta.extend('-' + x for x in old)
        delta.extend('+' + x for x in new)

    if hunk is not None:
        for x in yieldhunk(hunk, header):
            yield x


def buildlist(names, top):
    """Record every regular file below directory top in the dict names.

    Keys are paths relative to top; values are (st_dev, st_ino) pairs
    from lstat.  Anything that is not a regular file is left out.
    """
    for root, dirs, files in os.walk(top):
        # relpath instead of slicing off len(top) + 1 characters, which
        # cut one character too many when top ended with a separator
        rel = os.path.relpath(root, top)
        if rel == os.curdir:
            rel = ''
        for name in files:
            st = os.lstat(os.path.join(root, name))
            if stat.S_ISREG(st.st_mode):
                names[os.path.join(rel, name)] = (st.st_dev, st.st_ino)


def _readtext(path):
    """Return the whole content of the file at path."""
    with open(path) as fp:
        return fp.read()


def diff_files(file1, file2):
    """Write the unified diff of file1 against file2 to stdout.

    Either argument may be None, meaning the file exists on the other
    side only; the other file is then shown as entirely added (file1 is
    None) or entirely removed (file2 is None).  Reads the module level
    'options' for the diff settings.
    """
    if file1 is None:
        added = _readtext(file2).splitlines(True)
        lines = ["--- %s\n" % file2,
                 "+++ %s\n" % file2,
                 "@@ -0,0 +1,%d @@\n" % len(added)]
        lines.extend("+" + e for e in added)
    elif file2 is None:
        removed = _readtext(file1).splitlines(True)
        lines = ["--- %s\n" % file1,
                 "+++ %s\n" % file1,
                 "@@ -1,%d +0,0 @@\n" % len(removed)]
        lines.extend("-" + e for e in removed)
    else:
        t1 = _readtext(file1)
        t2 = _readtext(file2)
        l1 = t1.splitlines(True)
        l2 = t2.splitlines(True)
        if options.difflib:
            lines = difflib.unified_diff(l1, l2, file1, file2,
                                         n=options.context)
        else:
            lines = bunidiff(t1, t2, l1, l2, file1, file2,
                             context=options.context,
                             showfunc=options.show_c_function,
                             ignorews=options.ignore_all_space)
    for x in lines:
        if not x.endswith('\n'):
            x += "\n\\ No newline at end of file\n"
        sys.stdout.write(x)


def main(argv=None):
    """Run the command line tool and return its exit status.

    argv is the argument list without the program name; None means
    sys.argv[1:].  Returns 1 on usage errors, missing paths or
    mismatched file types and 0 otherwise, whether or not differences
    were found.
    """
    global options
    options, args = parser.parse_args(argv)

    if len(args) < 2:
        parser.print_help()
        return 1

    file1 = args[0]
    file2 = args[1]

    for path in (file1, file2):
        if not os.path.exists(path):
            sys.stderr.write("%s: No such file or directory\n" % path)
            return 1

    if os.path.isfile(file1) and os.path.isfile(file2):
        diff_files(file1, file2)
        return 0

    if not (os.path.isdir(file1) and os.path.isdir(file2)):
        sys.stderr.write("file types don't match\n")
        return 1

    d1 = {}
    d2 = {}
    buildlist(d1, file1)
    buildlist(d2, file2)

    for x in sorted(d1):
        if x not in d2:
            f2 = None
        else:
            f2 = os.path.join(file2, x)
            st1 = d1[x]
            st2 = d2[x]
            del d2[x]
            # same device and inode: both names are the same file, so
            # there is nothing to compare
            if st1[0] == st2[0] and st1[1] == st2[1]:
                sys.stderr.write("%s is a hard link\n" % x)
                continue
        diff_files(os.path.join(file1, x), f2)

    # whatever is left in d2 exists below file2 only
    for x in sorted(d2):
        diff_files(None, os.path.join(file2, x))
    return 0


if __name__ == "__main__":
    sys.exit(main())