contrib: add Chris Mason's stand-alone diff tool
author Matt Mackall <mpm@selenic.com>
Tue, 24 Jan 2006 14:49:19 +1300
changeset 1636 7da32bb3d1d3
parent 1635 ae61937c61c5
child 1637 3b1b44b917f4
contrib: add Chris Mason's stand-alone diff tool. This uses Mercurial's diff algorithm to generate unidiffs like the traditional diff tool.
contrib/hgdiff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/hgdiff	Tue Jan 24 14:49:19 2006 +1300
@@ -0,0 +1,224 @@
+#!/usr/bin/env python
+
+import os, sys, struct, stat
+import difflib
+import re
+from optparse import OptionParser
+from mercurial.bdiff import bdiff, blocks
+
+VERSION="0.2"
+usage = "usage: %prog [options] file1 file2"
+parser = OptionParser(usage=usage)
+
+parser.add_option("-d", "--difflib", action="store_true", default=False)
+parser.add_option('-x', '--count', default=1)  # parsed but never read below
+parser.add_option('-c', '--context', type="int", default=3)
+parser.add_option('-p', '--show-c-function', action="store_true", default=False)
+parser.add_option('-w', '--ignore-all-space', action="store_true",
+                  default=False)
+
+(options, args) = parser.parse_args()
+# file1 and file2 are both indexed further down, so one argument is not enough
+if len(args) < 2:
+    parser.print_help()
+    sys.exit(1)
+
+# Self-contained replacement for difflib.unified_diff built on bdiff.blocks.
+# A generator: yields the unified diff of t1 -> t2 one output line at a time.
+# t1 and t2 are the texts to be diffed; l1 and l2 are the same texts broken
+# up into lines (each line is expected to keep its line ending).
+# header1 and header2 are the filenames for the ---/+++ lines, which are
+# only emitted if at least one hunk is.  context is the number of context
+# lines, showfunc enables diff -p output, ignorews ignores space/tab changes.
+def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,
+             ignorews=False):
+    def contextend(l, limit):  # end of trailing context, clamped to limit
+        ret = l + context
+        if ret > limit:
+            ret = limit
+        return ret
+
+    def contextstart(l):  # start of leading context, clamped to line 0
+        ret = l - context
+        if ret < 0:
+            return 0
+        return ret
+
+    def yieldhunk(hunk, header):
+        if header:
+            for x in header:
+                yield x
+        (astart, a2, bstart, b2, delta) = hunk
+        aend = contextend(a2, len(l1))
+        alen = aend - astart
+        blen = b2 - bstart + aend - a2
+
+        func = ""
+        if showfunc:
+            # walk backwards from the start of the context (clamped, because
+            # astart can equal len(l1)) to find a line starting with \w.
+            for x in xrange(min(astart, len(l1) - 1), -1, -1):
+                t = l1[x]
+                if funcre.match(t):
+                    func = ' ' + t.rstrip()[:40]  # no newline inside the @@ line
+                    break
+
+        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
+                                           bstart + 1, blen, func)
+        for x in delta:
+            yield x
+        for x in xrange(a2, aend):
+            yield ' ' + l1[x]
+
+    header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]
+
+    if showfunc:
+        funcre = re.compile(r'\w')
+    if ignorews:
+        wsre = re.compile('[ \t]')
+
+    # bdiff.blocks gives us the matching sequences in the files.  The loop
+    # below finds the spaces between those matching sequences and translates
+    # them into diff output.  Each block is read here as
+    # [start in a, end in a, start in b, end in b].
+    diff = blocks(t1, t2)
+    hunk = None
+    for i in xrange(len(diff)):
+        # The first match is special.
+        # we've either found a match starting at line 0 or a match later
+        # in the file.  If it starts at line 0, old and new below will both
+        # be empty and we'll continue to the next match.
+        if i > 0:
+            s = diff[i-1]
+        else:
+            s = [0, 0, 0, 0]
+        delta = []
+        s1 = diff[i]
+        a1 = s[1]
+        a2 = s1[0]
+        b1 = s[3]
+        b2 = s1[2]
+        old = l1[a1:a2]
+        new = l2[b1:b2]
+
+        # bdiff sometimes gives huge matches past eof, this check eats them,
+        # and deals with the special first match case described above
+        if not old and not new:
+            continue
+
+        if ignorews:
+            wsold = wsre.sub('', "".join(old))
+            wsnew = wsre.sub('', "".join(new))
+            if wsold == wsnew:
+                continue
+
+        astart = contextstart(a1)
+        bstart = contextstart(b1)
+        prev = None
+        if hunk:
+            # join with the previous hunk if it falls inside the context
+            if astart < hunk[1] + context + 1:
+                prev = hunk
+                astart = hunk[1]
+                bstart = hunk[3]
+            else:
+                for x in yieldhunk(hunk, header):
+                    yield x
+                # we only want to yield the header if the files differ, and
+                # we only want to yield it once.
+                header = None
+        if prev:
+            # we've joined the previous hunk, record the new ending points.
+            hunk[1] = a2
+            hunk[3] = b2
+            delta = hunk[4]
+        else:
+            # create a new hunk
+            hunk = [ astart, a2, bstart, b2, delta ]
+
+        delta.extend([ ' ' + x for x in l1[astart:a1] ])
+        delta.extend([ '-' + x for x in old ])
+        delta.extend([ '+' + x for x in new ])
+
+    if hunk:
+        for x in yieldhunk(hunk, header):
+            yield x
+
+# simple utility function to put all the regular files from a
+# directory tree into a dict: names[path relative to top] = (st_dev, st_ino)
+def buildlist(names, top):
+    tlen = len(top)
+    for root, dirs, files in os.walk(top):
+        # drop top plus any separator after it, so "dir" and "dir/" agree
+        l = root[tlen:].lstrip(os.sep)
+        for x in files:
+            st = os.lstat(os.path.join(root, x))
+            if stat.S_ISREG(st.st_mode):
+                names[os.path.join(l, x)] = (st.st_dev, st.st_ino)
+
+def diff_files(file1, file2):
+    """Print a unified diff of file1 against file2 on stdout.
+
+    A side passed as None is treated as missing: the other file is then
+    shown as entirely added (file1 is None) or removed (file2 is None)."""
+    if file1 is None:
+        b = file(file2).read().splitlines(1)
+        l = ["--- %s\n" % (file2), "+++ %s\n" % (file2),
+             "@@ -0,0 +1,%d @@\n" % len(b)] + ["+" + e for e in b]
+    elif file2 is None:
+        a = file(file1).read().splitlines(1)
+        l = ["--- %s\n" % (file1), "+++ %s\n" % (file1),
+             "@@ -1,%d +0,0 @@\n" % len(a)] + ["-" + e for e in a]
+    else:
+        t1 = file(file1).read()
+        t2 = file(file2).read()
+        l1 = t1.splitlines(1)
+        l2 = t2.splitlines(1)
+        if options.difflib:
+            l = difflib.unified_diff(l1, l2, file1, file2)
+        else:
+            l = bunidiff(t1, t2, l1, l2, file1, file2, context=options.context,
+                     showfunc=options.show_c_function,
+                     ignorews=options.ignore_all_space)
+    for x in l:
+        if not x.endswith('\n'):
+            x += "\n\ No newline at end of file\n"
+        print x,
+
+file1 = args[0]
+file2 = args[1]
+
+if os.path.isfile(file1) and os.path.isfile(file2):
+    diff_files(file1, file2)
+elif os.path.isdir(file1) and os.path.isdir(file2):
+    # recursive mode: map each regular file's path relative to its tree
+    # root to its (st_dev, st_ino) pair
+    d1 = {}
+    d2 = {}
+    buildlist(d1, file1)
+    buildlist(d2, file2)
+    keys = d1.keys()
+    keys.sort()
+    for x in keys:
+        if x not in d2:
+            # only in the first tree: shown as removed
+            f2 = None
+        else:
+            f2 = os.path.join(file2, x)
+            # pop so that d2 ends up holding only the files missing from d1
+            if d1[x] == d2.pop(x):
+                # same device and inode: one file, nothing to compare
+                sys.stderr.write("%s is a hard link\n" % x)
+                continue
+        diff_files(os.path.join(file1, x), f2)
+
+    # whatever is left exists only in the second tree: shown as added
+    keys = d2.keys()
+    keys.sort()
+    for x in keys:
+        diff_files(None, os.path.join(file2, x))
+else:
+    # a file/directory mix, or a path that is neither: fail loudly
+    sys.stderr.write("file types don't match\n")
+    sys.exit(1)
+