#!/bin/python
""" Command line interface to difflib.py providing diffs in four formats:

* ndiff:    lists every line and highlights interline changes.
* context:  highlights clusters of changes in a before/after format.
* unified:  highlights clusters of changes in an inline format.
* html:     generates side by side comparison with change highlights.

"""

import sys, os, time, difflib, optparse, re

# Matches a context-diff hunk range line such as "*** 4,6 ****" or
# "--- 12 ----".  The second number is absent when the range covers a
# single line (or is empty).
_HUNK_RANGE_RE = re.compile(r'^(\*\*\*|---) (\d+)(?:,(\d+))? (\*\*\*\*|----)')


def _build_parser():
    """Return the optparse parser describing the command line options."""
    usage = "usage: %prog [options] fromfile tofile"
    parser = optparse.OptionParser(usage)
    parser.add_option("-c", action="store_true", default=False,
                      help='Produce a context format diff (default)')
    parser.add_option("-u", action="store_true", default=False,
                      help='Produce a unified format diff')
    hlp = 'Produce HTML side by side diff (can use -c and -l in conjunction)'
    parser.add_option("-m", action="store_true", default=False, help=hlp)
    parser.add_option("-n", action="store_true", default=False,
                      help='Produce a ndiff format diff')
    parser.add_option("-l", "--lines", type="int", default=3,
                      help='Set number of context lines (default 3)')
    # Plain int literals and type="int" behave the same as the former
    # long literals / type="long" on Python 2 and also parse on Python 3.
    parser.add_option("-b", "--buffer", type="int", default=10 * 1024 * 1024,
                      help='Set number of buffer bytes (default 10M)')
    return parser


def _open_text(path):
    """Open *path* for reading as text with universal newline handling."""
    # Python 2 needs the explicit 'U' flag; Python 3 translates newlines by
    # default and rejects 'U' from version 3.11 onwards.
    mode = 'U' if sys.version_info[0] < 3 else 'r'
    return open(path, mode)


def _make_diff(options, fromlines, tolines, fromfile, tofile,
               fromdate, todate):
    """Return an iterable of output strings for one pair of line chunks.

    The format is chosen with the precedence -u, -n, -m, and context
    format when none of those flags is given.
    """
    n = options.lines
    if options.u:
        return difflib.unified_diff(fromlines, tolines, fromfile, tofile,
                                    fromdate, todate, n=n)
    if options.n:
        return difflib.ndiff(fromlines, tolines)
    if options.m:
        # make_file() returns one string; wrap it so that every format can
        # be written with writelines().
        return [difflib.HtmlDiff().make_file(fromlines, tolines, fromfile,
                                             tofile, context=options.c,
                                             numlines=n)]
    return difflib.context_diff(fromlines, tolines, fromfile, tofile,
                                fromdate, todate, n=n)


def _shift_hunk_range(line, from_offset, to_offset):
    """Return *line* with a context-diff hunk range moved to file numbering.

    A "*** a,b ****" range refers to the from-file and a "--- a,b ----"
    range to the to-file, so both numbers of a range receive the offset of
    the file it describes.  Lines that are not hunk ranges are returned
    unchanged.
    """
    def shift(match):
        marker, start, end, trailer = match.groups()
        offset = from_offset if marker == '***' else to_offset
        numbers = [str(int(start) + offset)]
        if end is not None:
            numbers.append(str(int(end) + offset))
        return '%s %s %s' % (marker, ','.join(numbers), trailer)

    return _HUNK_RANGE_RE.sub(shift, line, 1)


def _write_context_chunk(diff, from_offset, to_offset, skip_header):
    """Write one chunk of context-diff output to stdout.

    The first two lines produced by context_diff() are the file header;
    they are dropped when *skip_header* is true.  All later lines have
    their hunk ranges shifted by the number of lines already consumed from
    each file.  Returns True when *diff* produced any output at all.
    """
    produced = False
    for index, line in enumerate(diff):
        produced = True
        if index < 2:
            if skip_header:
                continue
        else:
            line = _shift_hunk_range(line, from_offset, to_offset)
        sys.stdout.write(line)
    return produced


def main():
    """Diff the two files named on the command line and print the result.

    Both files are read in chunks of roughly ``--buffer`` bytes and each
    pair of chunks is diffed independently.  For the context format the
    per-chunk output is stitched together: the file header is printed only
    once and hunk line numbers are converted from chunk-relative to
    file-relative.

    Exits with status 1 after printing the help text when no arguments are
    given, and reports a usage error unless exactly two file names are
    supplied.
    """
    parser = _build_parser()
    (options, args) = parser.parse_args()

    if not args:
        parser.print_help()
        sys.exit(1)
    if len(args) != 2:
        parser.error("need to specify both a fromfile and tofile")

    fromfile, tofile = args  # as specified in the usage string

    # Modification times are shown in the context/unified file headers.
    fromdate = time.ctime(os.stat(fromfile).st_mtime)
    todate = time.ctime(os.stat(tofile).st_mtime)

    # Decide on the format actually produced rather than on the -c flag:
    # context output is the fallback whenever -u, -n and -m are all absent.
    is_context = not (options.u or options.n or options.m)

    with _open_text(fromfile) as fromfh, _open_text(tofile) as tofh:
        from_offset = 0   # lines of fromfile consumed by earlier chunks
        to_offset = 0     # lines of tofile consumed by earlier chunks
        header_written = False

        fromlines = fromfh.readlines(options.buffer)
        tolines = tofh.readlines(options.buffer)
        while fromlines or tolines:
            diff = _make_diff(options, fromlines, tolines, fromfile, tofile,
                              fromdate, todate)
            if is_context:
                produced = _write_context_chunk(diff, from_offset, to_offset,
                                                header_written)
                # A chunk without differences yields nothing, so the header
                # only counts as written once some output was produced.
                header_written = header_written or produced
            else:
                # NOTE: unified, ndiff and HTML output is emitted per chunk
                # as is; multiple buffer runs repeat headers (and, for
                # HTML, produce multiple tables).
                sys.stdout.writelines(diff)

            from_offset += len(fromlines)
            to_offset += len(tolines)
            fromlines = fromfh.readlines(options.buffer)
            tolines = tofh.readlines(options.buffer)

# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
