#! Python
# -*- coding: utf-8 -*-

import difflib
import itertools

def compare_moves(a, b, similarity_threshold=0.6):
    """
    Poor man's text comparison with simple moves check. Compares two strings using difflib 
    and additionally tries to detect moved blocks 
    by comparing similar deleted and inserted segments with each other - given the similarity_threshold.
    """

    seq_matcher = difflib.SequenceMatcher(isjunk=None, a=a, b=b, autojunk=False)
    diff_raw = [[tag, i1, i2, j1, j2, a[i1:i2], b[j1:j2]] for tag, i1, i2, j1, j2 in seq_matcher.get_opcodes()]

    deleted, inserted = {}, {}
    for tag, i1, i2, j1, j2 in seq_matcher.get_opcodes():
        if tag == 'delete':
            deleted[(i1, i2)] = [tag, i1, i2, j1, j2, a[i1:i2]]
        elif tag == 'insert':
            inserted[(i1, i2)] = [tag, i1, i2, j1, j2, b[j1:j2]]

    possibly_moved_blocks = []
    for deleted_item, inserted_item in itertools.product(deleted.values(), inserted.values()):
        similarity_ratio = difflib.SequenceMatcher(isjunk=None, a=deleted_item[5], b=inserted_item[5], autojunk=False).ratio()
        if similarity_ratio >= similarity_threshold:
            possibly_moved_blocks.append([deleted_item, inserted_item, similarity_ratio])

    print diff_raw
    print possibly_moved_blocks


if __name__ == "__main__":
    compare_moves("abcXYZdeABfghijklmnopABBCq", "ABCDabcdeACfgXYXZhijklmnopq", similarity_threshold = 0.6)
