__author__ = "floyd fuh (floyd_fuh@yahoo.de)"

from levenshtein import *
import unittest

class GoodValues(unittest.TestCase):
    """Cross-check relative_distance_boolean against relative_distance.

    The class body builds ``good_values``, a list of
    ``(left, right, threshold)`` triples covering every combination of
    a left string ('a' * 0..maxleftA + 'b' * 0..maxleftB), a right string
    ('a' * 0..maxrightA + 'b' * 0..maxrightB) and a threshold taken from
    ``testNumbers``.
    """

    good_values = []
    testNumbers = []
    steps = 7
    # Evenly spaced thresholds 0/steps, 1/steps, ..., steps/steps.
    # Plain loops are used on purpose: they can read the class-level names
    # directly while the class body is being executed.
    for i in range(0, steps + 1):
        testNumbers.append(float(i) / steps)
    # Two extreme thresholds near the ends of the interval.
    # NOTE: the second literal has more digits than a float can hold and
    # evaluates to exactly 1.0, so it repeats the steps/steps threshold.
    testNumbers.extend([0.0000000000000000001, 0.99999999999999999999999])
    # Upper bounds (inclusive) for the number of 'a'/'b' characters.
    maxleftA = 1
    maxleftB = 1
    maxrightA = 1
    maxrightB = 100
    for i in testNumbers:
        for leftA in range(0, maxleftA + 1):
            for leftB in range(0, maxleftB + 1):
                for rightA in range(0, maxrightA + 1):
                    for rightB in range(0, maxrightB + 1):
                        good_values.append(
                            ('a' * leftA + 'b' * leftB,
                             'a' * rightA + 'b' * rightB,
                             i))

    def testRelativeDistanceBooleanGoodValues(self):
        """relative_distance_boolean and relative_distance >= threshold should give the same results"""
        for left, right, threshold in self.good_values:
            resBoolean = relative_distance_boolean(left, right, threshold)
            resRelativeLibrary = relative_distance(left, right) >= threshold

            # Name the failing triple; otherwise a mismatch among the
            # thousands of generated cases cannot be located.
            self.assertEqual(
                resBoolean,
                resRelativeLibrary,
                "mismatch for left=%r, right=%r, threshold=%r: "
                "relative_distance_boolean gave %r, "
                "relative_distance >= threshold gave %r"
                % (left, right, threshold, resBoolean, resRelativeLibrary))

    
class BadValues(unittest.TestCase):
    # Inputs that relative_distance_boolean is expected to reject.

    def testNotIterable(self):
        """relative_distance_boolean should fail if not iterable"""
        # For each threshold, put the non-iterable value (3) first on the
        # left side and then on the right side.
        for threshold in (0, 1, 0.5):
            for left, right in ((3, "a"), ("a", 3)):
                self.assertRaises(
                    TypeError, relative_distance_boolean, left, right, threshold)

    def testNoNumber(self):
        # A string passed as the threshold is expected to raise ValueError.
        not_a_number = "a"
        self.assertRaises(
            ValueError, relative_distance_boolean, "a", "a", not_a_number)

    def testNumberNotInRange(self):
        # Thresholds just outside and well outside the interval [0, 1] are
        # expected to raise TypeError.
        for threshold in (1.1, 1.00000000001, -1, -0.0000000001):
            self.assertRaises(
                TypeError, relative_distance_boolean, "a", "a", threshold)

def do_performance_tests():
    '''
    Compare the speed of relative_distance_boolean and relative_distance.

    The following tests are performance tests that should only prove
    that relative_distance_boolean is faster than the relative_distance
    method. These tests should NOT be run on a daily basis because they
    simply take too long (about 45 seconds). They also download several
    web pages to use as input strings, so network access is required.

    The timing sums are averaged over all rounds, whereas the reported
    win counts are reset at the start of each round and therefore
    describe the last round only.

    If you want to run them, uncomment the line at the end of this file.
    '''
    import contextlib
    import timeit
    import urllib2

    def fetch(url):
        # Close the HTTP response explicitly instead of leaving it to the
        # garbage collector.
        with contextlib.closing(urllib2.urlopen(url)) as response:
            return response.read()

    # Wall-clock timer that the standard library selects for benchmarking.
    timer = timeit.default_timer

    google = fetch("http://www.google.com")
    google2 = fetch("http://www.google.co.uk/")

    yahoo = fetch("http://www.yahoo.com/")
    yahoo2 = fetch("http://uk.yahoo.com/")

    bing = fetch("http://www.bing.com/")
    bing2 = fetch("http://www.bing.com/?cc=gb")

    # Each page compared with itself.
    same_page_pairs = [
        (google, google),
        (google2, google2),
        (yahoo, yahoo),
        (yahoo2, yahoo2),
        (bing, bing),
        (bing2, bing2),
    ]
    # Pages of different sites compared with each other.
    cross_page_pairs = [
        (bing, google),
        (bing, yahoo),
        (yahoo, google),
        (yahoo2, google),
        (bing2, google),
        (yahoo, google2),
    ]

    # (left, right, threshold) triples: both groups with a threshold close
    # to 1, then the same-page group with 0.1 and the cross-page group
    # with 0.6.
    performance_tests = []
    for pairs, threshold in ((same_page_pairs, 0.99999999),
                             (cross_page_pairs, 0.99999999),
                             (same_page_pairs, 0.1),
                             (cross_page_pairs, 0.6)):
        for left, right in pairs:
            performance_tests.append((left, right, threshold))

    #performance tests
    numOfTests = 20         # timed calls per triple and method
    numOfOverallTests = 4   # rounds over the complete list of triples

    boolean_time_sum = 0
    original_time_sum = 0
    true_result_count = 0
    false_result_count = 0

    # Output uses single-argument print(...) calls, which behave like the
    # print statement under Python 2.
    for _round in range(0, numOfOverallTests):

        boolean_win_count = 0
        original_win_count = 0

        for e, d, f in performance_tests:
            print(e[:40])
            print(d[:40])

            start = timer()
            for _run in range(0, numOfTests):
                relative_distance_boolean(e, d, f)
            end = timer()
            # One extra, untimed call to learn the result for the report.
            k = relative_distance_boolean(e, d, f)
            if k:
                true_result_count += numOfTests
            else:
                false_result_count += numOfTests
            boolean_time = end - start
            boolean_time_sum += boolean_time
            print("   boolean (" + str(k) + ") : " + str(boolean_time))

            start = timer()
            for _run in range(0, numOfTests):
                relative_distance(e, d) >= f
            end = timer()
            k = relative_distance(e, d) >= f
            original_time = end - start
            original_time_sum += original_time
            print("   original (" + str(k) + ") : " + str(original_time))

            # A tie counts as a win for the original library.
            if original_time > boolean_time:
                boolean_win_count += 1
            else:
                original_win_count += 1

    print('-----------')
    print('The new boolean method was in ' + str(boolean_win_count) + ' tests faster')
    print('The original library was in ' + str(original_win_count)+' tests faster')
    print('')
    print('The result was '+str(false_result_count)+' times False and '+str(true_result_count)+' True')
    print('The new boolean method is better if the answer is often False')
    print('')
    print('Time needed overall:')
    print('Boolean: ' + str(boolean_time_sum / numOfOverallTests))
    print('Original: ' + str(original_time_sum / numOfOverallTests))


if __name__ == "__main__":
    # Optional: change the upper bounds list first (not needed, hard coded).
    #from levenshtein import _generate_upper_bounds
    #_generate_upper_bounds(40,30)
    # Optional: run the performance tests (they take very long).
    #do_performance_tests()
    # Run the unit tests defined in this file.
    unittest.main()
