# Quick-and-dirty benchmark to demonstrate that binary search is
# competitive with hash-based dictionaries

from time import time
import numpy
from query_ext import BinarySearch

# Seed the generator so every run draws the same ids and the same queries.
numpy.random.seed(1)

# Number of ids generated, and also the number of lookups each loop issues.
nelem = 8191180
# Scale factor applied to uniform [0, 1) samples when generating the ids,
# i.e. the exclusive upper bound of the id range (2**63).
bits64 = 1 << 63

def calibrate_loop():
    """Run the bare query loop, without any lookup, to measure its overhead.

    Reads the first ``nelem`` entries of the module-level ``rnd`` one by one
    and discards them.  The script times this call into ``tref`` and
    subtracts that value from the dict and binary-search query timings.
    """
    for i in xrange(nelem):
        x = rnd[i]  # fetch only; the value is never used

def do_dict_queries():
    """Look up each of the first ``nelem`` entries of ``rnd`` in ``id2name``.

    Same loop as ``calibrate_loop`` plus one dictionary lookup per
    iteration.  Both ``rnd`` and ``id2name`` are module-level globals; the
    looked-up values are discarded.
    """
    for i in xrange(nelem):
        x = rnd[i]
        t = id2name[x]  # the hash lookup being benchmarked

def do_binary_queries(query):
    """Look up each of the first ``nelem`` entries of ``rnd`` through *query*.

    Same loop as ``calibrate_loop`` plus one ``query[x]`` per iteration.

    query -- object indexed with the values held in the module-level
             ``rnd``; the script passes a ``BinarySearch`` instance.
    """
    for i in xrange(nelem):
        x = rnd[i]
        t = query[x]  # the lookup being benchmarked; result is discarded

# Create the original random ids: signed 64-bit integers ('i8') obtained by
# scaling uniform samples from [0, 1) by bits64 and truncating to integers.
id1 = (numpy.random.rand(nelem)*bits64).astype('i8')

# Create the values to query: nelem ids taken from random positions of id1
# (repetitions allowed), so every queried value is one of the stored ids.
rnd = id1[numpy.random.randint(0, nelem, nelem)]

print "Creating the dictionary..."
# Maps every id in id1 to the same constant payload string.
id2name = {}
t1 = time()
# Element-by-element insertion: this loop is the work being timed.
for i in xrange(nelem):
    id2name[id1[i]] = "This is a bunch of text..."
print "Time for dict creation:", round(time()-t1, 3)

print "Calibrating loop..."
t1 = time()
calibrate_loop()
tref = time()-t1
print "Calibrating time:", round(tref, 3)

print "Timing queries with a dict..."
t1 = time()
do_dict_queries()
print "Time for dict query:", round((time()-t1)-tref, 3)

# Get rid of the dictionary
del id2name

# Build the array of strings: one fixed-width 'S30' entry per id, all
# holding the same payload text.
str2 = numpy.empty(nelem, dtype='S30')
str2[:] = "This is a bunch of text..."

# Sort the ids once.  argsort gives the permutation that orders id1, and
# gathering id1 through it yields the sorted ids, so a second full sort of
# the array is avoided.  This step is not included in any reported timing.
revid = id1.argsort()
sid = id1[revid]

print "Timing queries with binary search..."
t1 = time()
query = BinarySearch(sid, revid, str2)
do_binary_queries(query)
print "Time for binary queries:", round((time()-t1)-tref, 3)
