Changeset: 461a7eb33720 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=461a7eb33720 Modified Files: monetdb5/extras/pyapi/Benchmarks/graph.py monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh Branch: pyapi Log Message:
Slightly redesign testing and add pyfits benchmark. diffs (truncated from 649 to 300 lines): diff --git a/monetdb5/extras/pyapi/Benchmarks/graph.py b/monetdb5/extras/pyapi/Benchmarks/graph.py --- a/monetdb5/extras/pyapi/Benchmarks/graph.py +++ b/monetdb5/extras/pyapi/Benchmarks/graph.py @@ -21,7 +21,7 @@ y_log = False line_plot = False fill_time = False data_start = 3 -graph_colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'white', 'orange', 'darkgreen', 'aliceblue', 'darkgoldenrod', 'darkorchid', 'darkred', 'forestgreen', 'lavenderblush', 'lightsalmon', 'midnightblue', 'moccasin', 'papayawhip', 'turquoise', 'violet'] +graph_colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'white', 'orange', 'darkgreen', 'darkgray', 'gold', 'darkorchid', 'darkred', 'violet', 'lavenderblush', 'lightsalmon', 'midnightblue', 'moccasin', 'papayawhip', 'turquoise', 'violet'] for i in range(3, len(arguments)): if '-xlog' in arguments[i]: x_log = True elif '-ylog' in arguments[i]: y_log = True diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py --- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py +++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py @@ -127,6 +127,8 @@ if len(split) > 1 and split[1].lower() = hot_test = False args_test_type = split[0] +print("Start test %s-%s" % (args_input_database, args_test_type)) + drop_cache = os.environ["DROP_CACHE_COMMAND"] def drop_all_caches(): array = numpy.zeros(100000) @@ -557,14 +559,18 @@ else: for i in range(0, 2): database_execute() for i in range(0,test_count): - if not hot_test: drop_all_caches() #drop caches everytime for cold tests + if not hot_test: + drop_all_caches() #drop caches everytime for cold tests + database_final() + os.system("%s %s %s %s" % (input_dir + "/randomstrings", size, input_type, input_file)) + database_load() start = time.time() database_execute() end = time.time() f.write(format_output(size, end - start)) database_clear() f.close() - database_final + database_final() input_type = "integer" function_name = str(args_test_type).lower() @@ -579,29 +585,34 @@ else: return numpy.min(numpy.sqrt(numpy.abs(a))) function = sqroot return_value = "double precision" + if function_name == "quantile": + def quantile(a): + return numpy.percentile(a, 50) + function = quantile + return_value = "integer" import inspect inspect_result = inspect.getsourcelines(function) source_code = "".join([" " + x.lstrip() for x in inspect_result[0][1:]]) - if str(args_input_database).lower() == "postgres": + if str(args_input_database).lower() == "plpython" or str(args_input_database).lower() == "postgres": client = os.environ["POSTGRES_CLIENT_COMMAND"] dropdb = os.environ["POSTGRES_DROPDB_COMMAND"] initdb = os.environ["POSTGRES_CREATEDB_COMMAND"] + input_dir = os.environ["POSTGRES_CWD"] function_name = str(args_test_type).lower() + createdb_file = "%s/%s.createdb.sql" % (input_dir, function_name) + run_file = "%s/%s.sql" % (input_dir, function_name) def postgres_init(): - input_dir = os.environ["POSTGRES_CWD"] + if str(args_input_database).lower() == "plpython": + if function_name == "identity": + source = " return a" + elif function_name == "sqroot": + source = " import math\n return math.sqrt(abs(a))" + else: raise Exception("Unsupported function %s" % function_name) - createdb_file = "%s/%s.createdb.sql" % (input_dir, function_name) - run_file = "%s/%s.sql" % (input_dir, function_name) - - if function_name == "identity": - source = " return a" - if function_name == "sqroot": - source = " import math\n return math.sqrt(abs(a))" - - createdb_sql = """ + createdb_sql = """ CREATE TABLE integers(i integer); COPY integers FROM '%s' DELIMITER ',' CSV; @@ -612,9 +623,18 @@ AS $$ %s $$ LANGUAGE plpythonu;""" % (input_file, function_name, return_value, source) - run_sql = """ - SELECT MIN(%s(i)) FROM integers; - """ % function_name + run_sql = """ + SELECT MIN(%s(i)) FROM integers; + """ % function_name + elif str(args_input_database).lower() == "postgres": + if function_name == "quantile": + run_sql = "SELECT percentile_cont(0.5) WITHIN GROUP(ORDER BY i) FROM integers;" + else: raise Exception("Unsupported function %s" % function_name) + createdb_sql = """ + CREATE TABLE integers(i integer); + + COPY integers FROM '%s' DELIMITER ',' CSV;""" % input_file + createdb = open(createdb_file, 'w+') createdb.write(createdb_sql) @@ -641,7 +661,7 @@ AS $$ execute_test(input_type, postgres_init, postgres_load, postgres_execute, postgres_clear, postgres_final) elif str(args_input_database).lower() == "sqlitemem" or str(args_input_database).lower() == "sqlitedb": - import csv, sqlite3 + import sqlite3 database_file = os.environ["SQLITE_DB_FILE"] database_name = ":memory:" if str(args_input_database).lower() == "sqlitemem" else database_file @@ -674,7 +694,7 @@ AS $$ conn.close() execute_test(input_type, sqlite_init, sqlite_load, sqlite_execute, sqlite_clear, sqlite_final) - elif str(args_input_database).lower() == "monetdbmapi" or str(args_input_database).lower() == "pyapi" or str(args_input_database).lower() == "pyapimap" or str(args_input_database).lower() == "rapi": + elif str(args_input_database).lower() == "monetdbmapi" or str(args_input_database).lower() == "pyapi" or str(args_input_database).lower() == "pyapimap" or str(args_input_database).lower() == "rapi" or str(args_input_database).lower() == "monetdb": import monetdb.sql for i in range(0, max_retries): try: @@ -701,18 +721,26 @@ AS $$ elif str(args_input_database).lower() == "rapi": if function_name == "identity": c.execute("CREATE FUNCTION FUNC_%s(a integer) RETURNS %s LANGUAGE R { min(a) };" % (function_name,return_value)) - if function_name == "sqroot": + elif function_name == "sqroot": c.execute("CREATE FUNCTION FUNC_%s(a integer) RETURNS %s LANGUAGE R { min(sqrt(abs(a))) };" % (function_name,return_value)) + elif function_name == "quantile": + c.execute("CREATE FUNCTION FUNC_%s(a integer) RETURNS %s LANGUAGE R { as.integer(quantile(a,0.5)) };" % (function_name,return_value)) + else: raise Exception("Unsupported function %s" % function_name) if str(args_input_database).lower() == "pyapi" or str(args_input_database).lower() == "pyapimap" or str(args_input_database).lower() == "rapi": def monetdb_execute(): c.execute("SELECT FUNC_%s(i) FROM integers;" % function_name) result = c.fetchall() - else: + elif str(args_input_database).lower() == "monetdbmapi": def monetdb_execute(): c.execute('SELECT * FROM integers') result = c.fetchall() function(numpy.array(result, dtype=numpy.int32)) + elif str(args_input_database).lower() == "monetdb": + def monetdb_execute(): + if function_name == "quantile": c.execute("SELECT quantile(i, 0.5) FROM integers;") + else: raise Exception("Unsupported function %s" % function_name) + result = c.fetchall() def monetdb_clear(): c.execute("DROP TABLE integers;") @@ -743,7 +771,7 @@ AS $$ def psycopg2_execute(): c.execute("SELECT * FROM integers;") result = c.fetchall() - print function(numpy.array(result, dtype=numpy.int32)) + function(numpy.array(result, dtype=numpy.int32)) def psycopg2_clear(): c.execute("DROP TABLE integers;") @@ -755,8 +783,7 @@ AS $$ execute_test(input_type, psycopg2_init, psycopg2_load, psycopg2_execute, psycopg2_clear, psycopg2_final) elif str(args_input_database).lower() == "pytables": - import tables - import csv + import tables, pandas as pd table_file = 'testfile.h5' @@ -771,12 +798,9 @@ AS $$ group = file.create_group('/', 'integers', 'integer_data') table = file.create_table(group, 'values', description, "example") values = table.row - with open(input_file, 'rb') as csvfile: - reader = csv.reader(csvfile) - result = [x for x in reader] - for x in result: - values['i'] = int(x[0]) - values.append() + for x in pd.read_csv(input_file).values: + values['i'] = int(x) + values.append() table.flush() file.close() @@ -814,18 +838,60 @@ AS $$ os.remove(input_file) execute_test(input_type, csv_init, csv_load, csv_execute, csv_clear, csv_final) + elif str(args_input_database).lower() == "pandascsv": + import pandas as pd + def csv_init(): + return None + + def csv_load(): + return None + + def csv_execute(): + a = pd.read_csv(input_file) + function(a.values) + + def csv_clear(): + return None + + def csv_final(): + os.remove(input_file) + + execute_test(input_type, csv_init, csv_load, csv_execute, csv_clear, csv_final) + elif str(args_input_database).lower() == "pyfits": + import pyfits, pandas as pd + hdu_file = 'tempfile.fits' + def pyfits_init(): + return None + + def pyfits_load(): + try: os.remove(hdu_file) + except: pass + array = pd.read_csv(input_file).values + numpy_array = numpy.array(array, dtype=numpy.int32) + hdu = pyfits.PrimaryHDU(numpy_array) + hdu.writeto(hdu_file) + + def pyfits_execute(): + hdulist = pyfits.open(hdu_file) + function(hdulist[0].data) + + def pyfits_clear(): + return None + + def pyfits_final(): + os.remove(input_file) + os.remove(hdu_file) + + execute_test(input_type, pyfits_init, pyfits_load, pyfits_execute, pyfits_clear, pyfits_final) elif str(args_input_database).lower() == "numpybinary": - import csv, numpy + import pandas as pd, numpy numpy_binary = 'tempfile.npy' def numpy_init(): return None def numpy_load(): - with open(input_file, 'rb') as csvfile: - reader = csv.reader(csvfile) - result = [int(x[0]) for x in reader] - numpy_array = numpy.array(result, dtype=numpy.int32) - numpy.save(numpy_binary, numpy_array) + array = pd.read_csv(input_file).values + numpy.save(numpy_binary, numpy.array(array, dtype=numpy.int32)) def numpy_execute(): numpy_array = numpy.load(numpy_binary) @@ -835,12 +901,12 @@ AS $$ return None def numpy_final(): + os.remove(numpy_binary) os.remove(input_file) - os.remove(numpy_binary) execute_test(input_type, numpy_init, numpy_load, numpy_execute, numpy_clear, numpy_final) elif str(args_input_database).lower() == "castra": - import castra, csv, shutil, pandas as pd + import castra, shutil, pandas as pd castra_binary = 'data.castra' def castra_init(): return None @@ -856,7 +922,7 @@ AS $$ def castra_execute(): c = castra.Castra(castra_binary, readonly=True) - print function(c[:, 'i'].values) + function(c[:, 'i'].values) def castra_clear(): shutil.rmtree(castra_binary) @@ -866,17 +932,15 @@ AS $$ execute_test(input_type, castra_init, castra_load, castra_execute, castra_clear, castra_final) elif str(args_input_database).lower() == "numpymemorymap": - import csv, numpy + import pandas as pd, numpy numpy_binary = 'tempfile.npy' def numpy_init(): return None def numpy_load(): - with open(input_file, 'rb') as csvfile: - reader = csv.reader(csvfile) - result = [int(x[0]) for x in reader] _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list