Changeset: 96de7e040d35 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=96de7e040d35
Modified Files:
	monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
	monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
	monetdb5/extras/pyapi/pyapi.c
Branch: pyapi
Log Message:
Added benchmark for PYTHON_MAP vs PYTHON. diffs (154 lines): diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py --- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py +++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py @@ -152,37 +152,64 @@ if str(arguments[1]).lower() == "input" import time f = open(output_file + '.tsv', "w+") - if str(arguments[1]).lower() == "input-map": - f.write(format_headers('[AXIS]:Data Size (MB)', '[MEASUREMENT]:Total Time (s)')) - else: - f.write(format_headers('[AXIS]:Data Size (MB)', '[MEASUREMENT]:Total Time (s)', '[MEASUREMENT]:PyAPI Memory (MB)', '[MEASUREMENT]:PyAPI Time (s)')) + f.write(format_headers('[AXIS]:Data Size (MB)', '[MEASUREMENT]:Total Time (s)', '[MEASUREMENT]:PyAPI Memory (MB)', '[MEASUREMENT]:PyAPI Time (s)')) mb = [] for i in range(4, len(arguments)): mb.append(float(arguments[i])) for size in mb: - cursor.execute('create temporary table integers as SELECT * FROM generate_integers(' + str(size) + ') with data;') + cursor.execute('create table integers as SELECT * FROM generate_integers(' + str(size) + ') with data;') #result_file = open(temp_file, 'r') #result_file.readline() - results = [] - result_file = open(temp_file, 'w+') - result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n") - result_file.close(); - for i in range(0,test_count): - start = time.time() - cursor.execute('select import_test(i) from integers;'); - cursor.fetchall(); - end = time.time() - list.append(results, end - start) - result_file = open(temp_file, 'r') - result_file.readline() - for result in results: - pyapi_results = result_file.readline().translate(None, '\n').split('\t') - if (str(arguments[1]).lower() == "input-map"): - f.write(format_output(size, result)) - else: + + if (str(arguments[1]).lower() == "input"): + results = [] + result_file = open(temp_file, 'w+') + result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n") + 
result_file.close(); + for i in range(0,test_count): + start = time.time() + cursor.execute('select import_test(i) from integers;'); + cursor.fetchall(); + end = time.time() + list.append(results, end - start) + result_file = open(temp_file, 'r') + result_file.readline() + for result in results: + pyapi_results = result_file.readline().translate(None, '\n').split('\t') f.write(format_output(size, result, float(pyapi_results[0]) / 1000**2, pyapi_results[1])) - f.flush() + f.flush() + else: + # for input-map we need to do some special analysis of the PyAPI output + # this is because every thread writes memory usage and execution time to the temp_file + # rather than just having one entry for per query + # so we have to analyse the result file for every query we perform + results = [[], [], []] + for i in range(0,test_count): + # clear the result file + result_file = open(temp_file, 'w+') + result_file.write("") + result_file.close(); + # execute the query, measure the total time + start = time.time() + cursor.execute('select import_test(i) from integers;'); + cursor.fetchall(); + end = time.time() + list.append(results[0], end - start) + # now we need to analyze the result file + # we use the total memory usage of all threads (sum) and the highest of all the execution times of the threads (max) + memory_usage = 0 + peak_execution_time = 0 + with open(temp_file, 'r') as result_file: + for line in result_file: + pyapi_results = line.translate(None, '\n').split('\t') + memory_usage = memory_usage + float(pyapi_results[0]) / 1000 ** 2 + if float(pyapi_results[1]) > peak_execution_time: peak_execution_time = float(pyapi_results[1]) + list.append(results[1], memory_usage) + list.append(results[2], peak_execution_time) + for i in range(0, len(results[0])): + f.write(format_output(size, results[0][i], results[1][i], results[2][i])) + f.flush() cursor.execute('drop table integers;') f.close() diff --git a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh 
b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh --- a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh +++ b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh @@ -10,7 +10,7 @@ export MSERVERTEST='netstat -ant | grep # Testing parameters # Input test (zero copy vs copy) # The input sizes to test (in MB) -export INPUT_TESTING_SIZES="0.1 1 10 100" +export INPUT_TESTING_SIZES="0.1 1 10 100 1000" # Amount of tests to run for each size export INPUT_TESTING_NTESTS=10 diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -1480,26 +1480,27 @@ str } MT_lock_unset(&pyapiLock, "pyapi.evaluate"); fprintf(stdout, "# MonetDB/Python module loaded\n"); - } #ifdef _PYAPI_VERBOSE_ - option_verbose = GDKgetenv_isyes(verbose_enableflag) || GDKgetenv_istrue(verbose_enableflag); + option_verbose = GDKgetenv_isyes(verbose_enableflag) || GDKgetenv_istrue(verbose_enableflag); #endif #ifdef _PYAPI_DEBUG_ - option_debug = GDKgetenv_isyes(debug_enableflag) || GDKgetenv_istrue(debug_enableflag); - (void) option_debug; + option_debug = GDKgetenv_isyes(debug_enableflag) || GDKgetenv_istrue(debug_enableflag); + (void) option_debug; #endif #ifdef _PYAPI_WARNINGS_ - option_warning = GDKgetenv_isyes(warning_enableflag) || GDKgetenv_istrue(warning_enableflag); + option_warning = GDKgetenv_isyes(warning_enableflag) || GDKgetenv_istrue(warning_enableflag); #endif #ifdef _PYAPI_TESTING_ - //These flags are for testing purposes, they shouldn't be used for normal purposes - option_zerocopyinput = !(GDKgetenv_isyes(zerocopyinput_disableflag) || GDKgetenv_istrue(zerocopyinput_disableflag)); - option_zerocopyoutput = !(GDKgetenv_isyes(zerocopyoutput_disableflag) || GDKgetenv_istrue(zerocopyoutput_disableflag)); - option_numpy_string_array = GDKgetenv_isyes(numpy_string_array_enableflag) || GDKgetenv_istrue(numpy_string_array_enableflag); - option_bytearray = !(GDKgetenv_isyes(bytearray_disableflag) || 
GDKgetenv_istrue(bytearray_disableflag)); - option_alwaysunicode = (GDKgetenv_isyes(alwaysunicode_enableflag) || GDKgetenv_istrue(alwaysunicode_enableflag)); - benchmark_output = GDKgetenv(benchmark_output_flag); + //These flags are for testing purposes, they shouldn't be used for normal purposes + option_zerocopyinput = !(GDKgetenv_isyes(zerocopyinput_disableflag) || GDKgetenv_istrue(zerocopyinput_disableflag)); + option_zerocopyoutput = !(GDKgetenv_isyes(zerocopyoutput_disableflag) || GDKgetenv_istrue(zerocopyoutput_disableflag)); + option_numpy_string_array = GDKgetenv_isyes(numpy_string_array_enableflag) || GDKgetenv_istrue(numpy_string_array_enableflag); + option_bytearray = !(GDKgetenv_isyes(bytearray_disableflag) || GDKgetenv_istrue(bytearray_disableflag)); + option_alwaysunicode = (GDKgetenv_isyes(alwaysunicode_enableflag) || GDKgetenv_istrue(alwaysunicode_enableflag)); + benchmark_output = GDKgetenv(benchmark_output_flag); + fprintf(stdout, "# MonetDB/Python testing enabled.\n"); #endif + } return MAL_SUCCEED; } @@ -1887,6 +1888,7 @@ PyObject *PyArrayObject_FromBAT(PyInput j++; } } + //printf("%s\n", (char*)b->T->vheap->base + b->T->heap.base[0] + GDK_VAROFFSET); break; #ifdef HAVE_HGE case TYPE_hge: _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list