MonetDB: pyapi - Added benchmark for PYTHON_MAP vs PYTHON.

Mark Raasveldt Mon, 13 Jul 2015 07:43:10 -0700

Changeset: 96de7e040d35 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=96de7e040d35
Modified Files:
        monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
        monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
        monetdb5/extras/pyapi/pyapi.c
Branch: pyapi
Log Message:


Added benchmark for PYTHON_MAP vs PYTHON.


diffs (154 lines):

diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
--- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
+++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
@@ -152,37 +152,64 @@ if str(arguments[1]).lower() == "input" 
 
     import time
     f = open(output_file + '.tsv', "w+")
-    if str(arguments[1]).lower() == "input-map":
-        f.write(format_headers('[AXIS]:Data Size (MB)', '[MEASUREMENT]:Total Time (s)'))
-    else:
-        f.write(format_headers('[AXIS]:Data Size (MB)', '[MEASUREMENT]:Total Time (s)', '[MEASUREMENT]:PyAPI Memory (MB)', '[MEASUREMENT]:PyAPI Time (s)'))
+    f.write(format_headers('[AXIS]:Data Size (MB)', '[MEASUREMENT]:Total Time (s)', '[MEASUREMENT]:PyAPI Memory (MB)', '[MEASUREMENT]:PyAPI Time (s)'))
     mb = []
     for i in range(4, len(arguments)):
         mb.append(float(arguments[i]))
 
     for size in mb:
-        cursor.execute('create temporary table integers as SELECT * FROM generate_integers(' + str(size) + ') with data;')
+        cursor.execute('create table integers as SELECT * FROM generate_integers(' + str(size) + ') with data;')
         #result_file = open(temp_file, 'r')
         #result_file.readline()
-        results = []
-        result_file = open(temp_file, 'w+')
-        result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n")
-        result_file.close();
-        for i in range(0,test_count):
-            start = time.time()
-            cursor.execute('select import_test(i) from integers;');
-            cursor.fetchall();
-            end = time.time()
-            list.append(results, end - start)
-        result_file = open(temp_file, 'r')
-        result_file.readline()
-        for result in results:
-            pyapi_results = result_file.readline().translate(None, '\n').split('\t')
-            if (str(arguments[1]).lower() == "input-map"):
-                f.write(format_output(size, result))
-            else:
+
+        if (str(arguments[1]).lower() == "input"):
+            results = []
+            result_file = open(temp_file, 'w+')
+            result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n")
+            result_file.close();
+            for i in range(0,test_count):
+                start = time.time()
+                cursor.execute('select import_test(i) from integers;');
+                cursor.fetchall();
+                end = time.time()
+                list.append(results, end - start)
+            result_file = open(temp_file, 'r')
+            result_file.readline()
+            for result in results:
+                pyapi_results = result_file.readline().translate(None, '\n').split('\t')
                 f.write(format_output(size, result, float(pyapi_results[0]) / 1000**2, pyapi_results[1]))
-            f.flush()
+                f.flush()
+        else:
+            # for input-map we need to do some special analysis of the PyAPI output
+            # this is because every thread writes memory usage and execution time to the temp_file
+            # rather than just having one entry for per query
+            # so we have to analyse the result file for every query we perform
+            results = [[], [], []]
+            for i in range(0,test_count):
+                # clear the result file
+                result_file = open(temp_file, 'w+')
+                result_file.write("")
+                result_file.close();
+                # execute the query, measure the total time
+                start = time.time()
+                cursor.execute('select import_test(i) from integers;');
+                cursor.fetchall();
+                end = time.time()
+                list.append(results[0], end - start)
+                # now we need to analyze the result file
+                # we use the total memory usage of all threads (sum) and the highest of all the execution times of the threads (max)
+                memory_usage = 0
+                peak_execution_time = 0
+                with open(temp_file, 'r') as result_file:
+                    for line in result_file:
+                        pyapi_results = line.translate(None, '\n').split('\t')
+                        memory_usage = memory_usage + float(pyapi_results[0]) / 1000 ** 2
+                        if float(pyapi_results[1]) > peak_execution_time: peak_execution_time = float(pyapi_results[1])
+                list.append(results[1], memory_usage)
+                list.append(results[2], peak_execution_time)
+            for i in range(0, len(results[0])):
+                f.write(format_output(size, results[0][i], results[1][i], results[2][i]))
+                f.flush()
         cursor.execute('drop table integers;')
     f.close()
 
diff --git a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
--- a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
+++ b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
@@ -10,7 +10,7 @@ export MSERVERTEST='netstat -ant | grep 
 # Testing parameters
 # Input test (zero copy vs copy)
 # The input sizes to test (in MB)
-export INPUT_TESTING_SIZES="0.1 1 10 100"
+export INPUT_TESTING_SIZES="0.1 1 10 100 1000"
 # Amount of tests to run for each size
 export INPUT_TESTING_NTESTS=10
 
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -1480,26 +1480,27 @@ str
         }
         MT_lock_unset(&pyapiLock, "pyapi.evaluate");
         fprintf(stdout, "# MonetDB/Python module loaded\n");
-    }
 #ifdef _PYAPI_VERBOSE_
-    option_verbose = GDKgetenv_isyes(verbose_enableflag) || GDKgetenv_istrue(verbose_enableflag);
+        option_verbose = GDKgetenv_isyes(verbose_enableflag) || GDKgetenv_istrue(verbose_enableflag);
 #endif
 #ifdef _PYAPI_DEBUG_
-    option_debug = GDKgetenv_isyes(debug_enableflag) || GDKgetenv_istrue(debug_enableflag);
-    (void) option_debug;
+        option_debug = GDKgetenv_isyes(debug_enableflag) || GDKgetenv_istrue(debug_enableflag);
+        (void) option_debug;
 #endif
 #ifdef _PYAPI_WARNINGS_
-    option_warning = GDKgetenv_isyes(warning_enableflag) || GDKgetenv_istrue(warning_enableflag);
+        option_warning = GDKgetenv_isyes(warning_enableflag) || GDKgetenv_istrue(warning_enableflag);
 #endif
 #ifdef _PYAPI_TESTING_
-    //These flags are for testing purposes, they shouldn't be used for normal purposes
-    option_zerocopyinput = !(GDKgetenv_isyes(zerocopyinput_disableflag) || GDKgetenv_istrue(zerocopyinput_disableflag));
-    option_zerocopyoutput = !(GDKgetenv_isyes(zerocopyoutput_disableflag) || GDKgetenv_istrue(zerocopyoutput_disableflag));
-    option_numpy_string_array = GDKgetenv_isyes(numpy_string_array_enableflag) || GDKgetenv_istrue(numpy_string_array_enableflag);
-    option_bytearray = !(GDKgetenv_isyes(bytearray_disableflag) || GDKgetenv_istrue(bytearray_disableflag));
-    option_alwaysunicode = (GDKgetenv_isyes(alwaysunicode_enableflag) || GDKgetenv_istrue(alwaysunicode_enableflag));
-    benchmark_output = GDKgetenv(benchmark_output_flag);
+        //These flags are for testing purposes, they shouldn't be used for normal purposes
+        option_zerocopyinput = !(GDKgetenv_isyes(zerocopyinput_disableflag) || GDKgetenv_istrue(zerocopyinput_disableflag));
+        option_zerocopyoutput = !(GDKgetenv_isyes(zerocopyoutput_disableflag) || GDKgetenv_istrue(zerocopyoutput_disableflag));
+        option_numpy_string_array = GDKgetenv_isyes(numpy_string_array_enableflag) || GDKgetenv_istrue(numpy_string_array_enableflag);
+        option_bytearray = !(GDKgetenv_isyes(bytearray_disableflag) || GDKgetenv_istrue(bytearray_disableflag));
+        option_alwaysunicode = (GDKgetenv_isyes(alwaysunicode_enableflag) || GDKgetenv_istrue(alwaysunicode_enableflag));
+        benchmark_output = GDKgetenv(benchmark_output_flag);
+        fprintf(stdout, "# MonetDB/Python testing enabled.\n");
 #endif
+    }
     return MAL_SUCCEED;
 }
 
@@ -1887,6 +1888,7 @@ PyObject *PyArrayObject_FromBAT(PyInput 
                     j++;
                 }
             }
+            //printf("%s\n", (char*)b->T->vheap->base + b->T->heap.base[0] + GDK_VAROFFSET);
             break;
 #ifdef HAVE_HGE
         case TYPE_hge:
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

MonetDB: pyapi - Added benchmark for PYTHON_MAP vs PYTHON.

Reply via email to