Changeset: 461a7eb33720 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=461a7eb33720
Modified Files:
        monetdb5/extras/pyapi/Benchmarks/graph.py
        monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
        monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
Branch: pyapi
Log Message:

Slightly redesign testing and add pyfits benchmark.


diffs (truncated from 649 to 300 lines):

diff --git a/monetdb5/extras/pyapi/Benchmarks/graph.py b/monetdb5/extras/pyapi/Benchmarks/graph.py
--- a/monetdb5/extras/pyapi/Benchmarks/graph.py
+++ b/monetdb5/extras/pyapi/Benchmarks/graph.py
@@ -21,7 +21,7 @@ y_log = False
 line_plot = False
 fill_time = False
 data_start = 3
-graph_colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'white', 'orange', 'darkgreen', 'aliceblue', 'darkgoldenrod', 'darkorchid', 'darkred', 'forestgreen', 'lavenderblush', 'lightsalmon', 'midnightblue', 'moccasin', 'papayawhip', 'turquoise', 'violet']
+graph_colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'white', 'orange', 'darkgreen', 'darkgray', 'gold', 'darkorchid', 'darkred', 'violet', 'lavenderblush', 'lightsalmon', 'midnightblue', 'moccasin', 'papayawhip', 'turquoise', 'violet']
 for i in range(3, len(arguments)):
     if '-xlog' in arguments[i]: x_log = True
     elif '-ylog' in arguments[i]: y_log = True
diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
--- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
+++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
@@ -127,6 +127,8 @@ if len(split) > 1 and split[1].lower() =
     hot_test = False
 args_test_type = split[0]
 
+print("Start test %s-%s" % (args_input_database, args_test_type))
+
 drop_cache = os.environ["DROP_CACHE_COMMAND"]
 def drop_all_caches():
     array = numpy.zeros(100000)
@@ -557,14 +559,18 @@ else:
                 for i in range(0, 2):
                     database_execute()
             for i in range(0,test_count):
-                if not hot_test: drop_all_caches() #drop caches everytime for cold tests
+                if not hot_test:
+                    drop_all_caches() #drop caches everytime for cold tests
+                    database_final()
+                    os.system("%s %s %s %s" % (input_dir + "/randomstrings", size, input_type, input_file))
+                    database_load()
                 start = time.time()
                 database_execute()
                 end = time.time()
                 f.write(format_output(size, end - start))
             database_clear()
         f.close()
-        database_final
+        database_final()
 
     input_type = "integer"
     function_name = str(args_test_type).lower()
@@ -579,29 +585,34 @@ else:
             return numpy.min(numpy.sqrt(numpy.abs(a)))
         function = sqroot
         return_value = "double precision"
+    if function_name == "quantile":
+        def quantile(a):
+            return numpy.percentile(a, 50)
+        function = quantile
+        return_value = "integer"
 
     import inspect
     inspect_result = inspect.getsourcelines(function)
     source_code = "".join(["  " + x.lstrip() for x in inspect_result[0][1:]])
 
-    if str(args_input_database).lower() == "postgres":
+    if str(args_input_database).lower() == "plpython" or str(args_input_database).lower() == "postgres":
         client = os.environ["POSTGRES_CLIENT_COMMAND"]
         dropdb = os.environ["POSTGRES_DROPDB_COMMAND"]
         initdb = os.environ["POSTGRES_CREATEDB_COMMAND"]
 
+        input_dir = os.environ["POSTGRES_CWD"]
         function_name = str(args_test_type).lower()
+        createdb_file = "%s/%s.createdb.sql" % (input_dir, function_name)
+        run_file = "%s/%s.sql" % (input_dir, function_name)
         def postgres_init():
-            input_dir = os.environ["POSTGRES_CWD"]
+            if str(args_input_database).lower() == "plpython":
+                if function_name == "identity":
+                    source = "  return a"
+                elif function_name == "sqroot":
+                    source = "  import math\n  return math.sqrt(abs(a))"
+                else: raise Exception("Unsupported function %s" % function_name)
 
-            createdb_file = "%s/%s.createdb.sql" % (input_dir, function_name)
-            run_file = "%s/%s.sql" % (input_dir, function_name)
-
-            if function_name == "identity":
-                source = "  return a"
-            if function_name == "sqroot":
-                source = "  import math\n  return math.sqrt(abs(a))"
-
-            createdb_sql = """
+                createdb_sql = """
 CREATE TABLE integers(i integer);
 
 COPY integers FROM '%s' DELIMITER ',' CSV;
@@ -612,9 +623,18 @@ AS $$
 %s
 $$ LANGUAGE plpythonu;""" % (input_file, function_name, return_value, source)
 
-            run_sql = """
-            SELECT MIN(%s(i)) FROM integers;
-            """ % function_name
+                run_sql = """
+                SELECT MIN(%s(i)) FROM integers;
+                """ % function_name
+            elif str(args_input_database).lower() == "postgres":
+                if function_name == "quantile":
+                    run_sql = "SELECT percentile_cont(0.5) WITHIN GROUP(ORDER BY i) FROM integers;"
+                else: raise Exception("Unsupported function %s" % function_name)
+                createdb_sql = """
+                CREATE TABLE integers(i integer);
+
+                COPY integers FROM '%s' DELIMITER ',' CSV;""" % input_file
+
 
             createdb = open(createdb_file, 'w+')
             createdb.write(createdb_sql)
@@ -641,7 +661,7 @@ AS $$
 
         execute_test(input_type, postgres_init, postgres_load, postgres_execute, postgres_clear, postgres_final)
     elif str(args_input_database).lower() == "sqlitemem" or str(args_input_database).lower() == "sqlitedb":
-        import csv, sqlite3
+        import sqlite3
         database_file = os.environ["SQLITE_DB_FILE"]
         database_name = ":memory:" if str(args_input_database).lower() == "sqlitemem" else database_file
 
@@ -674,7 +694,7 @@ AS $$
             conn.close()
 
         execute_test(input_type, sqlite_init, sqlite_load, sqlite_execute, sqlite_clear, sqlite_final)
-    elif str(args_input_database).lower() == "monetdbmapi" or str(args_input_database).lower() == "pyapi" or str(args_input_database).lower() == "pyapimap" or str(args_input_database).lower() == "rapi":
+    elif str(args_input_database).lower() == "monetdbmapi" or str(args_input_database).lower() == "pyapi" or str(args_input_database).lower() == "pyapimap" or str(args_input_database).lower() == "rapi"  or str(args_input_database).lower() == "monetdb":
         import monetdb.sql
         for i in range(0, max_retries):
             try:
@@ -701,18 +721,26 @@ AS $$
             elif str(args_input_database).lower() == "rapi":
                 if function_name == "identity":
                     c.execute("CREATE FUNCTION FUNC_%s(a integer) RETURNS %s LANGUAGE R { min(a) };" % (function_name,return_value))
-                if function_name == "sqroot":
+                elif function_name == "sqroot":
                     c.execute("CREATE FUNCTION FUNC_%s(a integer) RETURNS %s LANGUAGE R { min(sqrt(abs(a))) };" % (function_name,return_value))
+                elif function_name == "quantile":
+                    c.execute("CREATE FUNCTION FUNC_%s(a integer) RETURNS %s LANGUAGE R { as.integer(quantile(a,0.5)) };" % (function_name,return_value))
+                else: raise Exception("Unsupported function %s" % function_name)
 
         if str(args_input_database).lower() == "pyapi" or str(args_input_database).lower() == "pyapimap" or str(args_input_database).lower() == "rapi":
             def monetdb_execute():
                 c.execute("SELECT FUNC_%s(i) FROM integers;" % function_name)
                 result = c.fetchall()
-        else:
+        elif str(args_input_database).lower() == "monetdbmapi":
             def monetdb_execute():
                 c.execute('SELECT * FROM integers')
                 result = c.fetchall()
                 function(numpy.array(result, dtype=numpy.int32))
+        elif str(args_input_database).lower() == "monetdb":
+            def monetdb_execute():
+                if function_name == "quantile": c.execute("SELECT quantile(i, 0.5) FROM integers;")
+                else: raise Exception("Unsupported function %s" % function_name)
+                result = c.fetchall()
 
         def monetdb_clear():
             c.execute("DROP TABLE integers;")
@@ -743,7 +771,7 @@ AS $$
         def psycopg2_execute():
             c.execute("SELECT * FROM integers;")
             result = c.fetchall()
-            print function(numpy.array(result, dtype=numpy.int32))
+            function(numpy.array(result, dtype=numpy.int32))
 
         def psycopg2_clear():
             c.execute("DROP TABLE integers;")
@@ -755,8 +783,7 @@ AS $$
 
         execute_test(input_type, psycopg2_init, psycopg2_load, psycopg2_execute, psycopg2_clear, psycopg2_final)
     elif str(args_input_database).lower() == "pytables":
-        import tables
-        import csv
+        import tables, pandas as pd
 
         table_file = 'testfile.h5'
 
@@ -771,12 +798,9 @@ AS $$
             group = file.create_group('/', 'integers', 'integer_data')
             table = file.create_table(group, 'values', description, "example")
             values = table.row
-            with open(input_file, 'rb') as csvfile:
-                reader = csv.reader(csvfile)
-                result = [x for x in reader]
-                for x in result:
-                    values['i'] = int(x[0])
-                    values.append()
+            for x in pd.read_csv(input_file).values:
+                values['i'] = int(x)
+                values.append()
             table.flush()
             file.close()
 
@@ -814,18 +838,60 @@ AS $$
             os.remove(input_file)
 
         execute_test(input_type, csv_init, csv_load, csv_execute, csv_clear, csv_final)
+    elif str(args_input_database).lower() == "pandascsv":
+        import pandas as pd
+        def csv_init():
+            return None
+
+        def csv_load():
+            return None
+
+        def csv_execute():
+            a = pd.read_csv(input_file)
+            function(a.values)
+
+        def csv_clear():
+            return None
+
+        def csv_final():
+            os.remove(input_file)
+
+        execute_test(input_type, csv_init, csv_load, csv_execute, csv_clear, csv_final)
+    elif str(args_input_database).lower() == "pyfits":
+        import pyfits, pandas as pd
+        hdu_file = 'tempfile.fits'
+        def pyfits_init():
+            return None
+
+        def pyfits_load():
+            try: os.remove(hdu_file)
+            except: pass
+            array = pd.read_csv(input_file).values
+            numpy_array = numpy.array(array, dtype=numpy.int32)
+            hdu = pyfits.PrimaryHDU(numpy_array)
+            hdu.writeto(hdu_file)
+
+        def pyfits_execute():
+            hdulist = pyfits.open(hdu_file)
+            function(hdulist[0].data)
+
+        def pyfits_clear():
+            return None
+
+        def pyfits_final():
+            os.remove(input_file)
+            os.remove(hdu_file)
+
+        execute_test(input_type, pyfits_init, pyfits_load, pyfits_execute, pyfits_clear, pyfits_final)
     elif str(args_input_database).lower() == "numpybinary":
-        import csv, numpy
+        import pandas as pd, numpy
         numpy_binary = 'tempfile.npy'
         def numpy_init():
             return None
 
         def numpy_load():
-            with open(input_file, 'rb') as csvfile:
-                reader = csv.reader(csvfile)
-                result = [int(x[0]) for x in reader]
-                numpy_array = numpy.array(result, dtype=numpy.int32)
-                numpy.save(numpy_binary, numpy_array)
+            array = pd.read_csv(input_file).values
+            numpy.save(numpy_binary, numpy.array(array, dtype=numpy.int32))
 
         def numpy_execute():
             numpy_array = numpy.load(numpy_binary)
@@ -835,12 +901,12 @@ AS $$
             return None
 
         def numpy_final():
+            os.remove(numpy_binary)
             os.remove(input_file)
-            os.remove(numpy_binary)
 
         execute_test(input_type, numpy_init, numpy_load, numpy_execute, numpy_clear, numpy_final)
     elif str(args_input_database).lower() == "castra":
-        import castra, csv, shutil, pandas as pd
+        import castra, shutil, pandas as pd
         castra_binary = 'data.castra'
         def castra_init():
             return None
@@ -856,7 +922,7 @@ AS $$
 
         def castra_execute():
             c = castra.Castra(castra_binary, readonly=True)
-            print function(c[:, 'i'].values)
+            function(c[:, 'i'].values)
 
         def castra_clear():
             shutil.rmtree(castra_binary)
@@ -866,17 +932,15 @@ AS $$
 
         execute_test(input_type, castra_init, castra_load, castra_execute, castra_clear, castra_final)
     elif str(args_input_database).lower() == "numpymemorymap":
-        import csv, numpy
+        import pandas as pd, numpy
         numpy_binary = 'tempfile.npy'
         def numpy_init():
             return None
 
         def numpy_load():
-            with open(input_file, 'rb') as csvfile:
-                reader = csv.reader(csvfile)
-                result = [int(x[0]) for x in reader]
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to