Changeset: dc67d325758f for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=dc67d325758f
Modified Files:
        monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
        monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
Branch: pyapi
Log Message:

Use C program for generating random strings in benchmarks.


diffs (165 lines):

diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
--- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
+++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
@@ -281,67 +281,27 @@ elif str(arguments[1]).lower() == "outpu
     cursor.execute('rollback')
 
 elif str(arguments[1]).lower() == "string_samelength" or str(arguments[1]).lower() == "string_extremeunicode":
-    #todo: this
-    #benchmark_dir = os.environ["PYAPI_BENCHMARKS_DIR"]
-    #os.system("gcc " + benchmark_dir + "/randomstrings.c -o randomstrings")
+    benchmark_dir = os.environ["PYAPI_BENCHMARKS_DIR"]
+    os.system("gcc " + benchmark_dir + "/randomstrings.c -o randomstrings")
+    result_path = os.path.join(os.getcwd(), 'result.txt')
 
-    #def generate_strings_samelength():
-    #   file = open("result.txt", 'r')
-    #   content = file.read()
-    #   strings = content.split(' ')
-    #   result = numpy.array(strings)
-    #   return result
     if str(arguments[1]).lower() == "string_samelength":
-        def generate_strings_samelength(mb, length):
-            def random_string(length):
-                import random
-                import string
-                result = ""
-                for i in range(0, length):
-                    result += random.choice(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
-                return result
-            import random
-            import math
-            byte_size = mb * 1000 * 1000
-            string_size_byte = length
-            string_count = int(byte_size / string_size_byte)
-            if length < 15:
-                min_int = math.pow(10, length - 1)
-                max_int = math.pow(10, length) - 1
-                strings = numpy.random.random_integers(min_int, max_int, string_count).astype('S' + str(length))
-                return strings
-            else:
-                strings = numpy.zeros(string_count, dtype='S' + str(length))
-                for i in range(0, string_count):
-                    strings[i] = random_string(length)
-                return strings
-        cursor.execute(export_function(generate_strings_samelength, ['float', 'integer'], ['i string'], table=True, test=False))
+        def generate_strings_samelength(f, length):
+            file = open(f, 'r')
+            content = file.read()
+            strings = content.split(' ')
+            result = numpy.array(strings)
+            return result
+        cursor.execute(export_function(generate_strings_samelength, ['string', 'integer'], ['i string'], table=True, test=False))
     else:
-        def generate_strings_samelength(mb, length):
-            def random_string(length):
-                import random
-                import string
-                result = ""
-                for i in range(0, length):
-                    result += random.choice(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
-                return result
-            import random
-            import math
-            byte_size = mb * 1000 * 1000
-            string_size_byte = length
-            string_count = int(byte_size / string_size_byte)
-            strings = None
-            if length < 15:
-                min_int = math.pow(10, length - 1)
-                max_int = math.pow(10, length) - 1
-                strings = numpy.random.random_integers(min_int, max_int, string_count).astype('U' + str(length))
-            else:
-                strings = numpy.zeros(string_count, dtype='U' + str(length))
-                for i in range(0, string_count):
-                    strings[i] = random_string(length)
-            strings[string_count - 1] = unichr(0x100) * length
-            return strings
-        cursor.execute(export_function(generate_strings_samelength, ['float', 'integer'], ['i string'], table=True, test=False))
+        def generate_strings_samelength(f, length):
+            file = open(f, 'r')
+            content = file.read()
+            strings = content.split(' ')
+            result = numpy.array(strings).astype("U%d" % length)
+            result[len(result) - 1] = unichr(0x100) * length
+            return result
+        cursor.execute(export_function(generate_strings_samelength, ['string', 'integer'], ['i string'], table=True, test=False))
 
     mb = []
     lens = []
@@ -360,8 +320,8 @@ elif str(arguments[1]).lower() == "strin
     for j in range(0,len(mb)):
         size = mb[j]
         length = lens[j]
-        #os.system("./randomstrings %s %s result.txt" % (str(size), str(length)))
-        cursor.execute('create table strings as SELECT * FROM generate_strings_samelength(' + str(size) + ',' + str(length) + ') with data;')
+        os.system("%s %s %s %s" % ("./randomstrings", str(size), str(length), result_path))
+        cursor.execute('create table strings as SELECT * FROM generate_strings_samelength(\'' + result_path + '\',' + str(length) + ') with data;')
         results = []
         result_file = open(temp_file, 'w+')
         result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n")
@@ -385,7 +345,11 @@ elif str(arguments[1]).lower() == "strin
     #cursor.execute('drop function import_test');
     cursor.execute('rollback')
 elif str(arguments[1]).lower() == "string_extremelength":
-    def generate_strings_extreme(extreme_length, string_count):
+    benchmark_dir = os.environ["PYAPI_BENCHMARKS_DIR"]
+    os.system("gcc " + benchmark_dir + "/randomstrings.c -o randomstrings")
+    result_path = os.path.join(os.getcwd(), 'result.txt')
+
+    def generate_strings_extreme(f, extreme_length):
         def random_string(length):
             import random
             import string
@@ -393,15 +357,13 @@ elif str(arguments[1]).lower() == "strin
             for i in range(0, length):
                 result += random.choice(string.printable)
             return result
-        import random
-        import math
-        result = numpy.array([], dtype=object)
-        result = numpy.append(result, random_string(extreme_length))
-        for i in range(0, string_count - 1):
-            result = numpy.append(result, random_string(1))
+        file = open(f, 'r')
+        content = file.read()
+        strings = content.split(' ')
+        result = numpy.array(strings).astype('object')
+        result[0] = random_string(extreme_length)
         return result
-
-    cursor.execute(export_function(generate_strings_extreme, ['integer', 'integer'], ['i string'], table=True, test=False))
+    cursor.execute(export_function(generate_strings_extreme, ['string', 'integer'], ['i string'], table=True, test=False))
 
     extreme_lengths = []
     string_counts = []
@@ -420,7 +382,11 @@ elif str(arguments[1]).lower() == "strin
     for j in range(0,len(extreme_lengths)):
         str_len = extreme_lengths[j]
         str_count = string_counts[j]
-        cursor.execute('create table strings as SELECT * FROM generate_strings_extreme(' + str(str_len) + ',' + str(str_count) + ') with data;')
+        string_mb = float(str_count) / (1000 ** 2)
+        print("%s %s %s %s" % ("./randomstrings", str(string_mb), str(1), result_path))
+        os.system("%s %s %s %s" % ("./randomstrings", str(string_mb), str(1), result_path))
+        cursor.execute('create table strings as SELECT * FROM generate_strings_extreme(\'' + result_path + '\',' + str(str_len) + ') with data;')
+        print('create table strings as SELECT * FROM generate_strings_extreme(\'' + result_path + '\',' + str(str_len) + ') with data;')
         results = []
         result_file = open(temp_file, 'w+')
         result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n")
@@ -565,3 +531,4 @@ else:
     print("Unrecognized test type \"" + arguments[1] + "\", exiting...")
     sys.exit(1)
 
+
diff --git a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
--- a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
+++ b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
@@ -1,7 +1,7 @@
 
 
 # The base directory of testing, a new folder is created in this base directory [$PYAPI_TEST_DIR], and everything is done in that new folder
-export PYAPI_BASE_DIR=/home/mytherin/
+export PYAPI_BASE_DIR=/export/scratch1/raasveld
 # The terminal to start mserver with, examples are gnome-terminal, xterm, konsole
 export TERMINAL=x-terminal-emulator
 # Port used by the MSERVER
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to