Changeset: e9589d2e559a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e9589d2e559a
Added Files:
        monetdb5/extras/pyapi/benchmark.c
        monetdb5/extras/pyapi/benchmark.h
Modified Files:
        monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
        monetdb5/extras/pyapi/Makefile.ag
        monetdb5/extras/pyapi/pyapi.c
Branch: pyapi
Log Message:

Added memory tracking using malloc/free hooks.


diffs (truncated from 354 to 300 lines):

diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py 
b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
--- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
+++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
@@ -75,11 +75,17 @@ try: main_memory = str(int(os.popen("cat
 except: pass
 os_name = ' '.join(platform.dist())
 
-def format_headers(measurement_xaxis, measurement_yaxis, measurement_zaxis = 
None):
-    return 'Python Ver\tNumpy Ver\tCPU Cores\tMain Memory (GB)\tOS\t' + 
measurement_xaxis + '\t' + measurement_yaxis + ('\t' + measurement_zaxis if 
measurement_zaxis is not None else '') + '\n'
+def format_headers(*measurement_axes):
+    result = 'Python Ver\tNumpy Ver\tCPU Cores\tMain Memory (GB)\tOS'
+    for measurement in measurement_axes:
+        result = result + '\t' + str(measurement)
+    return result + '\n'
 
-def format_output(measurement_x, measurement_y, measurement_z = None):
-    return python_version + '\t' + numpy_version + '\t' + amount_of_cores + 
'\t' + main_memory + '\t' + os_name + '\t' + str(measurement_x) + '\t' + 
str(measurement_y) + ('\t' + str(measurement_z) if measurement_z is not None 
else '') + '\n'
+def format_output(*measurements):
+    result = python_version + '\t' + numpy_version + '\t' + amount_of_cores + 
'\t' + main_memory + '\t' + os_name
+    for measurement in measurements:
+        result = result + '\t' + str(measurement)
+    return result + '\n'
 
 import os
 import sys
@@ -96,6 +102,10 @@ if (len(arguments) <= 4):
     quit()
 
 output_file = os.path.join(os.getcwd(), arguments[2])
+temp_file = os.path.join(os.getcwd(), 'tempfile.tsv')
+result_file = open(temp_file, 'w+')
+result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n")
+result_file.close();
 test_count = int(arguments[3])
 max_retries = 15
 
@@ -134,7 +144,7 @@ if str(arguments[1]).lower() == "input" 
             integers[i] = random.randint(min_int, max_int)
         return integers
 
-    cursor.execute(export_function(generate_integers, ['integer'], ['i 
integer'], table=True))
+    cursor.execute(export_function(generate_integers, ['float'], ['i 
integer'], table=True))
 
     # Our import test function returns a single boolean value and doesn't do 
anything with the actual input
     # This way the input loading is the only relevant factor in running time, 
because the time taken for function execution/output handling is constant
@@ -145,15 +155,15 @@ if str(arguments[1]).lower() == "input" 
 
     import time
     f = open(output_file + '.tsv', "w+")
-    f.write(format_headers('Data Size (MB)', 'Time (s)'))
+    f.write(format_headers('Data Size (MB)', 'Total Time (s)'))
     mb = []
     for i in range(4, len(arguments)):
         mb.append(float(arguments[i]))
 
     for size in mb:
-        start = time.time()
         cursor.execute('create temporary table integers as SELECT * FROM 
generate_integers(' + str(size) + ') with data;')
-        end = time.time()
+        #result_file = open(temp_file, 'r')
+        #result_file.readline()
         for i in range(0,test_count):
             start = time.time()
             cursor.execute('select import_test(i) from integers;');
@@ -259,9 +269,7 @@ elif str(arguments[1]).lower() == "strin
     for j in range(0,len(mb)):
         size = mb[j]
         length = lens[j]
-        start = time.time()
         cursor.execute('create table strings as SELECT * FROM 
generate_strings_samelength(' + str(size) + ',' + str(length) + ') with data;')
-        end = time.time()
         for i in range(0,test_count):
             start = time.time()
             cursor.execute('select import_test(i) from strings;');
@@ -311,9 +319,7 @@ elif str(arguments[1]).lower() == "strin
     for j in range(0,len(extreme_lengths)):
         str_len = extreme_lengths[j]
         str_count = string_counts[j]
-        start = time.time()
         cursor.execute('create table strings as SELECT * FROM 
generate_strings_extreme(' + str(str_len) + ',' + str(str_count) + ') with 
data;')
-        end = time.time()
         for i in range(0,test_count):
             start = time.time()
             cursor.execute('select import_test(i) from strings;');
diff --git a/monetdb5/extras/pyapi/Makefile.ag 
b/monetdb5/extras/pyapi/Makefile.ag
--- a/monetdb5/extras/pyapi/Makefile.ag
+++ b/monetdb5/extras/pyapi/Makefile.ag
@@ -17,7 +17,7 @@ MTSAFE
 lib__pyapi = {
        MODULE
        DIR = libdir/monetdb5
-       SOURCES = pyapi.c pyapi.h unicode.c unicode.h pytypes.c pytypes.h 
type_conversion.c type_conversion.h bytearray.c bytearray.h formatinput.c 
formatinput.h
+       SOURCES = pyapi.c pyapi.h unicode.c unicode.h pytypes.c pytypes.h 
type_conversion.c type_conversion.h bytearray.c bytearray.h formatinput.c 
formatinput.h benchmark.c benchmark.h
        XDEPS = $(libpy_LIBDEP)
        LIBS = ../../tools/libmonetdb5 \
        ../../../gdk/libbat \
diff --git a/monetdb5/extras/pyapi/benchmark.c 
b/monetdb5/extras/pyapi/benchmark.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/pyapi/benchmark.c
@@ -0,0 +1,155 @@
+
+#include "benchmark.h"
+
+#include <malloc.h>
+#include <string.h>
+#include <time.h>
+
+static unsigned long long memtrace_current_memory_bytes = 0;
+static unsigned long long memtrace_memory_peak = 0;
+static double benchmark_start_time = 0;
+static double benchmark_end_time = 0;
+
+#ifdef __MALLOC_DEPRECATED //if this isn't defined MALLOC_HOOKS aren't 
supported, probably
+// We are using the malloc/free hooks, which are deprecated, so we have to silence the deprecation warnings.
+// (This is obviously bad practice, but the alternative is recompiling Python and then tracing both the PyMemAlloc/Realloc and the GDKmalloc/realloc calls.
+// Using the hooks is much easier. They were deprecated because they are not thread safe, and we do not use them in a threaded context.)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+/* Prototypes for our hooks.  */
+static void *my_malloc_hook (size_t, const void *);
+static void my_free_hook (void*, const void *);
+static void add_ptr(void *ptr, size_t size);
+static void remove_ptr(void *ptr);
+static void* (*old_malloc_hook)(size_t, const void*)=NULL;
+static void (*old_free_hook)(void*, const void*)=NULL;
+
+//we keep a datastore of the tracked pointers and, for each pointer, the number of bytes that was malloced when it was created
+static void **memtrace_pointers = NULL;   //the pointers
+static size_t *memtrace_sizes = NULL;     //the sizes
+static size_t memtrace_max_size = 100;    //the max size of the _pointers and 
_sizes arrays
+static size_t memtrace_current_size = -1; //the current index
+
+// Records one allocation (ptr, size) in the pointer store and updates the
+// current and peak byte counters.
+// Called from my_malloc_hook after the original hooks were put back, so the
+// malloc/free calls made here for growing the store are not traced themselves.
+// A NULL ptr (failed allocation) or a missing store is ignored.
+void add_ptr(void *ptr, size_t size)
+{
+       size_t next;
+       if (ptr == NULL || memtrace_pointers == NULL || memtrace_sizes == NULL)
+               return;
+       //memtrace_current_size is the index of the last used slot; it is
+       //(size_t) -1 while the store is empty, so adding 1 wraps around to slot 0
+       next = memtrace_current_size + 1;
+       if (next >= memtrace_max_size)
+       {
+               //if the max_size is exceeded move both arrays into arrays of twice the capacity
+               void **new_ptrs = malloc(sizeof(void*) * memtrace_max_size * 2);
+               size_t *new_sizes = malloc(sizeof(size_t) * memtrace_max_size * 2);
+               if (new_ptrs == NULL || new_sizes == NULL)
+               {
+                       //out of memory: keep the old arrays and leave this allocation untracked
+                       free(new_ptrs); free(new_sizes);
+                       return;
+               }
+               memcpy(new_ptrs, memtrace_pointers, memtrace_max_size * sizeof(void*));
+               memcpy(new_sizes, memtrace_sizes, memtrace_max_size * sizeof(size_t));
+               free(memtrace_pointers); free(memtrace_sizes);
+               memtrace_pointers = new_ptrs; memtrace_sizes = new_sizes;
+               memtrace_max_size = memtrace_max_size * 2;
+       }
+
+       memtrace_current_size = next;
+       memtrace_pointers[memtrace_current_size] = ptr;
+       memtrace_sizes[memtrace_current_size] = size;
+       memtrace_current_memory_bytes += size;
+       if (memtrace_current_memory_bytes > memtrace_memory_peak)
+               memtrace_memory_peak = memtrace_current_memory_bytes;
+}
+
+// Forgets a tracked allocation: looks ptr up in the pointer store, subtracts
+// its recorded size from the current byte counter and fills the freed slot
+// with the last entry of the store. Pointers that are not in the store
+// (e.g. allocated before init_hook()) are ignored.
+void remove_ptr(void *ptr)
+{
+       //because malloc hooks inherently aren't thread safe we don't care to make this thread safe either
+       size_t i;
+       //(size_t) -1 marks an empty store; without this check the unsigned
+       //comparison in the loop below is true for every i and the loop reads
+       //past the end of the arrays
+       if (ptr == NULL || memtrace_pointers == NULL || memtrace_sizes == NULL ||
+               memtrace_current_size == (size_t) -1)
+               return;
+       for(i = 0; i <= memtrace_current_size; i++)
+       {
+               if (memtrace_pointers[i] == ptr)
+               {
+                       memtrace_current_memory_bytes -= memtrace_sizes[i];
+                       //overwrite the removed entry with the last one and shrink the store by one
+                       memtrace_pointers[i] = memtrace_pointers[memtrace_current_size];
+                       memtrace_sizes[i] = memtrace_sizes[memtrace_current_size];
+                       memtrace_current_size--;
+                       return;
+               }
+       }
+}
+
+// Starts memory tracing: resets the byte counters and the pointer store,
+// allocates the store if needed and installs my_malloc_hook/my_free_hook,
+// remembering the previously installed hooks for revert_hook().
+// If the store cannot be allocated the hooks are not installed.
+void init_hook (void)
+{
+       memtrace_current_memory_bytes = 0;
+       memtrace_memory_peak = 0;
+       //the counters start from zero, so entries left from an earlier run must not be subtracted later
+       memtrace_current_size = -1;
+
+       if (memtrace_pointers == NULL) {
+               memtrace_pointers = malloc(memtrace_max_size * sizeof(void*));
+       }
+       if (memtrace_sizes == NULL) {
+               memtrace_sizes = malloc(memtrace_max_size * sizeof(size_t));
+       }
+       if (memtrace_pointers == NULL || memtrace_sizes == NULL) {
+               //without both arrays nothing can be recorded, so leave the hooks alone
+               free(memtrace_pointers); free(memtrace_sizes);
+               memtrace_pointers = NULL; memtrace_sizes = NULL;
+               return;
+       }
+
+       //only remember the current hooks when they are not ours already;
+       //otherwise a second init_hook() call would make my_malloc_hook call itself forever
+       if (__malloc_hook != my_malloc_hook) {
+               old_malloc_hook = __malloc_hook;
+               old_free_hook = __free_hook;
+       }
+       __malloc_hook = my_malloc_hook;
+       __free_hook = my_free_hook;
+}
+
+// Stops memory tracing: puts back the hooks saved by init_hook(), releases
+// the pointer store and returns the store bookkeeping to its initial state.
+// The byte counters are left untouched, so the peak can still be read afterwards.
+void revert_hook (void)
+{
+       //reinstall the saved hooks first
+       __malloc_hook = old_malloc_hook;
+       __free_hook = old_free_hook;
+
+       //release both arrays of the store
+       free(memtrace_sizes);
+       memtrace_sizes = NULL;
+       free(memtrace_pointers);
+       memtrace_pointers = NULL;
+
+       //initial capacity and "empty" index for the next init_hook()
+       memtrace_max_size = 100;
+       memtrace_current_size = -1;
+}
+
+// malloc hook: performs the real allocation with the saved hooks in place,
+// records the returned pointer and its size, then reinstalls the tracing hooks.
+// caller is unused.
+static void *my_malloc_hook (size_t size, const void *caller)
+{
+       void *result; (void) caller;
+       /* Restore all old hooks, otherwise the malloc below would call this hook again */
+       __malloc_hook = old_malloc_hook;
+       __free_hook = old_free_hook;
+       /* Call recursively */
+       result = malloc (size);
+       /* A failed allocation returns NULL; do not count bytes that were never handed out */
+       if (result != NULL)
+               add_ptr(result, size);
+       /* Restore our own hooks */
+       __malloc_hook = my_malloc_hook;
+       __free_hook = my_free_hook;
+       return result;
+}
+
+// free hook: drops ptr from the pointer store and releases it with the saved
+// hooks in place, then reinstalls the tracing hooks. caller is unused.
+static void my_free_hook (void *ptr, const void *caller)
+{
+       (void) caller;
+       /* Restore all old hooks, otherwise the free below would call this hook again */
+       __malloc_hook = old_malloc_hook;
+       __free_hook = old_free_hook;
+       /* Update the bookkeeping while ptr is still valid; free(NULL) does nothing, so NULL is not looked up */
+       if (ptr != NULL)
+               remove_ptr(ptr);
+       /* Call recursively */
+       free (ptr);
+       /* Restore our own hooks */
+       __malloc_hook = my_malloc_hook;
+       __free_hook = my_free_hook;
+}
+#pragma GCC diagnostic pop
+#else
+
+void init_hook (void) {} /* no-op stub: compiled when __MALLOC_DEPRECATED is not defined, so nothing is traced */
+void revert_hook (void) {} /* no-op stub matching the init_hook() stub on the previous line */
+
+#endif
+
+unsigned long long GET_MEMORY_PEAK(void) /* highest traced byte total since the counters were last zeroed */
+{
+       return memtrace_memory_peak; /* raised in add_ptr(), zeroed in init_hook(); stays 0 in the build with the no-op stubs */
+}
+
+unsigned long long GET_MEMORY_USAGE(void) /* traced bytes that are currently allocated and not yet freed */
+{
+       return memtrace_current_memory_bytes; /* increased in add_ptr(), decreased in remove_ptr(), zeroed in init_hook() */
+}
+
+// Returns the time in seconds between the last start_timer() and end_timer()
+// calls: the difference of the two stored clock() values divided by CLOCKS_PER_SEC.
+double GET_ELAPSED_TIME(void)
+{
+       double elapsed_ticks = benchmark_end_time - benchmark_start_time;
+       return elapsed_ticks / CLOCKS_PER_SEC;
+}
+
+// Stores the current clock() value as the start of the measured interval.
+void start_timer(void)
+{
+       benchmark_start_time = (double) clock();
+}
+
+// Stores the current clock() value as the end of the measured interval.
+void end_timer(void)
+{
+       benchmark_end_time = (double) clock();
+}
diff --git a/monetdb5/extras/pyapi/benchmark.h 
b/monetdb5/extras/pyapi/benchmark.h
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/pyapi/benchmark.h
@@ -0,0 +1,32 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 2008-2015 MonetDB B.V.
+ */
+
+/*
+ * M. Raasveldt
+ * Contains hooks for malloc/free as well as timer 
+ */
+
+#ifndef _PYAPI_BENCHMARK_LIB_
+#define _PYAPI_BENCHMARK_LIB_
+
+//starts a performance timer with clock()
+void start_timer(void);
+//ends the timer
+void end_timer(void);
+//sets up malloc hooks, not thread safe, do not use in thread context
+void init_hook (void);
+//detaches malloc hooks, not thread safe
+void revert_hook (void);
+//gets peak memory usage between init_hook() and revert_hook() calls
+unsigned long long GET_MEMORY_PEAK(void);
+//get current memory usage (note that this only measures the malloc calls 
between init_hook() and revert_hook() calls)
+unsigned long long GET_MEMORY_USAGE(void);
+//gets the elapsed time in seconds (as measured by clock()) between the last start_timer() and end_timer() calls
+double GET_ELAPSED_TIME(void);
+
+#endif
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -29,6 +29,7 @@
 #include "bytearray.h"
 #include "type_conversion.h"
 #include "formatinput.h"
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to