Changeset: e9589d2e559a for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e9589d2e559a Added Files: monetdb5/extras/pyapi/benchmark.c monetdb5/extras/pyapi/benchmark.h Modified Files: monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py monetdb5/extras/pyapi/Makefile.ag monetdb5/extras/pyapi/pyapi.c Branch: pyapi Log Message:
Added memory tracking using malloc/free hooks. diffs (truncated from 354 to 300 lines): diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py --- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py +++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py @@ -75,11 +75,17 @@ try: main_memory = str(int(os.popen("cat except: pass os_name = ' '.join(platform.dist()) -def format_headers(measurement_xaxis, measurement_yaxis, measurement_zaxis = None): - return 'Python Ver\tNumpy Ver\tCPU Cores\tMain Memory (GB)\tOS\t' + measurement_xaxis + '\t' + measurement_yaxis + ('\t' + measurement_zaxis if measurement_zaxis is not None else '') + '\n' +def format_headers(*measurement_axes): + result = 'Python Ver\tNumpy Ver\tCPU Cores\tMain Memory (GB)\tOS' + for measurement in measurement_axes: + result = result + '\t' + str(measurement) + return result + '\n' -def format_output(measurement_x, measurement_y, measurement_z = None): - return python_version + '\t' + numpy_version + '\t' + amount_of_cores + '\t' + main_memory + '\t' + os_name + '\t' + str(measurement_x) + '\t' + str(measurement_y) + ('\t' + str(measurement_z) if measurement_z is not None else '') + '\n' +def format_output(*measurements): + result = python_version + '\t' + numpy_version + '\t' + amount_of_cores + '\t' + main_memory + '\t' + os_name + for measurement in measurements: + result = result + '\t' + str(measurement) + return result + '\n' import os import sys @@ -96,6 +102,10 @@ if (len(arguments) <= 4): quit() output_file = os.path.join(os.getcwd(), arguments[2]) +temp_file = os.path.join(os.getcwd(), 'tempfile.tsv') +result_file = open(temp_file, 'w+') +result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n") +result_file.close(); test_count = int(arguments[3]) max_retries = 15 @@ -134,7 +144,7 @@ if str(arguments[1]).lower() == "input" integers[i] = random.randint(min_int, max_int) return integers - 
cursor.execute(export_function(generate_integers, ['integer'], ['i integer'], table=True)) + cursor.execute(export_function(generate_integers, ['float'], ['i integer'], table=True)) # Our import test function returns a single boolean value and doesn't do anything with the actual input # This way the input loading is the only relevant factor in running time, because the time taken for function execution/output handling is constant @@ -145,15 +155,15 @@ if str(arguments[1]).lower() == "input" import time f = open(output_file + '.tsv', "w+") - f.write(format_headers('Data Size (MB)', 'Time (s)')) + f.write(format_headers('Data Size (MB)', 'Total Time (s)')) mb = [] for i in range(4, len(arguments)): mb.append(float(arguments[i])) for size in mb: - start = time.time() cursor.execute('create temporary table integers as SELECT * FROM generate_integers(' + str(size) + ') with data;') - end = time.time() + #result_file = open(temp_file, 'r') + #result_file.readline() for i in range(0,test_count): start = time.time() cursor.execute('select import_test(i) from integers;'); @@ -259,9 +269,7 @@ elif str(arguments[1]).lower() == "strin for j in range(0,len(mb)): size = mb[j] length = lens[j] - start = time.time() cursor.execute('create table strings as SELECT * FROM generate_strings_samelength(' + str(size) + ',' + str(length) + ') with data;') - end = time.time() for i in range(0,test_count): start = time.time() cursor.execute('select import_test(i) from strings;'); @@ -311,9 +319,7 @@ elif str(arguments[1]).lower() == "strin for j in range(0,len(extreme_lengths)): str_len = extreme_lengths[j] str_count = string_counts[j] - start = time.time() cursor.execute('create table strings as SELECT * FROM generate_strings_extreme(' + str(str_len) + ',' + str(str_count) + ') with data;') - end = time.time() for i in range(0,test_count): start = time.time() cursor.execute('select import_test(i) from strings;'); diff --git a/monetdb5/extras/pyapi/Makefile.ag 
b/monetdb5/extras/pyapi/Makefile.ag --- a/monetdb5/extras/pyapi/Makefile.ag +++ b/monetdb5/extras/pyapi/Makefile.ag @@ -17,7 +17,7 @@ MTSAFE lib__pyapi = { MODULE DIR = libdir/monetdb5 - SOURCES = pyapi.c pyapi.h unicode.c unicode.h pytypes.c pytypes.h type_conversion.c type_conversion.h bytearray.c bytearray.h formatinput.c formatinput.h + SOURCES = pyapi.c pyapi.h unicode.c unicode.h pytypes.c pytypes.h type_conversion.c type_conversion.h bytearray.c bytearray.h formatinput.c formatinput.h benchmark.c benchmark.h XDEPS = $(libpy_LIBDEP) LIBS = ../../tools/libmonetdb5 \ ../../../gdk/libbat \ diff --git a/monetdb5/extras/pyapi/benchmark.c b/monetdb5/extras/pyapi/benchmark.c new file mode 100644 --- /dev/null +++ b/monetdb5/extras/pyapi/benchmark.c @@ -0,0 +1,155 @@ + +#include "benchmark.h" + +#include <malloc.h> +#include <string.h> +#include <time.h> + +static unsigned long long memtrace_current_memory_bytes = 0; +static unsigned long long memtrace_memory_peak = 0; +static double benchmark_start_time = 0; +static double benchmark_end_time = 0; + +#ifdef __MALLOC_DEPRECATED //if this isn't defined MALLOC_HOOKS aren't supported, probably +// We are using malloc/free hooks which are deprecated, so we have to ignore the warnings +// (This is obviously bad practice, but the alternative is having to recompile Python and then tracing both PyMemAlloc/Realloc and GDKmalloc/realloc calls, this is much easier, and we aren't using them in a thread context and no thread safety is why they are deprecated in the first place) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +/* Prototypes for our hooks. 
*/ +static void *my_malloc_hook (size_t, const void *); +static void my_free_hook (void*, const void *); +static void add_ptr(void *ptr, size_t size); +static void remove_ptr(void *ptr); +static void* (*old_malloc_hook)(size_t, const void*)=NULL; +static void (*old_free_hook)(void*, const void*)=NULL; + +//we keep a datastore of pointers and the amount of size that was malloced when the pointer was created +static void **memtrace_pointers = NULL; //the pointers +static size_t *memtrace_sizes = NULL; //the sizes +static size_t memtrace_max_size = 100; //the max size of the _pointers and _sizes arrays +static size_t memtrace_current_size = -1; //the current index + +void add_ptr(void *ptr, size_t size) +{ + memtrace_current_size++; + if (memtrace_current_size >= memtrace_max_size) + { + //if the max_size is exceeded extend the array + void **new_ptrs = malloc(sizeof(void*) * memtrace_max_size * 2); + size_t *new_sizes = malloc(sizeof(size_t*) * memtrace_max_size * 2); + memcpy(new_ptrs, memtrace_pointers, memtrace_max_size * sizeof(void*)); + memcpy(new_sizes, memtrace_sizes, memtrace_max_size * sizeof(size_t*)); + free(memtrace_pointers); free(memtrace_sizes); + memtrace_pointers = new_ptrs; memtrace_sizes = new_sizes; + memtrace_max_size = memtrace_max_size * 2; + } + + memtrace_pointers[memtrace_current_size] = ptr; + memtrace_sizes[memtrace_current_size] = size; + memtrace_current_memory_bytes += size; + if (memtrace_current_memory_bytes > memtrace_memory_peak) memtrace_memory_peak = memtrace_current_memory_bytes; +} + +void remove_ptr(void *ptr) +{ + //because malloc hooks inherently aren't thread safe we don't care to make this thread safe either + size_t i; + for(i = 0; i <= memtrace_current_size; i++) + { + if (memtrace_pointers[i] == ptr) + { + memtrace_current_memory_bytes -= memtrace_sizes[i]; + memtrace_pointers[i] = memtrace_pointers[memtrace_current_size]; + memtrace_sizes[i] = memtrace_sizes[memtrace_current_size]; + memtrace_current_size--; + return; 
+ } + } +} + +void init_hook (void) +{ + if (memtrace_pointers == NULL) { + memtrace_pointers = malloc(memtrace_max_size * sizeof(void*)); + memtrace_sizes = malloc(memtrace_max_size * sizeof(size_t*)); + } + memtrace_current_memory_bytes = 0; + memtrace_memory_peak = 0; + + old_malloc_hook = __malloc_hook; + old_free_hook = __free_hook; + __malloc_hook = my_malloc_hook; + __free_hook = my_free_hook; +} + +void revert_hook (void) +{ + __malloc_hook = old_malloc_hook; + __free_hook = old_free_hook; + memtrace_current_size = -1; + memtrace_max_size = 100; + free(memtrace_pointers); + free(memtrace_sizes); + memtrace_pointers = NULL; memtrace_sizes = NULL; +} + +static void *my_malloc_hook (size_t size, const void *caller) +{ + void *result; (void) caller; + /* Restore all old hooks */ + __malloc_hook = old_malloc_hook; + __free_hook = old_free_hook; + /* Call recursively */ + result = malloc (size); + add_ptr(result, size); + /* Restore our own hooks */ + __malloc_hook = my_malloc_hook; + __free_hook = my_free_hook; + return result; +} + +static void my_free_hook (void *ptr, const void *caller) +{ + (void) caller; + /* Restore all old hooks */ + __malloc_hook = old_malloc_hook; + __free_hook = old_free_hook; + /* Call recursively */ + free (ptr); + remove_ptr(ptr); + /* Restore our own hooks */ + __malloc_hook = my_malloc_hook; + __free_hook = my_free_hook; +} +#pragma GCC diagnostic pop +#else + +void init_hook (void) {} +void revert_hook (void) {} + +#endif + +unsigned long long GET_MEMORY_PEAK(void) +{ + return memtrace_memory_peak; +} + +unsigned long long GET_MEMORY_USAGE(void) +{ + return memtrace_current_memory_bytes; +} + +double GET_ELAPSED_TIME(void) +{ + return (double)(benchmark_end_time - benchmark_start_time) / CLOCKS_PER_SEC; +} + +void start_timer(void) +{ + benchmark_start_time = clock(); +} + +void end_timer(void) +{ + benchmark_end_time = clock(); +} diff --git a/monetdb5/extras/pyapi/benchmark.h b/monetdb5/extras/pyapi/benchmark.h new file mode 
100644 --- /dev/null +++ b/monetdb5/extras/pyapi/benchmark.h @@ -0,0 +1,32 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 2008-2015 MonetDB B.V. + */ + +/* + * M. Raasveldt + * Contains hooks for malloc/free as well as timer + */ + +#ifndef _PYAPI_BENCHMARK_LIB_ +#define _PYAPI_BENCHMARK_LIB_ + +//starts a performance timer with clock() +void start_timer(void); +//ends the timer +void end_timer(void); +//sets up malloc hooks, not thread safe, do not use in thread context +void init_hook (void); +//detaches malloc hooks, not thread safe +void revert_hook (void); +//gets peak memory usage between init_hook() and revert_hook() calls +unsigned long long GET_MEMORY_PEAK(void); +//get current memory usage (note that this only measures the malloc calls between init_hook() and revert_hook() calls) +unsigned long long GET_MEMORY_USAGE(void); +//gets the elapsed time of a timer started with start_timer() and end_timer() +double GET_ELAPSED_TIME(void); + +#endif diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -29,6 +29,7 @@ #include "bytearray.h" #include "type_conversion.h" #include "formatinput.h" _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list