Changeset: fdebce90a3f3 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fdebce90a3f3 Modified Files: monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh monetdb5/extras/pyapi/benchmark.c monetdb5/extras/pyapi/pyapi.c monetdb5/extras/pyapi/pyapi.h monetdb5/extras/pyapi/pytypes.h monetdb5/extras/pyapi/type_conversion.c monetdb5/extras/pyapi/unicode.c Branch: pyapi Log Message:
Added more documentation, and moved some code around for increased clarity. diffs (truncated from 1520 to 300 lines): diff --git a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh --- a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh +++ b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh @@ -4,7 +4,7 @@ export PYAPI_BASE_DIR=$HOME # The terminal to start mserver with, examples are 'gnome-terminal', 'xterm', 'konsole' export TERMINAL=x-terminal-emulator -# A command that tests if the mserver is still running (used to find out when the shutting down is completed) +# A command that tests if the mserver is still running (used to find out when the shutting down of mserver is completed) export MSERVERTEST='netstat -ant | grep "127.0.0.1:50000.*LISTEN">/dev/null' # Testing parameters @@ -53,6 +53,20 @@ export PYAPI_TESTFILE=$PYAPI_MONETDB_DIR # Graph file location export PYAPI_GRAPHFILE=$PYAPI_MONETDB_DIR/monetdb5/extras/pyapi/Benchmarks/graph.py +# Try a bunch of popular different terminals +type $TERMINAL >/dev/null 2>&1 +if [ $? -ne 0 ]; then + export TERMINAL=gnome-terminal +fi +type $TERMINAL >/dev/null 2>&1 +if [ $? -ne 0 ]; then + export TERMINAL=xterm +fi +type $TERMINAL >/dev/null 2>&1 +if [ $? -ne 0 ]; then + export TERMINAL=konsole +fi + function pyapi_build { echo "Making directory $PYAPI_TEST_DIR." mkdir $PYAPI_TEST_DIR && cd $PYAPI_TEST_DIR diff --git a/monetdb5/extras/pyapi/benchmark.c b/monetdb5/extras/pyapi/benchmark.c --- a/monetdb5/extras/pyapi/benchmark.c +++ b/monetdb5/extras/pyapi/benchmark.c @@ -1,3 +1,10 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 2008-2015 MonetDB B.V. 
+ */ #include "benchmark.h" @@ -13,7 +20,10 @@ static unsigned long long memtrace_memor #ifdef __MALLOC_DEPRECATED //if this isn't defined MALLOC_HOOKS aren't supported, probably // We are using malloc/free hooks which are deprecated, so we have to ignore the warnings // (This is obviously bad practice, but the alternative is having to recompile Python and then tracing both PyMemAlloc/Realloc and GDKmalloc/realloc calls, this is much easier, and we aren't using them in a thread context and no thread safety is why they are deprecated in the first place) + +#ifndef __INTEL_COMPILER #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif /* Prototypes for our hooks. */ static void *my_malloc_hook (size_t, const void *); static void my_free_hook (void*, const void *); diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -15,14 +15,16 @@ #include "sql_catalog.h" #include "pyapi.h" +// Python library #undef _GNU_SOURCE #undef _XOPEN_SOURCE #undef _POSIX_C_SOURCE #include <Python.h> +// Numpy Library #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #ifdef __INTEL_COMPILER -//intel compiler complains about trailing comma's in numpy source code, so hopefully this works +// Intel compiler complains about trailing comma's in numpy source code, so hopefully this works #pragma warning(disable:271) #endif #include <numpy/arrayobject.h> @@ -35,133 +37,56 @@ #include "formatinput.h" #include "benchmark.h" -//#define _PYAPI_VERBOSE_ -//#define _PYAPI_WARNINGS_ -#define _PYAPI_DEBUG_ - -#include <stdint.h> - -#include <stdio.h> -#include <string.h> - -#ifdef WIN32 - -#else +#ifndef WIN32 +// These libraries are used for PYTHON_MAP operations on Linux [to start new processes and wait on them] #include <sys/types.h> #include <sys/wait.h> #endif const char* pyapi_enableflag = "embedded_py"; +const char* verbose_enableflag = "enable_pyverbose"; +const char* warning_enableflag = 
"enable_pywarnings"; +const char* debug_enableflag = "enable_pydebug"; +#ifdef _PYAPI_TESTING_ const char* zerocopyinput_disableflag = "disable_pyzerocopyinput"; const char* zerocopyoutput_disableflag = "disable_pyzerocopyoutput"; -const char* verbose_enableflag = "enable_pyverbose"; -const char* debug_enableflag = "enable_pydebug"; const char* numpy_string_array_enableflag = "enable_numpystringarray"; const char* alwaysunicode_enableflag = "enable_alwaysunicode"; const char* bytearray_disableflag = "disable_bytearray"; const char* benchmark_output_flag = "pyapi_benchmark_output"; - - - +static bool option_zerocopyinput; +static bool option_zerocopyoutput; +static bool option_numpy_string_array; +static bool option_bytearray; +static bool option_alwaysunicode; +static char *benchmark_output; +#endif #ifdef _PYAPI_VERBOSE_ -#define VERBOSE_MESSAGE(...) { \ - if (shm_id > 0) printf("%d: ", shm_id); \ - printf(__VA_ARGS__); \ - fflush(stdout); \ -} -#else -#define VERBOSE_MESSAGE(...) ((void) 0) +static bool option_verbose; #endif - +#ifdef _PYAPI_DEBUG_ +static bool option_debug; +#endif #ifdef _PYAPI_WARNINGS_ -#define WARNING_MESSAGE(...) { \ - fprintf(stderr, __VA_ARGS__); \ - fflush(stdout); \ -} -#else -#define WARNING_MESSAGE(...) 
((void) 0) +static bool option_warning; #endif - -#define GDK_Alloc(var, size) { \ - var = GDKzalloc(size); \ - if (var == NULL) { \ - msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL); \ - goto wrapup; \ - } \ -} - -#define GDK_Free(var) { \ - if (var != NULL) \ - GDKfree(var); \ -} - -const char * pyarg_tabwidth[] = {"TABWIDTH", "MULTIPROCESSING"}; - -struct _ParseArguments -{ - int tab_width; - bool multiprocessing; -}; -#define ParseArguments struct _ParseArguments - -struct _ReturnBatDescr -{ - int npy_type; //npy type - size_t element_size; //element size in bytes - size_t bat_count; //number of elements in bat - size_t bat_size; //bat size in bytes - size_t bat_start; //start position of bat - bool has_mask; //if the return value has a mask or not -}; -#define ReturnBatDescr struct _ReturnBatDescr - -struct _PyInput{ - void *dataptr; - BAT *bat; - int bat_type; - size_t count; - bool scalar; -}; -#define PyInput struct _PyInput - -struct _PyReturn{ - PyArrayObject *numpy_array; - PyArrayObject *numpy_mask; - void *array_data; - bool *mask_data; - size_t count; - size_t memory_size; - int result_type; - bool multidimensional; -}; -#define PyReturn struct _PyReturn - int PyAPIEnabled(void) { return (GDKgetenv_istrue(pyapi_enableflag) || GDKgetenv_isyes(pyapi_enableflag)); } static MT_Lock pyapiLock; -static MT_Lock pyapiSluice; static int pyapiInitialized = FALSE; - -#define bte_TO_PYSCALAR(mtpe, value) PyInt_FromLong((lng)value) -#define bit_TO_PYSCALAR(mtpe, value) PyInt_FromLong((lng)value) -#define sht_TO_PYSCALAR(mtpe, value) PyInt_FromLong((lng)value) -#define int_TO_PYSCALAR(mtpe, value) PyInt_FromLong((lng)value) -#define lng_TO_PYSCALAR(mtpe, value) PyLong_FromLong(value) -#define flt_TO_PYSCALAR(mtpe, value) PyFloat_FromDouble(value) -#define dbl_TO_PYSCALAR(mtpe, value) PyFloat_FromDouble(value) - -#define SCALAR_TO_PYSCALAR(mtpe, value) mtpe##_TO_PYSCALAR(mtpe, value) - - +#ifdef _PYAPI_TESTING_ +// This #define converts a BAT 'bat' of BAT 
type 'TYPE_mtpe' to a Numpy array of type 'nptpe' +// This only works with numeric types (bit, byte, int, long, float, double), strings are handled separately +// if _PYAPI_TESTING_ is enabled, and option_zerocopyinput is set to FALSE, the BAT is copied. Otherwise the internal BAT pointer is passed to the numpy array (zero copy) #define BAT_TO_NP(bat, mtpe, nptpe) \ if (!option_zerocopyinput) { \ vararray = PyArray_Zeros(1, (npy_intp[1]) {(t_end-t_start)}, PyArray_DescrFromType(nptpe), 0); \ - for(j = t_start; j < t_end; j++) { \ + for(j = t_start; j < t_end; j++) { \ PyArray_SETITEM((PyArrayObject*)vararray, PyArray_GETPTR1((PyArrayObject*)vararray, j - t_start), SCALAR_TO_PYSCALAR(mtpe, ((mtpe*) Tloc(bat, BUNfirst(bat)))[j])); \ } \ } else { \ @@ -169,8 +94,16 @@ static int pyapiInitialized = FALSE; nptpe, NULL, &((mtpe*) Tloc(bat, BUNfirst(bat)))[t_start], 0, \ NPY_ARRAY_CARRAY || !NPY_ARRAY_WRITEABLE, NULL); \ } +#else +#define BAT_TO_NP(bat, mtpe, nptpe) \ + vararray = PyArray_New(&PyArray_Type, 1, (npy_intp[1]) {(t_end-t_start)}, \ + nptpe, NULL, &((mtpe*) Tloc(bat, BUNfirst(bat)))[t_start], 0, \ + NPY_ARRAY_CARRAY || !NPY_ARRAY_WRITEABLE, NULL); +#endif -#define BAT_MMAP(bat, mtpe, batstore) { \ +// This #define creates a new BAT with the internal data and mask from a Numpy array, without copying the data +// 'bat' is a BAT* pointer, which will contain the new BAT. 
TYPE_'mtpe' is the BAT type, and 'batstore' is the heap storage type of the BAT (this should be STORE_CMEM or STORE_SHARED) +#define CREATE_BAT_ZEROCOPY(bat, mtpe, batstore) { \ bat = BATnew(TYPE_void, TYPE_##mtpe, 0, TRANSIENT); \ BATseqbase(bat, seqbase); bat->T->nil = 0; bat->T->nonil = 1; \ bat->tkey = 0; bat->tsorted = 0; bat->trevsorted = 0; \ @@ -204,9 +137,12 @@ static int pyapiInitialized = FALSE; bat->S->copiedtodisk = false; \ \ /*Take over the data from the numpy array*/ \ - if (ret->numpy_array != NULL) PyArray_CLEARFLAGS(ret->numpy_array, NPY_ARRAY_OWNDATA); \ + if (ret->numpy_array != NULL) PyArray_CLEARFLAGS((PyArrayObject*)ret->numpy_array, NPY_ARRAY_OWNDATA); \ } +// This #define converts a Numpy Array to a BAT by copying the internal data to the BAT. It assumes the BAT 'bat' is already created with the proper size. +// This should only be used with integer data that can be cast. It assumes the Numpy Array has an internal array of type 'mtpe_from', and the BAT has an internal array of type 'mtpe_to'. +// it then does the cast by simply doing BAT[i] = (mtpe_to) ((mtpe_from*)NUMPY_ARRAY[i]), which only works if both mtpe_to and mtpe_from are integers #define NP_COL_BAT_LOOP(bat, mtpe_to, mtpe_from) { \ if (mask == NULL) \ { \ @@ -231,6 +167,9 @@ static int pyapiInitialized = FALSE; } \ } } +// This #define converts a Numpy Array to a BAT by copying the internal data to the BAT. It converts the data from the Numpy Array to the BAT using a function +// This function has to have the prototype 'bool function(void *data, size_t memory_size, mtpe_to *resulting_value)', and either return False (if conversion fails) +// or write the value into the 'resulting_value' pointer. This is used for converting strings/unicodes/python objects to numeric values. 
#define NP_COL_BAT_LOOP_FUNC(bat, mtpe_to, func) { \ mtpe_to value; \ if (mask == NULL) \ @@ -267,6 +206,7 @@ static int pyapiInitialized = FALSE; } } +// This #define is for converting a numeric numpy array into a string BAT. 'conv' is a function that turns a numeric value of type 'mtpe' to a char* array. #define NP_COL_BAT_STR_LOOP(bat, mtpe, conv) \ if (mask == NULL) \ { \ @@ -293,6 +233,15 @@ static int pyapiInitialized = FALSE; } \ } +// This is here so we can remove the option_zerocopyoutput from the zero copy conditionals if testing is disabled +#ifdef _PYAPI_TESTING_ +#define ZEROCOPY_OUTPUT option_zerocopyoutput && +#else +#define ZEROCOPY_OUTPUT +#endif + +// This very big #define combines all the previous #defines for one big #define that is responsible for converting a Numpy array (described in the PyReturn object 'ret') +// to a BAT of type 'mtpe'. This should only be used for numeric BATs (but can be used for any Numpy Array). The resulting BAT will be stored in 'bat'. #define NP_CREATE_BAT(bat, mtpe) { \ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list