Changeset: f197b705aae5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f197b705aae5
Modified Files:
        monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
        monetdb5/extras/pyapi/pyapi.c
        monetdb5/extras/pyapi/pytypes.h
        sql/backends/monet5/sql.c
        sql/backends/monet5/sql.h
Branch: pyapi
Log Message:

Move code for Python Object -> BAT conversion into separate functions.


diffs (truncated from 903 to 300 lines):

diff --git a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh 
b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
--- a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
+++ b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
@@ -11,7 +11,7 @@ export MSERVERTEST='netstat -ant | grep 
 # Testing parameters
 # Input test (zero copy vs copy)
 # The input sizes to test (in MB)
-export INPUT_TESTING_SIZES="0.1 1 10 100 1000 10000"
+export INPUT_TESTING_SIZES="0.1 1 10 100 1000"
 # Amount of tests to run for each size
 export INPUT_TESTING_NTESTS=10
 
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -749,7 +749,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                 t_start = ceil((count * chunk) / totalchunks);
                 t_end = floor((count * (chunk + 1)) / totalchunks);
                 if (((int)count) / 2 * 2 == (int)count) t_end--;
-                VERBOSE_MESSAGE("---Start: %d, End: %d, Count: %d\n", t_start, 
t_end, t_end - t_start);
+                VERBOSE_MESSAGE("---Start: %zu, End: %zu, Count: %zu\n", 
t_start, t_end, t_end - t_start);
             }
         }
 #endif
@@ -830,111 +830,20 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
             goto wrapup;
         }
 
+        if (code_object == NULL) { PyRun_SimpleString("del pyfun"); }
+
         // Now we need to do some error checking on the result object, because 
the result object has to have the correct type/size
         // We will also do some converting of result objects to a common type 
(such as scalar -> [[scalar]])
-        if (pResult) {
-            PyObject * pColO = NULL;
-
-            if (PyType_IsPandasDataFrame(pResult)) {
-                //the result object is a Pandas data frame
-                //we can convert the pandas data frame to a numpy array by 
simply accessing the "values" field (as pandas dataframes are numpy arrays 
internally)
-                pResult = PyObject_GetAttrString(pResult, "values"); 
-                if (pResult == NULL) {
-                    msg = createException(MAL, "pyapi.eval", "Invalid Pandas 
data frame.");
-                    goto wrapup; 
-                }
-                //we transpose the values field so it's aligned correctly for 
our purposes
-                pResult = PyObject_GetAttrString(pResult, "T");
-                if (pResult == NULL) {
-                    msg = createException(MAL, "pyapi.eval", "Invalid Pandas 
data frame.");
-                    goto wrapup; 
-                }
-            }
-
-            if (PyType_IsPyScalar(pResult)) { //check if the return object is 
a scalar 
-                if (pci->retc == 1)  {
-                    //if we only expect a single return value, we can accept 
scalars by converting it into an array holding an array holding the element 
(i.e. [[pResult]])
-                    PyObject *list = PyList_New(1);
-                    PyList_SetItem(list, 0, pResult);
-                    pResult = list;
-
-                    list = PyList_New(1);
-                    PyList_SetItem(list, 0, pResult);
-                    pResult = list;
-                }
-                else {
-                    //the result object is a scalar, yet we expect more than 
one return value. We can only convert the result into a list with a single 
element, so the output is necessarily wrong.
-                    msg = createException(MAL, "pyapi.eval", "A single scalar 
was returned, yet we expect a list of %d columns. We can only convert a single 
scalar into a single column, thus the result is invalid.", pci->retc);
-                    goto wrapup;
-                }
-            }
-            else {
-                //if it is not a scalar, we check if it is a single array
-                bool IsSingleArray = TRUE;
-                PyObject *data = pResult;
-                if (PyType_IsNumpyMaskedArray(data)) {
-                    data = PyObject_GetAttrString(pResult, "data");   
-                    if (data == NULL) {
-                        msg = createException(MAL, "pyapi.eval", "Invalid 
masked array.");
-                        goto wrapup;
-                    }           
-                }
-                if (PyType_IsNumpyArray(data)) {
-                    if (PyArray_NDIM((PyArrayObject*)data) != 1) {
-                        IsSingleArray = FALSE;
-                    }
-                    else {
-                        pColO = PyArray_GETITEM((PyArrayObject*)data, 
PyArray_GETPTR1((PyArrayObject*)data, 0));
-                        IsSingleArray = PyType_IsPyScalar(pColO);
-                    }
-                }
-                else if (PyList_Check(data)) {
-                    pColO = PyList_GetItem(data, 0);
-                    IsSingleArray = PyType_IsPyScalar(pColO);
-                }
-                else if (PyLazyArray_CheckExact(data)) {
-                    pColO = data;
-                    IsSingleArray = TRUE;
-                } else if (!PyType_IsNumpyMaskedArray(data)) {
-                    //it is neither a python array, numpy array or numpy 
masked array, thus the result is unsupported! Throw an exception!
-                    msg = createException(MAL, "pyapi.eval", "Unsupported 
result object. Expected either an array, a numpy array, a numpy masked array or 
a pandas data frame, but received an object of type \"%s\"", 
PyString_AsString(PyObject_Str(PyObject_Type(data))));
-                    goto wrapup;
-                }
-
-                if (IsSingleArray) {
-                    if (pci->retc == 1) {
-                        //if we only expect a single return value, we can 
accept a single array by converting it into an array holding an array holding 
the element (i.e. [pResult])
-                        PyObject *list = PyList_New(1);
-                        PyList_SetItem(list, 0, pResult);
-                        pResult = list;
-                    }
-                    else {
-                        //the result object is a single array, yet we expect 
more than one return value. We can only convert the result into a list with a 
single array, so the output is necessarily wrong.
-                        msg = createException(MAL, "pyapi.eval", "A single 
array was returned, yet we expect a list of %d columns. The result is 
invalid.", pci->retc);
-                        goto wrapup;
-                    }
-                }
-                else {
-                    //the return value is an array of arrays, all we need to 
do is check if it is the correct size
-                    int results = 0;
-                    if (PyList_Check(data)) results = PyList_Size(data);
-                    else results = PyArray_DIMS((PyArrayObject*)data)[0];
-                    if (results != pci->retc) {
-                        //wrong return size, we expect pci->retc arrays
-                        msg = createException(MAL, "pyapi.eval", "An array of 
size %d was returned, yet we expect a list of %d columns. The result is 
invalid.", results, pci->retc);
-                        goto wrapup;
-                    }
-                }
-            }
-            if (code_object == NULL) { PyRun_SimpleString("del pyfun"); }
-        } else {
-            msg = createException(MAL, "pyapi.eval", "Invalid result object. 
No result object could be generated.");
+        pResult = PyObject_CheckForConversion(pResult, pci->retc, NULL, &msg);
+        if (pResult == NULL) {
             goto wrapup;
         }
     }
 
     VERBOSE_MESSAGE("Collecting return values.\n");
 
+
+
     // Now we have executed the Python function, we have to collect the return 
values and convert them to BATs
     // We will first collect header information about the Python return 
objects and extract the underlying C arrays
     // We will store this header information in a PyReturn object
@@ -942,89 +851,10 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
     // The reason we are doing this as a separate step is because this 
preprocessing requires us to call the Python API
     // Whereas the actual returning does not require us to call the Python API
     // This means we can do the actual returning without holding the GIL
-    for (i = 0; i < pci->retc; i++) {
-        // Refers to the current Numpy mask (if it exists)
-        PyObject *pMask = NULL;
-        // Refers to the current Numpy array
-        PyObject * pColO = NULL;
-        // This is the PyReturn header information for the current return 
value, we will fill this now
-        PyReturn *ret = &pyreturn_values[i];
-
-        ret->multidimensional = FALSE;
-        // There are three possibilities (we have ensured this right after 
executing the Python call)
-        // 1: The top level result object is a PyList or Numpy Array 
containing pci->retc Numpy Arrays
-        // 2: The top level result object is a (pci->retc x N) dimensional 
Numpy Array [Multidimensional]
-        // 3: The top level result object is a (pci->retc x N) dimensional 
Numpy Masked Array [Multidimensional]
-        if (PyList_Check(pResult)) {
-            // If it is a PyList, we simply get the i'th Numpy array from the 
PyList
-            pColO = PyList_GetItem(pResult, i);
-        }
-        else {
-            // If it isn't, the result object is either a Nump Masked Array or 
a Numpy Array
-            PyObject *data = pResult;
-            if (PyType_IsNumpyMaskedArray(data)) {
-                data = PyObject_GetAttrString(pResult, "data"); // If it is a 
Masked array, the data is stored in the masked_array.data attribute
-                pMask = PyObject_GetAttrString(pResult, "mask");    
-            }
-
-            // We can either have a multidimensional numpy array, or a single 
dimensional numpy array 
-            if (PyArray_NDIM((PyArrayObject*)data) != 1) {
-                // If it is a multidimensional numpy array, we have to convert 
the i'th dimension to a NUMPY array object
-                ret->multidimensional = TRUE;
-                ret->result_type = 
PyArray_DESCR((PyArrayObject*)data)->type_num;
-            }
-            else {
-                // If it is a single dimensional Numpy array, we get the i'th 
Numpy array from the Numpy Array
-                pColO = PyArray_GETITEM((PyArrayObject*)data, 
PyArray_GETPTR1((PyArrayObject*)data, i));
-            }
-        }
-
-        // Now we have to do some preprocessing on the data
-        if (ret->multidimensional) {
-            // If it is a multidimensional Numpy array, we don't need to do 
any conversion, we can just do some pointers
-            ret->count = PyArray_DIMS((PyArrayObject*)pResult)[1];        
-            ret->numpy_array = pResult;                   
-            ret->numpy_mask = pMask;   
-            ret->array_data = PyArray_DATA((PyArrayObject*)ret->numpy_array);
-            if (ret->numpy_mask != NULL) ret->mask_data = 
PyArray_DATA((PyArrayObject*)ret->numpy_mask);                 
-            ret->memory_size = 
PyArray_DESCR((PyArrayObject*)ret->numpy_array)->elsize;   
-        }
-        else {
-            if (PyLazyArray_CheckExact(pColO)) {
-                // To handle returning of lazy arrays, we just convert them to 
a Numpy array. This is slow and could be done much faster, but since this can 
only happen if we directly return one of the input arguments this should be a 
rare situation anyway.
-                pColO = PyLazyArray_AsNumpyArray(pColO);
-                if (pColO == NULL) {
-                    msg = PyError_CreateException("Failed to convert lazy 
array to numpy array.\n", NULL);
-                    goto wrapup;
-                }
-            }
-            // If it isn't we need to convert pColO to the expected Numpy 
Array type
-            ret->numpy_array = PyArray_FromAny(pColO, NULL, 1, 1, 
NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, NULL);
-            if (ret->numpy_array == NULL) {
-                msg = createException(MAL, "pyapi.eval", "Could not create a 
Numpy array from the return type.\n");
-                goto wrapup;
-            }
-            
-            ret->result_type = 
PyArray_DESCR((PyArrayObject*)ret->numpy_array)->type_num; // We read the 
result type from the resulting array
-            ret->memory_size = 
PyArray_DESCR((PyArrayObject*)ret->numpy_array)->elsize;
-            ret->count = PyArray_DIMS((PyArrayObject*)ret->numpy_array)[0];
-            ret->array_data = PyArray_DATA((PyArrayObject*)ret->numpy_array);
-            // If pColO is a Masked array, we convert the mask to a NPY_BOOL 
numpy array     
-            if (PyObject_HasAttrString(pColO, "mask")) {
-                pMask = PyObject_GetAttrString(pColO, "mask");
-                if (pMask != NULL) {
-                    ret->numpy_mask = PyArray_FromAny(pMask, 
PyArray_DescrFromType(NPY_BOOL), 1, 1,  NPY_ARRAY_CARRAY, NULL);
-                    if (ret->numpy_mask == NULL || 
PyArray_DIMS((PyArrayObject*)ret->numpy_mask)[0] != (int)ret->count)
-                    {
-                        PyErr_Clear();
-                        pMask = NULL;
-                        ret->numpy_mask = NULL;                            
-                    }
-                }
-            }
-            if (ret->numpy_mask != NULL) ret->mask_data = 
PyArray_DATA((PyArrayObject*)ret->numpy_mask); 
-        }
+    if (!PyObject_PreprocessObject(pResult, pyreturn_values, pci->retc, &msg)) 
{
+        goto wrapup;
     }
+    
 
 #ifndef WIN32
     /*[SHARED_MEMORY]*/
@@ -1239,184 +1069,18 @@ returnvalues:
     {
         PyReturn *ret = &pyreturn_values[i];
         int bat_type = ATOMstorage(getColumnType(getArgType(mb,pci,i)));
-        size_t index_offset = 0;
 
         if (bat_type == TYPE_any || bat_type == TYPE_void) {
             getArgType(mb,pci,i) = bat_type;
             msg = createException(MAL, "pyapi.eval", "Unknown return value, 
possibly projecting with no parameters.");
             goto wrapup;
-       }
-
-        if (ret->multidimensional) index_offset = i;
-        VERBOSE_MESSAGE("- Returning a Numpy Array of type %s of size %zu and 
storing it in a BAT of type %s\n", PyType_Format(ret->result_type), ret->count, 
 BatType_Format(bat_type));
-        switch (bat_type) 
-        {
-        case TYPE_bte:
-            NP_CREATE_BAT(b, bit);
-            break;
-        case TYPE_sht:
-            NP_CREATE_BAT(b, sht);
-            break;
-        case TYPE_int:
-            NP_CREATE_BAT(b, int);
-            break;
-        case TYPE_lng:
-            NP_CREATE_BAT(b, lng);
-            break;
-        case TYPE_flt:
-            NP_CREATE_BAT(b, flt);
-            break;
-        case TYPE_dbl:
-            NP_CREATE_BAT(b, dbl);
-            break;
-#ifdef HAVE_HGE
-        case TYPE_hge:
-            NP_CREATE_BAT(b, hge);
-            break;
-#endif
-        case TYPE_str:
-            {
-                bool *mask = NULL;   
-                char *data = NULL;  
-                char *utf8_string = NULL;
-                if (ret->mask_data != NULL)   
-                {   
-                    mask = (bool*)ret->mask_data;   
-                }   
-                if (ret->array_data == NULL)   
-                {   
-                    msg = createException(MAL, "pyapi.eval", "No return value 
stored in the structure.  n");         
-                    goto wrapup;      
-                }          
-                data = (char*) ret->array_data;   
-
-                if (ret->result_type != NPY_OBJECT) {
-                    utf8_string = GDKzalloc(256 + ret->memory_size + 1); 
-                    utf8_string[256 + ret->memory_size] = '\0';       
-                }
-
-                b = BATnew(TYPE_void, TYPE_str, ret->count, TRANSIENT);    
-                BATseqbase(b, seqbase); b->T->nil = 0; b->T->nonil = 1;        
 
-                b->tkey = 0; b->tsorted = 0; b->trevsorted = 0;
-                VERBOSE_MESSAGE("- Collecting return values of type %s.\n", 
PyType_Format(ret->result_type));
-                switch(ret->result_type)                                       
                   
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to