Changeset: aefe683f0b5e for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=aefe683f0b5e
Modified Files:
        monetdb5/extras/pyapi/Tests/pyapi02.malC
        monetdb5/extras/pyapi/pyapi.c
Branch: pyapi
Log Message:

Python API: mask array (working once, unclear why)


diffs (207 lines):

diff --git a/monetdb5/extras/pyapi/Tests/pyapi02.malC b/monetdb5/extras/pyapi/Tests/pyapi02.malC
--- a/monetdb5/extras/pyapi/Tests/pyapi02.malC
+++ b/monetdb5/extras/pyapi/Tests/pyapi02.malC
@@ -8,16 +8,16 @@ bat.append(bbit,0:bit);
 bat.append(bbit,1:bit);
 bat.append(bbit,0:bit);
 bat.append(bbit,nil:bit);
-rbit:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bbit);
+rbit:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"print(arg1)\nreturn([numpy.add(arg1.filled(0),42)])",bbit);
 io.print(rbit);
 
 bbte:= bat.new(:oid,:bte);
 bat.append(bbte,42:bte);
 bat.append(bbte,84:bte);
-bat.append(bbte,254:bte);
+bat.append(bbte,111:bte);
 bat.append(bbte,0:bte);
 bat.append(bbte,nil:bte);
-rbte:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bbte);
+rbte:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([[42,43]])",bbte);
 io.print(rbte);
 
 bsht:= bat.new(:oid,:sht);
@@ -26,7 +26,7 @@ bat.append(bsht,82:sht);
 bat.append(bsht,0:sht);
 bat.append(bsht,3276:sht);
 bat.append(bsht,nil:sht);
-rsht:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bsht);
+rsht:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bsht);
 io.print(rsht);
 
 bint:= bat.new(:oid,:int);
@@ -35,7 +35,7 @@ bat.append(bint,846930886:int);
 bat.append(bint,1681692777:int);
 bat.append(bint,1714636915:int);
 bat.append(bint,nil:int);
-rint:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bint);
+rint:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bint);
 io.print(rint);
 
 bwrd:= bat.new(:oid,:wrd);
@@ -43,7 +43,7 @@ bat.append(bwrd,1804289383:wrd);
 bat.append(bwrd,846930886:wrd);
 bat.append(bwrd,1681692777:wrd);
 bat.append(bwrd,1714636915:wrd);
-rwrd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bwrd);
+rwrd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bwrd);
 io.print(rwrd);
 
 blng:= bat.new(:oid,:lng);
@@ -51,7 +51,7 @@ bat.append(blng,1804289383L);
 bat.append(blng,846930886L);
 bat.append(blng,1681692777L);
 bat.append(blng,1714636915L);
-rlng:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",blng);
+rlng:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",blng);
 io.print(rlng);
 
 # not sure what to with hge, numpy only supports 128 bits when sizeof(long)=16
@@ -89,7 +89,7 @@ bat.append(bstr,"asdf":str);
 bat.append(bstr,"sd asd asd asd asd a":str);
 bat.append(bstr,"":str);
 bat.append(bstr,nil:str);
-rstr:bat[:oid,:int] := rapi.eval(nil:ptr,"unlist(lapply(arg1,nchar))",bstr);
+rstr:bat[:oid,:int] := pyapi.eval(nil:ptr,"unlist(lapply(arg1,nchar))",bstr);
 io.print(rstr);
 
 
@@ -103,26 +103,26 @@ bat.append(binto,1681692777:int);
 bat.append(binto,1714636915:int);
 bat.append(binto,nil:int);
 
-rintbi:bat[:oid,:int] := rapi.eval(nil:ptr,"arg1",binto);
+rintbi:bat[:oid,:int] := pyapi.eval(nil:ptr,"arg1",binto);
 io.print(rintbi);
 
-rintbi2:bat[:oid,:int] := rapi.eval(nil:ptr,"as.integer(arg1)",binto);
+rintbi2:bat[:oid,:int] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
 io.print(rintbi2);
 
-rintbl:bat[:oid,:lng] := rapi.eval(nil:ptr,"as.integer(arg1)",binto);
+rintbl:bat[:oid,:lng] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
 io.print(rintbl);
 
-rintbh:bat[:oid,:hge] := rapi.eval(nil:ptr,"as.integer(arg1)",binto);
+rintbh:bat[:oid,:hge] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
 io.print(rintbh);
 
-rintbd:bat[:oid,:dbl] := rapi.eval(nil:ptr,"as.numeric(arg1)",binto);
+rintbd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"as.numeric(arg1)",binto);
 io.print(rintbd);
 
-rintbs:bat[:oid,:str] := rapi.eval(nil:ptr,"as.character(arg1)",binto);
+rintbs:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.character(arg1)",binto);
 io.print(rintbs);
 
 # factors should be strings
-rintbf:bat[:oid,:str] := rapi.eval(nil:ptr,"as.factor(arg1)",binto);
+rintbf:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.factor(arg1)",binto);
 io.print(rintbf);
 
 
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -58,6 +58,7 @@ static int pyapiInitialized = FALSE;
                                        *(mtpe*) PyArray_GETPTR1(pCol, j); }              \
                BATsetcount(bat, cnt); }
 
+// TODO: also handle the case if someone returns a masked array
 
 #define _PYAPI_DEBUG_
 
@@ -152,6 +153,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
        // for each input column (BAT):
        for (i = pci->retc + 2; i < pci->argc; i++) {
                PyObject *vararray = NULL;
+               // null mask for masked array
 
                // turn scalars into one-valued BATs
                // TODO: also do this for Python? Or should scalar values be 'simple' variables?
@@ -176,6 +178,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                        }
                }
 
+
                switch (ATOMstorage(getColumnType(getArgType(mb,pci,i)))) {
                case TYPE_bte:
                        vararray = BAT_TO_NP(b, bte, NPY_INT8);
@@ -195,16 +198,50 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                case TYPE_dbl:
                        vararray = BAT_TO_NP(b, dbl, NPY_FLOAT64);
                        break;
-                       // TODO: handle NULLs!
 
                // TODO: implement other types (strings, boolean)
                default:
                        msg = createException(MAL, "pyapi.eval", "unknown argument type ");
                        goto wrapup;
                }
+
+               // we use numpy.ma to deal with possible NULL values in the data
+               // once numpy comes with proper NA support, this will change
+               {
+                       PyObject *mafunc = PyObject_GetAttrString(PyImport_Import(
+                                       PyString_FromString("numpy.ma")), "masked_array");
+                       PyObject *maargs = PyTuple_New(2);
+                       PyArrayObject* nullmask = (PyArrayObject*) PyArray_ZEROS(1,
+                                                       (npy_intp[1]) {BATcount(b)}, NPY_BOOL, 0);
+
+                       const void *nil = ATOMnilptr(b->ttype);
+                       int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
+                       BATiter bi = bat_iterator(b);
+
+                       if (b->T->nil) {
+                               size_t j;
+                               for (j = 0; j < BATcount(b); j++) {
+                                       if ((*atomcmp)(BUNtail(bi, BUNfirst(b) + j), nil) == 0) {
+                                               // Houston we have a NULL
+                                               PyArray_SETITEM(nullmask, PyArray_GETPTR1(nullmask, j), Py_True);
+                                       }
+                               }
+                       }
+                       PyTuple_SetItem(maargs, 0, vararray);
+                       PyTuple_SetItem(maargs, 1, (PyObject*) nullmask);
+                       vararray = PyObject_CallObject(mafunc, maargs);
+                       if (!vararray) {
+                               msg = createException(MAL, "pyapi.eval", "UUUH");
+                                               goto wrapup;
+                       }
+               }
+               PyTuple_SetItem(pArgs, ai++, vararray);
+
+               // TODO: we cannot clean this up just yet, there may be a shallow copy referenced in python.
+               // TODO: do this later
+
                BBPunfix(b->batCacheid);
 
-               PyTuple_SetItem(pArgs, ai++, vararray);
        }
 
        // create argument list
@@ -284,11 +321,13 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                msg = createException(MAL, "pyapi.eval", "Command too large");
                goto wrapup;
        }
-
        {
                int pyret;
                PyObject *pFunc, *pModule;
 
+               // TODO: does this create overhead?, see if we can share the import
+               PyRun_SimpleString("import numpy");
+
                pModule = PyImport_Import(PyString_FromString("__main__"));
                pyret = PyRun_SimpleString(pycall);
                pFunc = PyObject_GetAttrString(pModule, "pyfun");
@@ -298,9 +337,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                        goto wrapup;
                }
 
-               // TODO: does this create overhead?, see if we can share the import
-               PyRun_SimpleString("import numpy");
-
                pResult = PyObject_CallObject(pFunc, pArgs);
                if (PyErr_Occurred()) {
                        PyObject *pErrType, *pErrVal, *pErrTb;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to