Changeset: be2c60d4445d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=be2c60d4445d
Modified Files:
        monetdb5/extras/pyapi/Tests/pyapi_types_huge.malC
        monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err
        monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.out
        monetdb5/extras/pyapi/pyapi.c
        monetdb5/extras/pyapi/type_conversion.c
        monetdb5/extras/pyapi/type_conversion.h
Branch: pyapi
Log Message:

Strings can now be stored in either an NPY_STRING array (one big char array) or
an NPY_OBJECT array (pointers), depending on the numpy_string_array flag.


diffs (truncated from 745 to 300 lines):

diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.malC 
b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.malC
--- a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.malC
+++ b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.malC
@@ -23,6 +23,18 @@ io.print(rhge, shge);
 (rhge:bat[:oid,:hge], shge:bat[:oid,:hge]) := 
pyapi.eval(nil:ptr,"return(numpy.array([[3200.3,12.7],[44.1,22.8]]))",bhge);
 io.print(rhge, shge);
 
+# convert hge to string
+rhge:bat[:oid,:str] := pyapi.eval(nil:ptr,"return(arg1)",bhge);
+io.print(rhge);
+
+# convert string to hge
+bstr:= bat.new(:oid,:str);
+bat.append(bstr,"412412":str);
+bat.append(bstr,"13231414":str);
+bat.append(bstr,"895233278923448975389573895731":str);
+rhge:bat[:oid,:hge] := pyapi.eval(nil:ptr,"return(arg1)", bstr);
+io.print(rhge);
+
 
 # return multidimensional huge
 (rhge:bat[:oid,:hge], shge:bat[:oid,:hge]) := 
pyapi.eval(nil:ptr,"return(numpy.ma.masked_array([arg1, arg1], 
[arg1.mask,arg1.mask]))",bhge);
@@ -54,3 +66,4 @@ io.print(rint);
 # convert huge to double
 rdbl:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return(arg1)",z);
 io.print(rdbl);
+
diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err 
b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err
--- a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err
+++ b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err
@@ -33,6 +33,7 @@ stderr of test 'pyapi_types_huge` in dir
 !WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are 
instead converted to python objects of type "long". This is likely very slow.
 !WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are 
instead converted to python objects of type "long". This is likely very slow.
 !WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are 
instead converted to python objects of type "long". This is likely very slow.
+!WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are 
instead converted to python objects of type "long". This is likely very slow.
 
 # 00:29:47 >  
 # 00:29:47 >  "mclient" "-lmal" "-ftest" "-Eutf-8" "-i" "-e" 
"--host=/var/tmp/mtest-30800" "--port=32717"
diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.out 
b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.out
--- a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.out
+++ b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.out
@@ -60,6 +60,24 @@ Ready.
 #--------------------------#
 [ 0@0, 3200,   44      ]
 [ 1@0, 12,     22      ]
+#io.print(rhge);
+#--------------------------#
+# h    t  # name
+# void str  # type
+#--------------------------#
+[ 0@0, "18044433428933534654634643698858345"   ]
+[ 1@0, "895233278923448975389573895731"        ]
+[ 2@0, "558372892789247104910348981249"        ]
+[ 3@0, "-23"   ]
+[ 4@0, nil     ]
+#io.print(rhge);
+#--------------------------#
+# h    t  # name
+# void hge  # type
+#--------------------------#
+[ 0@0, 412412  ]
+[ 1@0, 13231414        ]
+[ 2@0, 895233278923448975389573895731  ]
 #io.print(rhge, shge);
 #--------------------------#
 # h    t       t  # name
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -119,7 +119,6 @@ int PyAPIEnabled(void) {
 
 char* FormatCode(char* code, char **args, size_t argcount, size_t tabwidth);
 
-// TODO: exclude pyapi from mergetable, too
 static MT_Lock pyapiLock;
 static MT_Lock pyapiSluice;
 static int pyapiInitialized = FALSE;
@@ -287,7 +286,7 @@ static int pyapiInitialized = FALSE;
         else                                                                   
                                                                                
\
         {                                                                      
                                                                                
\
             bat = BATnew(TYPE_void, TYPE_##mtpe, ret->count, TRANSIENT);       
                                                                                
\
-            BATseqbase(bat, seqbase); bat->T->nil = 0; bat->T->nonil = 1;      
                                                                                
      \
+            BATseqbase(bat, seqbase); bat->T->nil = 0; bat->T->nonil = 1;      
                                                                                
\
             bat->tkey = 0; bat->tsorted = 0; bat->trevsorted = 0;              
                                                                                
\
             switch(ret->result_type)                                           
                                                                                
\
             {                                                                  
                                                                                
\
@@ -308,6 +307,7 @@ static int pyapiInitialized = FALSE;
                 case NPY_LONGDOUBLE: NP_COL_BAT_LOOP(bat, mtpe, dbl); break;   
                                                                                
\
                 case NPY_STRING:     NP_COL_BAT_LOOP_FUNC(bat, mtpe, 
str_to_##mtpe); break;                                                          
          \
                 case NPY_UNICODE:    NP_COL_BAT_LOOP_FUNC(bat, mtpe, 
unicode_to_##mtpe); break;                                                      
          \
+                case NPY_OBJECT:     NP_COL_BAT_LOOP_FUNC(bat, mtpe, 
pyobject_to_##mtpe); break;                                                     
           \
                 default:                                                       
                                                                                
\
                     msg = createException(MAL, "pyapi.eval", "Unrecognized 
type. Could not convert to %s.\n", BatType_Format(TYPE_##mtpe));                
    \
                     goto wrapup;                                               
                                                                                
\
@@ -374,7 +374,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
     PyInput *pyinput_values = NULL;
     int seqbase = 0;
 
-    bool numpy_string_array = true;
+    bool numpy_string_array = false;
     bool option_verbose = GDKgetenv_isyes(verbose_enableflag) || 
GDKgetenv_istrue(verbose_enableflag);
     bool option_debug = GDKgetenv_isyes(debug_enableflag) || 
GDKgetenv_istrue(debug_enableflag);
     bool option_zerocopy = !(GDKgetenv_isyes(zerocopy_disableflag) || 
GDKgetenv_istrue(zerocopy_disableflag));
@@ -736,13 +736,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                     break;
 #ifdef HAVE_HGE
                 case TYPE_hge:
-                    {
-                        char hex[40];
-                        const hge *t = (const hge *) inp->dataptr;
-                        hge_to_string(hex, 40, *t);
-                        //then we create a PyLong from that string by parsing 
it
-                        vararray = PyLong_FromString(hex, NULL, 16);
-                    }
+                    vararray = PyLong_FromHge(*((hge *) inp->dataptr));
                     break;
 #endif
                 case TYPE_str:
@@ -911,45 +905,57 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                     }
                 }
                 else {
-                    // TODO: This
-                    // NPY_OBJECT array
-                    // vararray = PyArray_New(
-                    //     &PyArray_Type, 
-                    //     1, 
-                    //     (npy_intp[1]) {count},  
-                    //     NPY_OBJECT, 
-                    //     NULL, 
-                    //     NULL, 
-                    //     0, 
-                    //     0, 
-                    //     NULL);
-                    // j = 0;
-                    // BATloop(b, p, q)
-                    // {
-                    //     if (j >= t_start) {
-                    //         char *t = (char *) BUNtail(li, p);
-                    //         PyObject *obj;
-                    //         if (strcmp(t, str_nil) == 0) {
-                    //              //str_nil isn't a valid UTF-8 character 
(it's 0x80), so we can't decode it as UTF-8 (it will throw an error)
-                    //             obj = PyString_FromString("-");
-                    //         }
-                    //         else {
-                    //             //otherwise we can just decode the string 
as UTF-8
-                    //             obj = PyString_FromString(t);
-                    //         }
+                    bool ascii;
+                    li = bat_iterator(b);
+                    count = inp->count;
+                    //create a NPY_OBJECT array object
+                    vararray = PyArray_New(
+                        &PyArray_Type, 
+                        1, 
+                        (npy_intp[1]) {t_end - t_start},  
+                        NPY_OBJECT, 
+                        NULL, 
+                        NULL, 
+                        0,         
+                        0, 
+                        NULL);
+                    j = 0;
+                    BATloop(b, p, q)
+                    {
+                        if (j >= t_start) {
+                            char *t = (char *) BUNtail(li, p);
+                            PyObject *obj;
+                            utf8_strlen(t, &ascii);
+                            if (!ascii) {
+                                if (strcmp(t, str_nil) == 0) {
+                                     //str_nil isn't a valid UTF-8 character 
(it's 0x80), so we can't decode it as UTF-8 (it will throw an error)
+                                    obj = PyUnicode_FromString("-");
+                                }
+                                else {
+                                    //otherwise we can just decode the string 
as UTF-8
+                                    obj = PyUnicode_FromString(t);
+                                }
+                            } else {
+                                if (strcmp(t, str_nil) == 0) {
+                                     //str_nil isn't a valid UTF-8 character 
(it's 0x80), so we can't decode it as UTF-8 (it will throw an error)
+                                    obj = PyString_FromString("-");
+                                }
+                                else {
+                                    //otherwise we can just decode the string 
as UTF-8
+                                    obj = PyString_FromString(t);
+                                }
+                            }
 
-                    //         if (obj == NULL)
-                    //         {
-                    //             PyErr_Print();
-                    //             msg = createException(MAL, "pyapi.eval", 
"Failed to decode string as UTF-8.");
-                    //             goto wrapup;
-                    //         }
-                    //         PyArray_SETITEM((PyArrayObject*)vararray, 
PyArray_GETPTR1((PyArrayObject*)vararray, j), obj);
-                    //     }
-                    //     if (j == t_end) break;
-                    //     j++;
-                    // }
-                    // PyArray_INCREF((PyArrayObject*)vararray);
+                            if (obj == NULL)
+                            {
+                                msg = createException(MAL, "pyapi.eval", 
"Failed to create string.");
+                                goto wrapup;
+                            }
+                            PyArray_SETITEM((PyArrayObject*)vararray, 
PyArray_GETPTR1((PyArrayObject*)vararray, j), obj);
+                            }
+                        if (j == t_end) break;
+                        j++;
+                    }
                 }
                 break;
 #ifdef HAVE_HGE
@@ -966,24 +972,16 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                     NPY_OBJECT, 
                     NULL, 
                     NULL, 
-                    128,          //128 bits per value
-                    0, 
+                    0,
+                    0,
                     NULL);
 
                 j = 0;
                 fprintf(stderr, "!WARNING: Type \"hge\" (128 bit) is 
unsupported by Numpy. The numbers are instead converted to python objects of 
type \"long\". This is likely very slow.\n");
                 BATloop(b, p, q) {
-                    char hex[40];
                     PyObject *obj;
                     const hge *t = (const hge *) BUNtail(li, p);
-                    hge_to_string(hex, 40, *t);
-                    //then we create a PyLong from that string by parsing it
-                    obj = PyLong_FromString(hex, NULL, 16);
-                    if (obj == NULL) {
-                        PyErr_Print();
-                        msg = createException(MAL, "pyapi.eval", "Failed to 
convert huge array.");
-                        goto wrapup;
-                    }
+                    obj = PyLong_FromHge(*t);
                     PyArray_SETITEM((PyArrayObject*)vararray, 
PyArray_GETPTR1((PyArrayObject*)vararray, j), obj);
                     j++;
                 }
@@ -1171,37 +1169,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                     }
                 }
             }
-            if (!PyList_Check(pResult)) {
-                //check if the result is a multi-dimensional numpy array of 
type NPY_OBJECT
-                //if the result object is a multi-dimensional numpy array of 
type NPY_OBJECT, we convert it to NPY_STRING because we don't know how to 
handle NPY_OBJECT arrays otherwise (they could contain literally anything)
-                if (PyType_IsNumpyMaskedArray(pResult)) {
-                    PyObject *data, *mask;
-                    data = PyObject_GetAttrString(pResult, "data");  
-                    if (PyArray_NDIM((PyArrayObject*)data) != 1 && 
PyArray_DESCR((PyArrayObject*)data)->type_num == NPY_OBJECT) {
-                        //if it's a masked array we have to copy the mask 
along with converting the data to NPY_STRING 
-                        PyObject *mafunc, *maargs;
-                        PyObject *tp = PyArray_FromAny(pResult, 
PyArray_DescrFromType(NPY_STRING), 0, 0, NPY_ARRAY_CARRAY | 
NPY_ARRAY_FORCECAST, NULL);
-                        mask = PyObject_GetAttrString(pResult, "mask"); 
-
-                        mafunc = 
PyObject_GetAttrString(PyImport_Import(PyString_FromString("numpy.ma")), 
"masked_array");
-                        maargs = PyTuple_New(2);
-                        PyTuple_SetItem(maargs, 0, tp);
-                        PyTuple_SetItem(maargs, 1, mask);
-                        mask = PyObject_CallObject(mafunc, maargs);
-                        Py_DECREF(pResult);
-                        Py_DECREF(mafunc);
-                        pResult = mask;
-                    }  
-                }
-                else {
-                    if (PyArray_NDIM((PyArrayObject*)pResult) != 1 && 
PyArray_DESCR((PyArrayObject*)pResult)->type_num == NPY_OBJECT) {
-                        //if it's not a masked array we just convert the data 
to NPY_STRING
-                        PyObject *tp = PyArray_FromAny(pResult, 
PyArray_DescrFromType(NPY_STRING), 0, 0, NPY_ARRAY_CARRAY | 
NPY_ARRAY_FORCECAST, NULL);
-                        Py_DECREF(pResult);
-                        pResult = tp;
-                    }
-                }
-            }
             PyRun_SimpleString("del pyfun");
         }
         else {
@@ -1230,7 +1197,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
         int bat_type = ATOMstorage(getColumnType(getArgType(mb,pci,i)));
 
         ret->multidimensional = FALSE;
-
         // There are three possibilities (we have ensured this right after 
executing the Python call)
         // 1: The top level result object is a PyList or Numpy Array 
containing pci->retc Numpy Arrays
         // 2: The top level result object is a (pci->retc x N) dimensional 
Numpy Array [Multidimensional]
@@ -1258,7 +1224,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                 pColO = PyArray_GETITEM((PyArrayObject*)data, 
PyArray_GETPTR1((PyArrayObject*)data, i));
             }
         }
-
         // Now we have to do some preprocessing on the data
         if (ret->multidimensional) {
             // If it is a multidimensional Numpy array, we don't need to do 
any conversion, we can just do some pointers
@@ -1272,7 +1237,8 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
         else {
             // If it isn't we need to convert pColO to the expected Numpy 
Array type
             ret->numpy_array = NULL;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to