Changeset: 663b509eff6c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=663b509eff6c
Modified Files:
	monetdb5/extras/pyapi/Makefile.ag
	monetdb5/extras/pyapi/Tests/pyapi00.malC
	monetdb5/extras/pyapi/pyapi.c
	monetdb5/extras/pyapi/pyapi.mal
Branch: pyapi
Log Message:

Python API: diffs (truncated from 508 to 300 lines): diff --git a/monetdb5/extras/pyapi/Makefile.ag b/monetdb5/extras/pyapi/Makefile.ag --- a/monetdb5/extras/pyapi/Makefile.ag +++ b/monetdb5/extras/pyapi/Makefile.ag @@ -18,19 +18,18 @@ lib__pyapi = { MODULE DIR = libdir/monetdb5 SOURCES = pyapi.c pyapi.h + XDEPS = $(libpy_LIBDEP) LIBS = ../../tools/libmonetdb5 \ ../../../gdk/libbat \ $(MALLOC_LIBS) $(libpy_LIBS) } - headers_pyapi_mal = { HEADERS = mal DIR = libdir/monetdb5 SOURCES = pyapi.mal } - headers_autoload = { HEADERS = mal DIR = libdir/monetdb5/autoload diff --git a/monetdb5/extras/pyapi/Tests/pyapi00.malC b/monetdb5/extras/pyapi/Tests/pyapi00.malC --- a/monetdb5/extras/pyapi/Tests/pyapi00.malC +++ b/monetdb5/extras/pyapi/Tests/pyapi00.malC @@ -12,7 +12,7 @@ bat.append(b,1189641421); io.print(b); -r:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"TODO",b); +r:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"print(arg1);someval <- Re(fft(arg1)); print(someval); return(someval);",b); io.print(r); diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -28,16 +28,14 @@ int PyAPIEnabled(void) { || GDKgetenv_isyes(pyapi_enableflag)); } +// TODO: exclude pyapi from mergetable, too // TODO: can we call the Python interpreter in a multi-thread environment? 
static MT_Lock pyapiLock; static int pyapiInitialized = FALSE; -static int PyAPIinitialize(void) { - Py_Initialize(); - pyapiInitialized++; - return 0; -} +#define _PYAPI_DEBUG_ + pyapi_export str PyAPIevalStd(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { @@ -48,212 +46,214 @@ pyapi_export str PyAPIevalAggr(Client cn return PyAPIeval(cntxt, mb, stk, pci, 1); } + str PyAPIeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, bit grouped) { + sql_func * sqlfun = *(sql_func**) getArgReference(stk, pci, pci->retc); + str exprStr = *getArgReference_str(stk, pci, pci->retc + 1); - sql_func * sqlfun = *(sql_func**) getArgReference(stk, pci, pci->retc); - str exprStr = *getArgReference_str(stk, pci, pci->retc + 1); + int i = 1, ai = 0; + char argbuf[64]; + char argnames[1000] = ""; + size_t pos; + char* rcall = NULL; + size_t rcalllen; + size_t ret_rows = 0; + //int ret_cols = 0; /* int because pci->retc is int, too*/ + str *args; + //int evalErr; + char *msg = MAL_SUCCEED; + BAT *b; + BUN cnt; + node * argnode; + int seengrp = FALSE; + PyObject *pArgs; // this is going to be the parameter tuple - int i = 1, ai = 0; - char argbuf[64]; - char argnames[1000] = ""; - size_t pos; - char* rcall = NULL; - size_t rcalllen; - size_t ret_rows = 0; - //int ret_cols = 0; /* int because pci->retc is int, too*/ - str *args; - //int evalErr; - char *msg = MAL_SUCCEED; - BAT *b; - BUN cnt; - node * argnode; - int seengrp = FALSE; - PyObject *pArgs; // this is going to be the parameter tuple! + // we don't need no context, but the compiler needs us to touch it (...) + (void) cntxt; - // we don't need no context, but the compiler needs us to touch it (...) - (void) cntxt; + if (!PyAPIEnabled()) { + throw(MAL, "pyapi.eval", + "Embedded Python has not been enabled. Start server with --set %s=true", + pyapi_enableflag); + } - if (!PyAPIEnabled()) { - throw(MAL, "pyapi.eval", - "Embedded Python has not been enabled. 
Start server with --set %s=true", - pyapi_enableflag); + rcalllen = strlen(exprStr) + sizeof(argnames) + 100; + rcall = malloc(rcalllen); + args = (str*) GDKzalloc(sizeof(str) * pci->argc); + + if (args == NULL || rcall == NULL) { + throw(MAL, "pyapi.eval", MAL_MALLOC_FAIL); + // TODO: free args and rcall + } + + // TODO: do we need this lock for Python as well? + MT_lock_set(&pyapiLock, "pyapi.evaluate"); + + + + // first argument after the return contains the pointer to the sql_func structure + if (sqlfun != NULL && sqlfun->ops->cnt > 0) { + int carg = pci->retc + 2; + argnode = sqlfun->ops->h; + while (argnode) { + char* argname = ((sql_arg*) argnode->data)->name; + args[carg] = GDKstrdup(argname); + carg++; + argnode = argnode->next; } + } + pArgs = PyTuple_New(pci->argc - pci->retc + 2); - rcalllen = strlen(exprStr) + sizeof(argnames) + 100; - rcall = malloc(rcalllen); - args = (str*) GDKzalloc(sizeof(str) * pci->argc); - - if (args == NULL || rcall == NULL) { - throw(MAL, "pyapi.eval", MAL_MALLOC_FAIL); - // TODO: free args and rcall - } - - // TODO: do we need this lock for Python as well? - MT_lock_set(&pyapiLock, "pyapi.evaluate"); - - // first argument after the return contains the pointer to the sql_func structure - if (sqlfun != NULL && sqlfun->ops->cnt > 0) { - int carg = pci->retc + 2; - argnode = sqlfun->ops->h; - while (argnode) { - char* argname = ((sql_arg*) argnode->data)->name; - args[carg] = GDKstrdup(argname); - carg++; - argnode = argnode->next; + // the first unknown argument is the group, we don't really care for the rest. + for (i = pci->retc + 2; i < pci->argc; i++) { + if (args[i] == NULL) { + if (!seengrp && grouped) { + args[i] = GDKstrdup("aggr_group"); + seengrp = TRUE; + } else { + snprintf(argbuf, sizeof(argbuf), "arg%i", i - pci->retc - 1); + args[i] = GDKstrdup(argbuf); } } - pArgs = PyTuple_New(pci->argc - pci->retc + 2); + } - // the first unknown argument is the group, we don't really care for the rest. 
- for (i = pci->retc + 2; i < pci->argc; i++) { - if (args[i] == NULL) { - if (!seengrp && grouped) { - args[i] = GDKstrdup("aggr_group"); - seengrp = TRUE; - } else { - snprintf(argbuf, sizeof(argbuf), "arg%i", i - pci->retc - 1); - args[i] = GDKstrdup(argbuf); - } + // for each input column (BAT): + for (i = pci->retc + 2; i < pci->argc; i++) { + // turn scalars into one-valued BATs + // TODO: also do this for Python? Or should scalar values be 'simple' variables? + if (!isaBatType(getArgType(mb,pci,i))) { + b = BATnew(TYPE_void, getArgType(mb, pci, i), 0, TRANSIENT); + if (b == NULL) { + msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL); + goto wrapup; + } + if ( getArgType(mb,pci,i) == TYPE_str) + BUNappend(b, *getArgReference_str(stk, pci, i), FALSE); + else + BUNappend(b, getArgReference(stk, pci, i), FALSE); + BATsetcount(b, 1); + BATseqbase(b, 0); + BATsettrivprop(b); + } else { + b = BATdescriptor(*getArgReference_bat(stk, pci, i)); + if (b == NULL) { + msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL); + goto wrapup; } } - // for each input column (BAT): - for (i = pci->retc + 2; i < pci->argc; i++) { - // turn scalars into one-valued BATs - // TODO: also do this for Python? Or should scalar values be 'simple' variables? 
- if (!isaBatType(getArgType(mb,pci,i))) { - b = BATnew(TYPE_void, getArgType(mb, pci, i), 0, TRANSIENT); - if (b == NULL) { - msg = createException(MAL, "rapi.eval", MAL_MALLOC_FAIL); - goto wrapup; - } - if ( getArgType(mb,pci,i) == TYPE_str) - BUNappend(b, *getArgReference_str(stk, pci, i), FALSE); - else - BUNappend(b, getArgReference(stk, pci, i), FALSE); - BATsetcount(b, 1); - BATseqbase(b, 0); - BATsettrivprop(b); - } else { - b = BATdescriptor(*getArgReference_bat(stk, pci, i)); - if (b == NULL) { - msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL); - goto wrapup; - } - } + PyObject *varlist = PyList_New(BATcount(b)); + size_t j; - PyObject *varlist = PyList_New(BATcount(b)); - size_t j; + switch (ATOMstorage(getColumnType(getArgType(mb,pci,i)))) { + case TYPE_int: + // BAT_TO_INTSXP(b, int, varvalue); + for (j = 0; j < BATcount(b); j++) { + int v = ((int*) Tloc(b, BUNfirst(b)))[j]; + //if ( v == int_nil) + // PyList_SET_ITEM(varlist, j, ); + //else + PyList_SET_ITEM(varlist, j, PyInt_FromLong(v)); + } + break; + // TODO: implement other types + default: + msg = createException(MAL, "pyapi.eval", "unknown argument type "); + goto wrapup; + } + BBPunfix(b->batCacheid); - switch (ATOMstorage(getColumnType(getArgType(mb,pci,i)))) { - case TYPE_int: - // BAT_TO_INTSXP(b, int, varvalue); - for (j = 0; j < BATcount(b); j++) { - int v = ((int*) Tloc(b, BUNfirst(b)))[j]; - //if ( v == int_nil) - // PyList_SET_ITEM(varlist, j, ); - //else - PyList_SET_ITEM(varlist, j, PyInt_FromLong(v)); - } - break; - // TODO: implement other types - default: - msg = createException(MAL, "pyapi.eval", "unknown argument type "); - goto wrapup; - } - BBPunfix(b->batCacheid); + PyTuple_SetItem(pArgs, ai++, varlist); + } - PyTuple_SetItem(pArgs, ai++, varlist); + pos = 0; + for (i = pci->retc + 2; i < pci->argc && pos < sizeof(argnames); i++) { + pos += snprintf(argnames + pos, sizeof(argnames) - pos, "%s%s", + args[i], i < pci->argc - 1 ? 
", " : ""); + } + if (pos >= sizeof(argnames)) { + msg = createException(MAL, "pyapi.eval", "Command too large"); + goto wrapup; + } + if (snprintf(rcall, rcalllen, + "ret <- as.data.frame((function(%s){%s})(%s), nm=NA, stringsAsFactors=F)\n", + argnames, exprStr, argnames) >= (int) rcalllen) { + msg = createException(MAL, "pyapi.eval", "Command too large"); + goto wrapup; + } +#ifdef _PYAPI_DEBUG_ + printf("# Python call %s\n",rcall); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list