Repository: arrow Updated Branches: refs/heads/master 909f826b5 -> 81be9c667
ARROW-866: [Python] Be robust to PyErr_Fetch returning a null exc value cc @BryanCutler. This was a tricky one. I am not sure how to reproduce with our current code -- I reverted the patch from ARROW-822 to get a reproduction so I could fix this. Now, the error raised is: ``` /home/wesm/code/arrow/python/pyarrow/_error.pyx in pyarrow._error.check_status (/home/wesm/code/arrow/python/build/temp.linux-x86_64-2.7/_error.cxx:1324)() 58 raise ArrowInvalid(message) 59 elif status.IsIOError(): ---> 60 raise ArrowIOError(message) 61 elif status.IsOutOfMemory(): 62 raise ArrowMemoryError(message) ArrowIOError: IOError: Error message was null ``` I'm not sure why calling `tell` on the socket object results in a bad exception state, but in any case it seems that the result of `PyErr_Fetch` cannot be relied upon to be non-null even when `PyErr_Occurred()` returns non-null Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #606 from wesm/ARROW-866 and squashes the following commits: fa395cd [Wes McKinney] Enable other kinds of Status errors to be returned 0bd11c2 [Wes McKinney] Consolidate error handling code a bit 9d59dd2 [Wes McKinney] Be robust to PyErr_Fetch returning a null exc value Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/81be9c66 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/81be9c66 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/81be9c66 Branch: refs/heads/master Commit: 81be9c6679466177d4b8e5dbca55f81185bb3ec6 Parents: 909f826 Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Thu Apr 27 18:10:24 2017 +0200 Committer: Uwe L. Korn <uw...@xhochy.com> Committed: Thu Apr 27 18:10:24 2017 +0200 ---------------------------------------------------------------------- cpp/src/arrow/python/common.cc | 22 ++++++++++++++++++++++ cpp/src/arrow/python/common.h | 29 ++++++++++++++--------------- cpp/src/arrow/python/io.cc | 29 +++++++---------------------- cpp/src/arrow/status.h | 3 +++ 4 files changed, 46 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/81be9c66/cpp/src/arrow/python/common.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc index 717cb5c..bedd458 100644 --- a/cpp/src/arrow/python/common.cc +++ b/cpp/src/arrow/python/common.cc @@ -64,5 +64,27 @@ PyBuffer::~PyBuffer() { Py_XDECREF(obj_); } +Status CheckPyError(StatusCode code) { + if (PyErr_Occurred()) { + PyObject *exc_type, *exc_value, *traceback; + PyErr_Fetch(&exc_type, &exc_value, &traceback); + PyObjectStringify stringified(exc_value); + Py_XDECREF(exc_type); + Py_XDECREF(exc_value); + Py_XDECREF(traceback); + PyErr_Clear(); + + // ARROW-866: in some esoteric cases, formatting exc_value can fail. This + // was encountered when calling tell() on a socket file + if (stringified.bytes != nullptr) { + std::string message(stringified.bytes); + return Status(code, message); + } else { + return Status(code, "Error message was null"); + } + } + return Status::OK(); +} + } // namespace py } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/81be9c66/cpp/src/arrow/python/common.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h index 0211823..c5745a5 100644 --- a/cpp/src/arrow/python/common.h +++ b/cpp/src/arrow/python/common.h @@ -98,27 +98,26 @@ struct ARROW_EXPORT PyObjectStringify { if (PyUnicode_Check(obj)) { bytes_obj = PyUnicode_AsUTF8String(obj); tmp_obj.reset(bytes_obj); + bytes = PyBytes_AsString(bytes_obj); + size = PyBytes_GET_SIZE(bytes_obj); + } else if (PyBytes_Check(obj)) { + bytes = PyBytes_AsString(obj); + size = PyBytes_GET_SIZE(obj); } else { - bytes_obj = obj; + bytes = nullptr; + size = -1; } - bytes = PyBytes_AsString(bytes_obj); - size = PyBytes_GET_SIZE(bytes_obj); } }; +Status CheckPyError(StatusCode code = StatusCode::UnknownError); + // TODO(wesm): We can just let errors pass through. To be explored later -#define RETURN_IF_PYERROR() \ - if (PyErr_Occurred()) { \ - PyObject *exc_type, *exc_value, *traceback; \ - PyErr_Fetch(&exc_type, &exc_value, &traceback); \ - PyObjectStringify stringified(exc_value); \ - std::string message(stringified.bytes); \ - Py_DECREF(exc_type); \ - Py_XDECREF(exc_value); \ - Py_XDECREF(traceback); \ - PyErr_Clear(); \ - return Status::UnknownError(message); \ - } +#define RETURN_IF_PYERROR() \ + RETURN_NOT_OK(CheckPyError()); + +#define PY_RETURN_IF_ERROR(CODE) \ + RETURN_NOT_OK(CheckPyError(CODE)); // Return the common PyArrow memory pool ARROW_EXPORT void set_default_memory_pool(MemoryPool* pool); http://git-wip-us.apache.org/repos/asf/arrow/blob/81be9c66/cpp/src/arrow/python/io.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/io.cc b/cpp/src/arrow/python/io.cc index 327e8fe..a719385 100644 --- a/cpp/src/arrow/python/io.cc +++ b/cpp/src/arrow/python/io.cc @@ -41,21 +41,6 @@ PythonFile::~PythonFile() { Py_DECREF(file_); } -static Status CheckPyError() { - if (PyErr_Occurred()) { - PyObject *exc_type, *exc_value, *traceback; - PyErr_Fetch(&exc_type, &exc_value, &traceback); - PyObjectStringify stringified(exc_value); - std::string message(stringified.bytes); - Py_XDECREF(exc_type); - Py_XDECREF(exc_value); - Py_XDECREF(traceback); - PyErr_Clear(); - return Status::IOError(message); - } - return Status::OK(); -} - // This is annoying: because C++11 does not allow implicit conversion of string // literals to non-const char*, we need to go through some gymnastics to use // PyObject_CallMethod without a lot of pain (its arguments are non-const @@ -71,7 +56,7 @@ Status PythonFile::Close() { // whence: 0 for relative to start of file, 2 for end of file PyObject* result = cpp_PyObject_CallMethod(file_, "close", "()"); Py_XDECREF(result); - ARROW_RETURN_NOT_OK(CheckPyError()); + PY_RETURN_IF_ERROR(StatusCode::IOError); return Status::OK(); } @@ -79,13 +64,13 @@ Status PythonFile::Seek(int64_t position, int whence) { // whence: 0 for relative to start of file, 2 for end of file PyObject* result = cpp_PyObject_CallMethod(file_, "seek", "(ii)", position, whence); Py_XDECREF(result); - ARROW_RETURN_NOT_OK(CheckPyError()); + PY_RETURN_IF_ERROR(StatusCode::IOError); return Status::OK(); } Status PythonFile::Read(int64_t nbytes, PyObject** out) { PyObject* result = cpp_PyObject_CallMethod(file_, "read", "(i)", nbytes); - ARROW_RETURN_NOT_OK(CheckPyError()); + PY_RETURN_IF_ERROR(StatusCode::IOError); *out = result; return Status::OK(); } @@ -93,24 +78,24 @@ Status PythonFile::Read(int64_t nbytes, PyObject** out) { Status PythonFile::Write(const uint8_t* data, int64_t nbytes) { PyObject* py_data = PyBytes_FromStringAndSize(reinterpret_cast<const char*>(data), nbytes); - ARROW_RETURN_NOT_OK(CheckPyError()); + PY_RETURN_IF_ERROR(StatusCode::IOError); PyObject* result = cpp_PyObject_CallMethod(file_, "write", "(O)", py_data); Py_XDECREF(py_data); Py_XDECREF(result); - ARROW_RETURN_NOT_OK(CheckPyError()); + PY_RETURN_IF_ERROR(StatusCode::IOError); return Status::OK(); } Status PythonFile::Tell(int64_t* position) { PyObject* result = cpp_PyObject_CallMethod(file_, "tell", "()"); - ARROW_RETURN_NOT_OK(CheckPyError()); + PY_RETURN_IF_ERROR(StatusCode::IOError); *position = PyLong_AsLongLong(result); Py_DECREF(result); // PyLong_AsLongLong can raise OverflowError - ARROW_RETURN_NOT_OK(CheckPyError()); + PY_RETURN_IF_ERROR(StatusCode::IOError); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/81be9c66/cpp/src/arrow/status.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h index dd65b75..6a8cee2 100644 --- a/cpp/src/arrow/status.h +++ b/cpp/src/arrow/status.h @@ -91,6 +91,9 @@ class ARROW_EXPORT Status { Status() : state_(NULL) {} ~Status() { delete[] state_; } + Status(StatusCode code, const std::string& msg) + : Status(code, msg, -1) {} + // Copy the specified status. Status(const Status& s); void operator=(const Status& s);