This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 652c950183 [arrow-pyarrow]: restore nicer pyarrow-arrow error message
(#9639)
652c950183 is described below
commit 652c95018349b37864799f088bb3d7b5eba97e90
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Apr 1 16:50:47 2026 -0400
[arrow-pyarrow]: restore nicer pyarrow-arrow error message (#9639)
# Which issue does this PR close?
- Follow on to https://github.com/apache/arrow-rs/pull/9594
# Rationale for this change
@kylebarron says in
https://github.com/apache/arrow-rs/pull/9594#discussion_r3004995827:
> fwiw previously there was a nice user-facing error here, while now the
error generated from extract will be much more obtuse. Ideally this
exception will never be raised except if the producer doesn't follow the
spec correctly.
# What changes are included in this PR?
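Restore the nice error: when `__arrow_c_array__` does not return a tuple of
(schema, array) capsules, raise a `TypeError` with an explicit message instead
of the generic error produced by `extract`.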
# Are these changes tested?
Yes, added a test (`test_from_pyarrow_non_tuple`).
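# Are there any user-facing changes?
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
If there are any breaking changes to public APIs, please call them out.
-->
Yes: when `__arrow_c_array__` returns something other than a tuple of
(schema, array) capsules, conversion now fails with a `TypeError` carrying an
explicit message. No public API changes.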
---
arrow-pyarrow-testing/tests/pyarrow.rs | 31 +++++++++++++++++++++++++++++++
arrow-pyarrow/src/lib.rs | 24 ++++++++++++++++++++----
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/arrow-pyarrow-testing/tests/pyarrow.rs b/arrow-pyarrow-testing/tests/pyarrow.rs
index 4ca661b104..6f3606478c 100644
--- a/arrow-pyarrow-testing/tests/pyarrow.rs
+++ b/arrow-pyarrow-testing/tests/pyarrow.rs
@@ -42,7 +42,10 @@ use arrow_array::{
Array, ArrayRef, BinaryViewArray, Int32Array, RecordBatch, StringArray,
StringViewArray,
};
use arrow_pyarrow::{FromPyArrow, ToPyArrow};
+use pyo3::exceptions::PyTypeError;
+use pyo3::types::{PyAnyMethods, PyModule};
use pyo3::Python;
+use std::ffi::CString;
use std::sync::Arc;
#[test]
@@ -94,6 +97,34 @@ fn test_to_pyarrow_byte_view() {
}
}
+#[test]
+fn test_from_pyarrow_non_tuple() {
+ Python::initialize();
+
+ Python::attach(|py| {
+ let code = CString::new(
+ r#"
+class NotATuple:
+ def __arrow_c_array__(self):
+ return 1
+
+value = NotATuple()
+"#,
+ )
+ .unwrap();
+
+ let module = PyModule::from_code(py, code.as_c_str(), c"test.py", c"test_module").unwrap();
+ let value = module.getattr("value").unwrap();
+
+ let err = RecordBatch::from_pyarrow_bound(&value).unwrap_err();
+ assert!(err.is_instance_of::<PyTypeError>(py));
+ assert_eq!(
+ err.to_string(),
+ "TypeError: Expected __arrow_c_array__ to return a tuple of (schema, array) capsules."
+ );
+ });
+}
+
fn binary_view_column(num_variadic_buffers: usize) -> BinaryViewArray {
let long_scalar = b"but soft what light through yonder window
breaks".as_slice();
let mut builder =
BinaryViewBuilder::new().with_fixed_block_size(long_scalar.len() as u32);
diff --git a/arrow-pyarrow/src/lib.rs b/arrow-pyarrow/src/lib.rs
index 95f1d38fdd..d8f584e396 100644
--- a/arrow-pyarrow/src/lib.rs
+++ b/arrow-pyarrow/src/lib.rs
@@ -149,6 +149,24 @@ fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
Ok(())
}
+fn extract_arrow_c_array_capsules<'py>(
+ value: &Bound<'py, PyAny>,
+) -> PyResult<(Bound<'py, PyCapsule>, Bound<'py, PyCapsule>)> {
+ let tuple = value.call_method0("__arrow_c_array__")?;
+
+ if !tuple.is_instance_of::<PyTuple>() {
+ return Err(PyTypeError::new_err(
+ "Expected __arrow_c_array__ to return a tuple of (schema, array) capsules.",
+ ));
+ }
+
+ tuple.extract().map_err(|_| {
+ PyTypeError::new_err(
+ "Expected __arrow_c_array__ to return a tuple of (schema, array) capsules.",
+ )
+ })
+}
+
impl FromPyArrow for DataType {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow
data implement this
@@ -245,8 +263,7 @@ impl FromPyArrow for ArrayData {
// method, so prefer it over _export_to_c.
// See
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_array__")? {
- let (schema_capsule, array_capsule) =
- value.call_method0("__arrow_c_array__")?.extract()?;
+ let (schema_capsule, array_capsule) = extract_arrow_c_array_capsules(value)?;
validate_pycapsule(&schema_capsule, "arrow_schema")?;
validate_pycapsule(&array_capsule, "arrow_array")?;
@@ -324,8 +341,7 @@ impl FromPyArrow for RecordBatch {
// See
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_array__")? {
- let (schema_capsule, array_capsule) =
- value.call_method0("__arrow_c_array__")?.extract()?;
+ let (schema_capsule, array_capsule) = extract_arrow_c_array_capsules(value)?;
validate_pycapsule(&schema_capsule, "arrow_schema")?;
validate_pycapsule(&array_capsule, "arrow_array")?;