martin-g commented on code in PR #8759:
URL: https://github.com/apache/arrow-rs/pull/8759#discussion_r2510708913
##########
arrow-avro/src/reader/header.rs:
##########
@@ -39,7 +39,7 @@ pub(crate) fn read_header<R: BufRead>(mut reader: R) ->
Result<Header, ArrowErro
}
}
decoder.flush().ok_or_else(|| {
- ArrowError::ParseError("Unexpected EOF while reading Avro
header".to_string())
+ AvroError::ParseError("Unexpected EOF while reading Avro
header".to_string())
Review Comment:
```suggestion
AvroError::EOF("Unexpected EOF while reading Avro
header".to_string())
```
##########
arrow-avro/src/reader/mod.rs:
##########
@@ -835,7 +835,7 @@ impl Decoder {
// We must flush the active decoder before switching to the pending
one.
let batch = self.flush_and_reset();
self.apply_pending_schema();
- batch
+ batch.map_err(ArrowError::from)
Review Comment:
Is this correct?
`flush_and_reset()` now returns `Result<Option<RecordBatch>>`, i.e. its error
type is `AvroError`, but `flush` still returns an `ArrowError`
(`Result<Option<RecordBatch>, ArrowError>`).
##########
arrow-avro/src/writer/encoder.rs:
##########
@@ -155,13 +149,12 @@ fn write_sign_extended<W: Write + ?Sized>(
if src_be[..extra].iter().any(|&b| b != sign_byte)
|| ((src_be[extra] ^ sign_byte) & 0x80) != 0
{
- return Err(ArrowError::InvalidArgumentError(format!(
+ return Err(AvroError::InvalidArgument(format!(
"Decimal value with {len} bytes cannot be represented in {n}
bytes without overflow",
)));
}
- return out
- .write_all(&src_be[extra..])
- .map_err(|e| ArrowError::IoError(format!("write decimal fixed:
{e}"), e));
Review Comment:
I don't think this addresses @alamb's concerns. Now `e` is stringified and
it loses the `io::Error` source. As far as I can see, only `External`
preserves the source/cause
(https://github.com/elastiflow/arrow-rs/blob/b40bb9c6b70ecefee8647e0c3dd9be77bc6ca2bf/arrow-avro/src/errors.rs#L54)
##########
arrow-avro/src/reader/mod.rs:
##########
@@ -1009,13 +1009,13 @@ impl ReaderBuilder {
&self,
header: Option<&Header>,
reader_schema: Option<&AvroSchema>,
- ) -> Result<Decoder, ArrowError> {
+ ) -> Result<Decoder> {
if let Some(hdr) = header {
let writer_schema = hdr
.schema()
- .map_err(|e| ArrowError::ExternalError(Box::new(e)))?
+ .map_err(|e| AvroError::External(Box::new(e)))?
Review Comment:
Is this really needed?
It is already an `AvroError`, so there is no need to wrap it in another one.
##########
arrow-avro/src/errors.rs:
##########
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Common Avro errors and macros.
+
+use arrow_schema::ArrowError;
+use core::num::TryFromIntError;
+use std::error::Error;
+use std::string::FromUtf8Error;
+use std::{io, result, str};
+
+/// Avro error enumeration
+
+#[derive(Debug)]
+#[non_exhaustive]
+pub enum AvroError {
+ /// General Avro error.
+ /// Returned when code violates normal workflow of working with Avro data.
+ General(String),
+ /// "Not yet implemented" Avro error.
+ /// Returned when functionality is not yet available.
+ NYI(String),
+ /// "End of file" Avro error.
+ /// Returned when IO related failures occur, e.g. when there are not
enough bytes to
+ /// decode.
+ EOF(String),
+ /// Arrow error.
+ /// Returned when reading into arrow or writing from arrow.
+ ArrowError(Box<ArrowError>),
+ /// Error when the requested index is more than the
+ /// number of items expected
+ IndexOutOfBound(usize, usize),
+ /// Error indicating that an unexpected or bad argument was passed to a
function.
+ InvalidArgument(String),
+ /// Error indicating that a value could not be parsed.
+ ParseError(String),
+ /// Error indicating that a schema is invalid.
+ SchemaError(String),
+ /// An external error variant
+ External(Box<dyn Error + Send + Sync>),
+ /// Returned when a function needs more data to complete properly. The
`usize` field indicates
+ /// the total number of bytes required, not the number of additional bytes.
+ NeedMoreData(usize),
+ /// Returned when a function needs more data to complete properly.
+ /// The `Range<u64>` indicates the range of bytes that are needed.
+ NeedMoreDataRange(std::ops::Range<u64>),
+}
+
+impl std::fmt::Display for AvroError {
+ fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match &self {
+ AvroError::General(message) => {
+ write!(fmt, "Avro error: {message}")
+ }
+ AvroError::NYI(message) => write!(fmt, "NYI: {message}"),
+ AvroError::EOF(message) => write!(fmt, "EOF: {message}"),
+ AvroError::ArrowError(message) => write!(fmt, "Arrow: {message}"),
+ AvroError::IndexOutOfBound(index, bound) => {
+ write!(fmt, "Index {index} out of bound: {bound}")
+ }
+ AvroError::InvalidArgument(message) => {
+ write!(fmt, "Invalid argument: {message}")
+ }
+ AvroError::ParseError(message) => write!(fmt, "Parse error:
{message}"),
+ AvroError::SchemaError(message) => write!(fmt, "Schema error:
{message}"),
+ AvroError::External(e) => write!(fmt, "External: {e}"),
+ AvroError::NeedMoreData(needed) => write!(fmt, "NeedMoreData:
{needed}"),
+ AvroError::NeedMoreDataRange(range) => {
+ write!(fmt, "NeedMoreDataRange: {}..{}", range.start,
range.end)
+ }
+ }
+ }
+}
+
+impl Error for AvroError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ match self {
+ AvroError::External(e) => Some(e.as_ref()),
+ AvroError::ArrowError(e) => Some(e.as_ref()),
+ _ => None,
+ }
+ }
+}
+
+impl From<TryFromIntError> for AvroError {
+ fn from(e: TryFromIntError) -> AvroError {
+ AvroError::General(format!("Integer overflow: {e}"))
+ }
+}
+
+impl From<io::Error> for AvroError {
+ fn from(e: io::Error) -> AvroError {
+ AvroError::External(Box::new(e))
+ }
+}
+
+impl From<str::Utf8Error> for AvroError {
+ fn from(e: str::Utf8Error) -> AvroError {
+ AvroError::External(Box::new(e))
+ }
+}
+
+impl From<FromUtf8Error> for AvroError {
+ fn from(e: FromUtf8Error) -> AvroError {
+ AvroError::External(Box::new(e))
+ }
+}
+
+impl From<ArrowError> for AvroError {
+ fn from(e: ArrowError) -> Self {
+ AvroError::ArrowError(Box::new(e))
+ }
+}
+
+/// A specialized `Result` for Avro errors.
+pub type Result<T, E = AvroError> = result::Result<T, E>;
+
+impl From<AvroError> for io::Error {
+ fn from(e: AvroError) -> Self {
+ io::Error::other(e)
+ }
+}
+
+impl From<AvroError> for ArrowError {
+ fn from(e: AvroError) -> Self {
+ match e {
+ AvroError::External(inner) =>
ArrowError::from_external_error(inner),
+ AvroError::ArrowError(inner) =>
ArrowError::from_external_error(inner),
Review Comment:
```suggestion
AvroError::ArrowError(inner) => *inner,
```
?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]