andygrove commented on code in PR #3667:
URL: https://github.com/apache/datafusion-comet/pull/3667#discussion_r2921165539
##########
native/spark-expr/src/error.rs:
##########
@@ -15,855 +15,16 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::error::ArrowError;
-use datafusion::common::DataFusionError;
-use std::sync::Arc;
-
-#[derive(thiserror::Error, Debug, Clone)]
-pub enum SparkError {
- // This list was generated from the Spark code. Many of the exceptions are
not yet used by Comet
- #[error("[CAST_INVALID_INPUT] The value '{value}' of the type
\"{from_type}\" cannot be cast to \"{to_type}\" \
- because it is malformed. Correct the value as per the syntax, or
change its target type. \
- Use `try_cast` to tolerate malformed input and return NULL instead. If
necessary \
- set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
- CastInvalidValue {
- value: String,
- from_type: String,
- to_type: String,
- },
-
- #[error("[NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION] {value} cannot be
represented as Decimal({precision}, {scale}). If necessary set
\"spark.sql.ansi.enabled\" to \"false\" to bypass this error, and return NULL
instead.")]
- NumericValueOutOfRange {
- value: String,
- precision: u8,
- scale: i8,
- },
-
- #[error("[NUMERIC_OUT_OF_SUPPORTED_RANGE] The value {value} cannot be
interpreted as a numeric since it has more than 38 digits.")]
- NumericOutOfRange { value: String },
-
- #[error("[CAST_OVERFLOW] The value {value} of the type \"{from_type}\"
cannot be cast to \"{to_type}\" \
- due to an overflow. Use `try_cast` to tolerate overflow and return
NULL instead. If necessary \
- set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
- CastOverFlow {
- value: String,
- from_type: String,
- to_type: String,
- },
-
- #[error("[CANNOT_PARSE_DECIMAL] Cannot parse decimal.")]
- CannotParseDecimal,
-
- #[error("[ARITHMETIC_OVERFLOW] {from_type} overflow. If necessary set
\"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
- ArithmeticOverflow { from_type: String },
-
- #[error("[ARITHMETIC_OVERFLOW] Overflow in integral divide. Use
`try_divide` to tolerate overflow and return NULL instead. If necessary set
\"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
- IntegralDivideOverflow,
-
- #[error("[ARITHMETIC_OVERFLOW] Overflow in sum of decimals. If necessary
set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
- DecimalSumOverflow,
-
- #[error("[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate
divisor being 0 and return NULL instead. If necessary set
\"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
- DivideByZero,
-
- #[error("[REMAINDER_BY_ZERO] Division by zero. Use `try_remainder` to
tolerate divisor being 0 and return NULL instead. If necessary set
\"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
- RemainderByZero,
-
- #[error("[INTERVAL_DIVIDED_BY_ZERO] Divide by zero in interval
arithmetic.")]
- IntervalDividedByZero,
-
- #[error("[BINARY_ARITHMETIC_OVERFLOW] {value1} {symbol} {value2} caused
overflow. Use `{function_name}` to tolerate overflow and return NULL instead.
If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this
error.")]
- BinaryArithmeticOverflow {
- value1: String,
- symbol: String,
- value2: String,
- function_name: String,
- },
-
- #[error("[INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION] Interval
arithmetic overflow. Use `{function_name}` to tolerate overflow and return NULL
instead. If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass
this error.")]
- IntervalArithmeticOverflowWithSuggestion { function_name: String },
-
- #[error("[INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION] Interval
arithmetic overflow. If necessary set \"spark.sql.ansi.enabled\" to \"false\"
to bypass this error.")]
- IntervalArithmeticOverflowWithoutSuggestion,
-
- #[error("[DATETIME_OVERFLOW] Datetime arithmetic overflow.")]
- DatetimeOverflow,
-
- #[error("[INVALID_ARRAY_INDEX] The index {index_value} is out of bounds.
The array has {array_size} elements. Use the SQL function get() to tolerate
accessing element at invalid index and return NULL instead. If necessary set
\"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
- InvalidArrayIndex { index_value: i32, array_size: i32 },
-
- #[error("[INVALID_ARRAY_INDEX_IN_ELEMENT_AT] The index {index_value} is
out of bounds. The array has {array_size} elements. Use try_element_at to
tolerate accessing element at invalid index and return NULL instead. If
necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
- InvalidElementAtIndex { index_value: i32, array_size: i32 },
-
- #[error("[INVALID_BITMAP_POSITION] The bit position {bit_position} is out
of bounds. The bitmap has {bitmap_num_bytes} bytes ({bitmap_num_bits} bits).")]
- InvalidBitmapPosition {
- bit_position: i64,
- bitmap_num_bytes: i64,
- bitmap_num_bits: i64,
- },
-
- #[error("[INVALID_INDEX_OF_ZERO] The index 0 is invalid. An index shall be
either < 0 or > 0 (the first element has index 1).")]
- InvalidIndexOfZero,
-
- #[error("[DUPLICATED_MAP_KEY] Cannot create map with duplicate keys:
{key}.")]
- DuplicatedMapKey { key: String },
-
- #[error("[NULL_MAP_KEY] Cannot use null as map key.")]
- NullMapKey,
-
- #[error("[MAP_KEY_VALUE_DIFF_SIZES] The key array and value array of a map
must have the same length.")]
- MapKeyValueDiffSizes,
-
- #[error("[EXCEED_LIMIT_LENGTH] Cannot create a map with {size} elements
which exceeds the limit {max_size}.")]
- ExceedMapSizeLimit { size: i32, max_size: i32 },
-
- #[error("[COLLECTION_SIZE_LIMIT_EXCEEDED] Cannot create array with
{num_elements} elements which exceeds the limit {max_elements}.")]
- CollectionSizeLimitExceeded {
- num_elements: i64,
- max_elements: i64,
- },
-
- #[error("[NOT_NULL_ASSERT_VIOLATION] The field `{field_name}` cannot be
null.")]
- NotNullAssertViolation { field_name: String },
-
- #[error("[VALUE_IS_NULL] The value of field `{field_name}` at row
{row_index} is null.")]
- ValueIsNull { field_name: String, row_index: i32 },
-
- #[error("[CANNOT_PARSE_TIMESTAMP] Cannot parse timestamp: {message}. Try
using `{suggested_func}` instead.")]
- CannotParseTimestamp {
- message: String,
- suggested_func: String,
- },
-
- #[error("[INVALID_FRACTION_OF_SECOND] The fraction of second {value} is
invalid. Valid values are in the range [0, 60]. If necessary set
\"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
- InvalidFractionOfSecond { value: f64 },
-
- #[error("[INVALID_UTF8_STRING] Invalid UTF-8 string: {hex_string}.")]
- InvalidUtf8String { hex_string: String },
-
- #[error("[UNEXPECTED_POSITIVE_VALUE] The {parameter_name} parameter must
be less than or equal to 0. The actual value is {actual_value}.")]
- UnexpectedPositiveValue {
- parameter_name: String,
- actual_value: i32,
- },
-
- #[error("[UNEXPECTED_NEGATIVE_VALUE] The {parameter_name} parameter must
be greater than or equal to 0. The actual value is {actual_value}.")]
- UnexpectedNegativeValue {
- parameter_name: String,
- actual_value: i32,
- },
-
- #[error("[INVALID_PARAMETER_VALUE] Invalid regex group index {group_index}
in function `{function_name}`. Group count is {group_count}.")]
- InvalidRegexGroupIndex {
- function_name: String,
- group_count: i32,
- group_index: i32,
- },
-
- #[error("[DATATYPE_CANNOT_ORDER] Cannot order by type: {data_type}.")]
- DatatypeCannotOrder { data_type: String },
-
- #[error("[SCALAR_SUBQUERY_TOO_MANY_ROWS] Scalar subquery returned more
than one row.")]
- ScalarSubqueryTooManyRows,
-
- #[error("ArrowError: {0}.")]
- Arrow(Arc<ArrowError>),
-
- #[error("InternalError: {0}.")]
- Internal(String),
-}
-
-impl SparkError {
- /// Serialize this error to JSON format for JNI transfer
- pub fn to_json(&self) -> String {
- let error_class = self.error_class().unwrap_or("");
-
- // Create a JSON structure with errorType, errorClass, and params
- match serde_json::to_string(&serde_json::json!({
- "errorType": self.error_type_name(),
- "errorClass": error_class,
- "params": self.params_as_json(),
- })) {
- Ok(json) => json,
- Err(e) => {
- // Fallback if serialization fails
- format!(
-
"{{\"errorType\":\"SerializationError\",\"message\":\"{}\"}}",
- e
- )
- }
- }
- }
-
- /// Get the error type name for JSON serialization
- fn error_type_name(&self) -> &'static str {
- match self {
- SparkError::CastInvalidValue { .. } => "CastInvalidValue",
- SparkError::NumericValueOutOfRange { .. } =>
"NumericValueOutOfRange",
- SparkError::NumericOutOfRange { .. } => "NumericOutOfRange",
- SparkError::CastOverFlow { .. } => "CastOverFlow",
- SparkError::CannotParseDecimal => "CannotParseDecimal",
- SparkError::ArithmeticOverflow { .. } => "ArithmeticOverflow",
- SparkError::IntegralDivideOverflow => "IntegralDivideOverflow",
- SparkError::DecimalSumOverflow => "DecimalSumOverflow",
- SparkError::DivideByZero => "DivideByZero",
- SparkError::RemainderByZero => "RemainderByZero",
- SparkError::IntervalDividedByZero => "IntervalDividedByZero",
- SparkError::BinaryArithmeticOverflow { .. } =>
"BinaryArithmeticOverflow",
- SparkError::IntervalArithmeticOverflowWithSuggestion { .. } => {
- "IntervalArithmeticOverflowWithSuggestion"
- }
- SparkError::IntervalArithmeticOverflowWithoutSuggestion => {
- "IntervalArithmeticOverflowWithoutSuggestion"
- }
- SparkError::DatetimeOverflow => "DatetimeOverflow",
- SparkError::InvalidArrayIndex { .. } => "InvalidArrayIndex",
- SparkError::InvalidElementAtIndex { .. } =>
"InvalidElementAtIndex",
- SparkError::InvalidBitmapPosition { .. } =>
"InvalidBitmapPosition",
- SparkError::InvalidIndexOfZero => "InvalidIndexOfZero",
- SparkError::DuplicatedMapKey { .. } => "DuplicatedMapKey",
- SparkError::NullMapKey => "NullMapKey",
- SparkError::MapKeyValueDiffSizes => "MapKeyValueDiffSizes",
- SparkError::ExceedMapSizeLimit { .. } => "ExceedMapSizeLimit",
- SparkError::CollectionSizeLimitExceeded { .. } =>
"CollectionSizeLimitExceeded",
- SparkError::NotNullAssertViolation { .. } =>
"NotNullAssertViolation",
- SparkError::ValueIsNull { .. } => "ValueIsNull",
- SparkError::CannotParseTimestamp { .. } => "CannotParseTimestamp",
- SparkError::InvalidFractionOfSecond { .. } =>
"InvalidFractionOfSecond",
- SparkError::InvalidUtf8String { .. } => "InvalidUtf8String",
- SparkError::UnexpectedPositiveValue { .. } =>
"UnexpectedPositiveValue",
- SparkError::UnexpectedNegativeValue { .. } =>
"UnexpectedNegativeValue",
- SparkError::InvalidRegexGroupIndex { .. } =>
"InvalidRegexGroupIndex",
- SparkError::DatatypeCannotOrder { .. } => "DatatypeCannotOrder",
- SparkError::ScalarSubqueryTooManyRows =>
"ScalarSubqueryTooManyRows",
- SparkError::Arrow(_) => "Arrow",
- SparkError::Internal(_) => "Internal",
- }
- }
-
- /// Extract parameters as JSON value
- fn params_as_json(&self) -> serde_json::Value {
- match self {
- SparkError::CastInvalidValue {
- value,
- from_type,
- to_type,
- } => {
- serde_json::json!({
- "value": value,
- "fromType": from_type,
- "toType": to_type,
- })
- }
- SparkError::NumericValueOutOfRange {
- value,
- precision,
- scale,
- } => {
- serde_json::json!({
- "value": value,
- "precision": precision,
- "scale": scale,
- })
- }
- SparkError::NumericOutOfRange { value } => {
- serde_json::json!({
- "value": value,
- })
- }
- SparkError::CastOverFlow {
- value,
- from_type,
- to_type,
- } => {
- serde_json::json!({
- "value": value,
- "fromType": from_type,
- "toType": to_type,
- })
- }
- SparkError::ArithmeticOverflow { from_type } => {
- serde_json::json!({
- "fromType": from_type,
- })
- }
- SparkError::BinaryArithmeticOverflow {
- value1,
- symbol,
- value2,
- function_name,
- } => {
- serde_json::json!({
- "value1": value1,
- "symbol": symbol,
- "value2": value2,
- "functionName": function_name,
- })
- }
- SparkError::IntervalArithmeticOverflowWithSuggestion {
function_name } => {
- serde_json::json!({
- "functionName": function_name,
- })
- }
- SparkError::InvalidArrayIndex {
- index_value,
- array_size,
- } => {
- serde_json::json!({
- "indexValue": index_value,
- "arraySize": array_size,
- })
- }
- SparkError::InvalidElementAtIndex {
- index_value,
- array_size,
- } => {
- serde_json::json!({
- "indexValue": index_value,
- "arraySize": array_size,
- })
- }
- SparkError::InvalidBitmapPosition {
- bit_position,
- bitmap_num_bytes,
- bitmap_num_bits,
- } => {
- serde_json::json!({
- "bitPosition": bit_position,
- "bitmapNumBytes": bitmap_num_bytes,
- "bitmapNumBits": bitmap_num_bits,
- })
- }
- SparkError::DuplicatedMapKey { key } => {
- serde_json::json!({
- "key": key,
- })
- }
- SparkError::ExceedMapSizeLimit { size, max_size } => {
- serde_json::json!({
- "size": size,
- "maxSize": max_size,
- })
- }
- SparkError::CollectionSizeLimitExceeded {
- num_elements,
- max_elements,
- } => {
- serde_json::json!({
- "numElements": num_elements,
- "maxElements": max_elements,
- })
- }
- SparkError::NotNullAssertViolation { field_name } => {
- serde_json::json!({
- "fieldName": field_name,
- })
- }
- SparkError::ValueIsNull {
- field_name,
- row_index,
- } => {
- serde_json::json!({
- "fieldName": field_name,
- "rowIndex": row_index,
- })
- }
- SparkError::CannotParseTimestamp {
- message,
- suggested_func,
- } => {
- serde_json::json!({
- "message": message,
- "suggestedFunc": suggested_func,
- })
- }
- SparkError::InvalidFractionOfSecond { value } => {
- serde_json::json!({
- "value": value,
- })
- }
- SparkError::InvalidUtf8String { hex_string } => {
- serde_json::json!({
- "hexString": hex_string,
- })
- }
- SparkError::UnexpectedPositiveValue {
- parameter_name,
- actual_value,
- } => {
- serde_json::json!({
- "parameterName": parameter_name,
- "actualValue": actual_value,
- })
- }
- SparkError::UnexpectedNegativeValue {
- parameter_name,
- actual_value,
- } => {
- serde_json::json!({
- "parameterName": parameter_name,
- "actualValue": actual_value,
- })
- }
- SparkError::InvalidRegexGroupIndex {
- function_name,
- group_count,
- group_index,
- } => {
- serde_json::json!({
- "functionName": function_name,
- "groupCount": group_count,
- "groupIndex": group_index,
- })
- }
- SparkError::DatatypeCannotOrder { data_type } => {
- serde_json::json!({
- "dataType": data_type,
- })
- }
- SparkError::Arrow(e) => {
- serde_json::json!({
- "message": e.to_string(),
- })
- }
- SparkError::Internal(msg) => {
- serde_json::json!({
- "message": msg,
- })
- }
- // Simple errors with no parameters
- _ => serde_json::json!({}),
- }
- }
-
- /// Returns the appropriate Spark exception class for this error
- pub fn exception_class(&self) -> &'static str {
- match self {
- // ArithmeticException
- SparkError::DivideByZero
- | SparkError::RemainderByZero
- | SparkError::IntervalDividedByZero
- | SparkError::NumericValueOutOfRange { .. }
- | SparkError::NumericOutOfRange { .. } // Comet-specific extension
- | SparkError::ArithmeticOverflow { .. }
- | SparkError::IntegralDivideOverflow
- | SparkError::DecimalSumOverflow
- | SparkError::BinaryArithmeticOverflow { .. }
- | SparkError::IntervalArithmeticOverflowWithSuggestion { .. }
- | SparkError::IntervalArithmeticOverflowWithoutSuggestion
- | SparkError::DatetimeOverflow =>
"org/apache/spark/SparkArithmeticException",
-
- // CastOverflow gets special handling with CastOverflowException
- SparkError::CastOverFlow { .. } =>
"org/apache/spark/sql/comet/CastOverflowException",
-
- // NumberFormatException (for cast invalid input errors)
- SparkError::CastInvalidValue { .. } =>
"org/apache/spark/SparkNumberFormatException",
-
- // ArrayIndexOutOfBoundsException
- SparkError::InvalidArrayIndex { .. }
- | SparkError::InvalidElementAtIndex { .. }
- | SparkError::InvalidBitmapPosition { .. }
- | SparkError::InvalidIndexOfZero =>
"org/apache/spark/SparkArrayIndexOutOfBoundsException",
-
- // RuntimeException
- SparkError::CannotParseDecimal
- | SparkError::DuplicatedMapKey { .. }
- | SparkError::NullMapKey
- | SparkError::MapKeyValueDiffSizes
- | SparkError::ExceedMapSizeLimit { .. }
- | SparkError::CollectionSizeLimitExceeded { .. }
- | SparkError::NotNullAssertViolation { .. }
- | SparkError::ValueIsNull { .. } // Comet-specific extension
- | SparkError::UnexpectedPositiveValue { .. }
- | SparkError::UnexpectedNegativeValue { .. }
- | SparkError::InvalidRegexGroupIndex { .. }
- | SparkError::ScalarSubqueryTooManyRows =>
"org/apache/spark/SparkRuntimeException",
-
- // DateTimeException
- SparkError::CannotParseTimestamp { .. }
- | SparkError::InvalidFractionOfSecond { .. } =>
"org/apache/spark/SparkDateTimeException",
-
- // IllegalArgumentException
- SparkError::DatatypeCannotOrder { .. }
- | SparkError::InvalidUtf8String { .. } =>
"org/apache/spark/SparkIllegalArgumentException",
-
- // Generic errors
- SparkError::Arrow(_) | SparkError::Internal(_) =>
"org/apache/spark/SparkException",
- }
- }
-
- /// Returns the Spark error class code for this error
- pub fn error_class(&self) -> Option<&'static str> {
- match self {
- // Cast errors
- SparkError::CastInvalidValue { .. } => Some("CAST_INVALID_INPUT"),
- SparkError::CastOverFlow { .. } => Some("CAST_OVERFLOW"),
- SparkError::NumericValueOutOfRange { .. } => {
- Some("NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION")
- }
- SparkError::NumericOutOfRange { .. } =>
Some("NUMERIC_OUT_OF_SUPPORTED_RANGE"),
- SparkError::CannotParseDecimal => Some("CANNOT_PARSE_DECIMAL"),
-
- // Arithmetic errors
- SparkError::DivideByZero => Some("DIVIDE_BY_ZERO"),
- SparkError::RemainderByZero => Some("REMAINDER_BY_ZERO"),
- SparkError::IntervalDividedByZero =>
Some("INTERVAL_DIVIDED_BY_ZERO"),
- SparkError::ArithmeticOverflow { .. } =>
Some("ARITHMETIC_OVERFLOW"),
- SparkError::IntegralDivideOverflow => Some("ARITHMETIC_OVERFLOW"),
- SparkError::DecimalSumOverflow => Some("ARITHMETIC_OVERFLOW"),
- SparkError::BinaryArithmeticOverflow { .. } =>
Some("BINARY_ARITHMETIC_OVERFLOW"),
- SparkError::IntervalArithmeticOverflowWithSuggestion { .. } => {
- Some("INTERVAL_ARITHMETIC_OVERFLOW")
- }
- SparkError::IntervalArithmeticOverflowWithoutSuggestion => {
- Some("INTERVAL_ARITHMETIC_OVERFLOW")
- }
- SparkError::DatetimeOverflow => Some("DATETIME_OVERFLOW"),
-
- // Array index errors
- SparkError::InvalidArrayIndex { .. } =>
Some("INVALID_ARRAY_INDEX"),
- SparkError::InvalidElementAtIndex { .. } =>
Some("INVALID_ARRAY_INDEX_IN_ELEMENT_AT"),
- SparkError::InvalidBitmapPosition { .. } =>
Some("INVALID_BITMAP_POSITION"),
- SparkError::InvalidIndexOfZero => Some("INVALID_INDEX_OF_ZERO"),
-
- // Map/Collection errors
- SparkError::DuplicatedMapKey { .. } => Some("DUPLICATED_MAP_KEY"),
- SparkError::NullMapKey => Some("NULL_MAP_KEY"),
- SparkError::MapKeyValueDiffSizes =>
Some("MAP_KEY_VALUE_DIFF_SIZES"),
- SparkError::ExceedMapSizeLimit { .. } =>
Some("EXCEED_LIMIT_LENGTH"),
- SparkError::CollectionSizeLimitExceeded { .. } => {
- Some("COLLECTION_SIZE_LIMIT_EXCEEDED")
- }
-
- // Null validation errors
- SparkError::NotNullAssertViolation { .. } =>
Some("NOT_NULL_ASSERT_VIOLATION"),
- SparkError::ValueIsNull { .. } => Some("VALUE_IS_NULL"),
-
- // DateTime errors
- SparkError::CannotParseTimestamp { .. } =>
Some("CANNOT_PARSE_TIMESTAMP"),
- SparkError::InvalidFractionOfSecond { .. } =>
Some("INVALID_FRACTION_OF_SECOND"),
-
- // String/UTF8 errors
- SparkError::InvalidUtf8String { .. } =>
Some("INVALID_UTF8_STRING"),
-
- // Function parameter errors
- SparkError::UnexpectedPositiveValue { .. } =>
Some("UNEXPECTED_POSITIVE_VALUE"),
- SparkError::UnexpectedNegativeValue { .. } =>
Some("UNEXPECTED_NEGATIVE_VALUE"),
-
- // Regex errors
- SparkError::InvalidRegexGroupIndex { .. } =>
Some("INVALID_PARAMETER_VALUE"),
-
- // Unsupported operation errors
- SparkError::DatatypeCannotOrder { .. } =>
Some("DATATYPE_CANNOT_ORDER"),
-
- // Subquery errors
- SparkError::ScalarSubqueryTooManyRows =>
Some("SCALAR_SUBQUERY_TOO_MANY_ROWS"),
-
- // Generic errors (no error class)
- SparkError::Arrow(_) | SparkError::Internal(_) => None,
- }
- }
-}
+// Re-export SparkError types from jvm-bridge crate
Review Comment:
I agree that this went too far. I ran into an issue with implementing `From`
in a crate different from the one where the types were defined. I'll see if I
can find a better approach.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]