This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 1439d9b275e [SPARK-38734][SQL] Remove the error class `INDEX_OUT_OF_BOUNDS`
1439d9b275e is described below

commit 1439d9b275e844b5b595126bc97d2b44f6e859ed
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Tue Sep 13 10:54:53 2022 +0300

    [SPARK-38734][SQL] Remove the error class `INDEX_OUT_OF_BOUNDS`
    
    ### What changes were proposed in this pull request?
    In this PR, I propose to remove the error class `INDEX_OUT_OF_BOUNDS` from `error-classes.json` together with
    the exception `SparkIndexOutOfBoundsException`, and to replace the latter with a `SparkException` carrying the
    error class `INTERNAL_ERROR`, because the exception should not be raised in regular cases. `ArrayDataIndexedSeq`
    throws the exception from `apply()`, and an `ArrayDataIndexedSeq` can be created only via `ArrayData.toSeq`,
    which is invoked from two places:
    
    1. The `Slice` expression (or the `slice` function):
    https://github.com/apache/spark/blob/443eea97578c41870c343cdb88cf69bfdf27033a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L1600-L1601
    where any access to the produced array is guarded:
    ```sql
    spark-sql> set spark.sql.ansi.enabled=true;
    spark.sql.ansi.enabled	true
    Time taken: 2.415 seconds, Fetched 1 row(s)
    spark-sql> SELECT slice(array(1, 2, 3, 4), 2, 2)[4];
    ...
    org.apache.spark.SparkArrayIndexOutOfBoundsException: [INVALID_ARRAY_INDEX] The index 4 is out of bounds. The array has 2 elements. Use the SQL function `get()` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
    == SQL(line 1, position 8) ==
    SELECT slice(array(1, 2, 3, 4), 2, 2)[4]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    	at org.apache.spark.sql.errors.QueryExecutionErrors$.invalidArrayIndexError(QueryExecutionErrors.scala:239)
    	at org.apache.spark.sql.catalyst.expressions.GetArrayItem.nullSafeEval(complexTypeExtractors.scala:271)
    ```
    see https://github.com/apache/spark/blob/a9bb924480e4953457dad680c15ca346f71a26c8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala#L268-L271
    
    2. `MapObjects.convertToSeq`:
    https://github.com/apache/spark/blob/5b96e82ad6a4f5d5e4034d9d7112077159cf5044/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala#L886
    where any access to the produced `IndexedSeq` is guarded via map-based access in
    https://github.com/apache/spark/blob/5b96e82ad6a4f5d5e4034d9d7112077159cf5044/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala#L864-L867
    
    ### Why are the changes needed?
    To improve code maintainability.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    By running the affected test suites:
    ```
    $ build/sbt "core/testOnly *SparkThrowableSuite"
    $ build/sbt "test:testOnly *ArrayDataIndexedSeqSuite"
    ```
    
    Closes #37857 from MaxGekk/rm-INDEX_OUT_OF_BOUNDS.
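For context, the guarding the description refers to can be shown with a minimal, self-contained Scala sketch. The names here (`GuardedAccessSketch`, `getItem`) are made up for illustration and are not Spark APIs; only the bounds-check idea mirrors `GetArrayItem.nullSafeEval`:

```scala
// Illustrative sketch only -- not Spark source code. It mimics the
// caller-side bounds check that keeps ArrayDataIndexedSeq's out-of-bounds
// branch unreachable through the guarded call sites listed above.
object GuardedAccessSketch {
  // Check bounds before indexing; the sequence's own out-of-bounds error
  // can then fire only on an internal bug, hence INTERNAL_ERROR.
  def getItem[T](seq: IndexedSeq[T], index: Int): Option[T] =
    if (index >= 0 && index < seq.length) Some(seq(index)) else None

  def main(args: Array[String]): Unit = {
    val arr = IndexedSeq(1, 2, 3, 4)
    println(getItem(arr, 2)) // Some(3)
    println(getItem(arr, 9)) // None -- guarded, no exception escapes
  }
}
```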
    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json        |  6 ------
 .../main/scala/org/apache/spark/SparkException.scala    | 16 ----------------
 .../org/apache/spark/sql/catalyst/util/ArrayData.scala  |  4 +++-
 .../apache/spark/sql/errors/QueryExecutionErrors.scala  |  5 -----
 .../sql/catalyst/util/ArrayDataIndexedSeqSuite.scala    | 18 ++++++++++--------
 5 files changed, 13 insertions(+), 36 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 3e15334931b..29f1f4f0b30 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -239,12 +239,6 @@
       }
     }
   },
-  "INDEX_OUT_OF_BOUNDS" : {
-    "message" : [
-      "Index <indexValue> must be between 0 and the length of the ArrayData."
-    ],
-    "sqlState" : "22023"
-  },
   "INTERNAL_ERROR" : {
     "message" : [
       "<message>"
diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala
index 67aa8cdfcac..aea796685ee 100644
--- a/core/src/main/scala/org/apache/spark/SparkException.scala
+++ b/core/src/main/scala/org/apache/spark/SparkException.scala
@@ -316,22 +316,6 @@ private[spark] class SparkIllegalArgumentException(
   override def getQueryContext: Array[QueryContext] = context
 }
 
-/**
- * Index out of bounds exception thrown from Spark with an error class.
- */
-private[spark] class SparkIndexOutOfBoundsException(
-    errorClass: String,
-    errorSubClass: Option[String] = None,
-    messageParameters: Array[String])
-  extends IndexOutOfBoundsException(
-    SparkThrowableHelper.getMessage(errorClass, errorSubClass.orNull, messageParameters))
-  with SparkThrowable {
-
-  override def getMessageParameters: Array[String] = messageParameters
-  override def getErrorClass: String = errorClass
-  override def getErrorSubClass: String = errorSubClass.orNull
-}
-
 /**
  * IO exception thrown from Spark with an error class.
  */
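As a rough, self-contained approximation of the error-class plumbing involved, the trait and class below are illustrative stand-ins, not the real `SparkThrowable` API (which reads its message templates from `error-classes.json`):

```scala
// Simplified stand-in for Spark's error-class machinery; names and
// signatures here are illustrative only.
trait ErrorClassThrowable {
  def getErrorClass: String
  def getMessageParameters: Map[String, String]
}

// Roughly what SparkException.internalError(msg) yields: a generic exception
// tagged INTERNAL_ERROR, making a dedicated subclass per error unnecessary.
class InternalErrorSketch(msg: String)
  extends RuntimeException(s"[INTERNAL_ERROR] $msg") with ErrorClassThrowable {
  override def getErrorClass: String = "INTERNAL_ERROR"
  override def getMessageParameters: Map[String, String] = Map("message" -> msg)
}

object InternalErrorSketchDemo {
  def main(args: Array[String]): Unit = {
    val e = new InternalErrorSketch(
      "Index 5 must be between 0 and the length of the ArrayData.")
    println(e.getErrorClass) // INTERNAL_ERROR
    println(e.getMessage)    // [INTERNAL_ERROR] Index 5 must be between 0 and ...
  }
}
```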
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala
index f5497665f2f..51759df901c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.util
 
 import scala.reflect.ClassTag
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, UnsafeArrayData}
 import org.apache.spark.sql.errors.QueryExecutionErrors
@@ -200,7 +201,8 @@ class ArrayDataIndexedSeq[T](arrayData: ArrayData, dataType: DataType) extends I
     if (0 <= idx && idx < arrayData.numElements()) {
       accessor(arrayData, idx).asInstanceOf[T]
     } else {
-      throw QueryExecutionErrors.indexOutOfBoundsOfArrayDataError(idx)
+      throw SparkException.internalError(
+        s"Index $idx must be between 0 and the length of the ArrayData.")
     }
 
   override def length: Int = arrayData.numElements()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index f4ec70e81d9..662a10cf3ac 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -1372,11 +1372,6 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
       """.stripMargin.replaceAll("\n", " "))
   }
 
-  def indexOutOfBoundsOfArrayDataError(idx: Int): Throwable = {
-    new SparkIndexOutOfBoundsException(
-      errorClass = "INDEX_OUT_OF_BOUNDS", None, Array(toSQLValue(idx, IntegerType)))
-  }
-
   def malformedRecordsDetectedInRecordParsingError(e: BadRecordException): Throwable = {
     new SparkException("Malformed records are detected in record parsing. " +
       s"Parse Mode: ${FailFastMode.name}. To process malformed records as null " +
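To see the patched behavior end to end without a Spark build, a toy stand-in for the new `apply()` can be sketched as below. `CheckedIndexedSeq` and its backing `Array` are invented for the sketch (the real class wraps `ArrayData`), and a plain `RuntimeException` stands in for `SparkException.internalError`:

```scala
// Toy model of ArrayDataIndexedSeq.apply after this change; the updated
// test suite below asserts exactly this INTERNAL_ERROR message.
final class CheckedIndexedSeq[T](elems: Array[T]) extends IndexedSeq[T] {
  override def apply(idx: Int): T =
    if (0 <= idx && idx < elems.length) elems(idx)
    else throw new RuntimeException( // stand-in for SparkException.internalError
      s"[INTERNAL_ERROR] Index $idx must be between 0 and the length of the ArrayData.")
  override def length: Int = elems.length
}

object CheckedIndexedSeqDemo {
  def main(args: Array[String]): Unit = {
    val seq = new CheckedIndexedSeq(Array(1, 2, 3))
    println(seq(1)) // 2
    // Both -1 and seq.length are rejected, mirroring the test below:
    Seq(-1, seq.length).foreach { i =>
      try seq(i) catch { case e: RuntimeException => println(e.getMessage) }
    }
  }
}
```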
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala
index 56d2af7cb7e..b015829e672 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util
 
 import scala.util.Random
 
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkException, SparkFunSuite}
 import org.apache.spark.sql.RandomDataGenerator
 import org.apache.spark.sql.catalyst.encoders.{ExamplePointUDT, RowEncoder}
 import org.apache.spark.sql.catalyst.expressions.{SafeProjection, UnsafeProjection}
@@ -53,13 +53,15 @@ class ArrayDataIndexedSeqSuite extends SparkFunSuite {
       }
     }
 
-    intercept[IndexOutOfBoundsException] {
-      seq(-1)
-    }.getMessage().contains("must be between 0 and the length of the ArrayData.")
-
-    intercept[IndexOutOfBoundsException] {
-      seq(seq.length)
-    }.getMessage().contains("must be between 0 and the length of the ArrayData.")
+    Seq(-1, seq.length).foreach { index =>
+      checkError(
+        exception = intercept[SparkException] {
+          seq(index)
+        },
+        errorClass = "INTERNAL_ERROR",
+        parameters = Map(
+          "message" -> s"Index $index must be between 0 and the length of the ArrayData."))
+    }
   }
 
   private def testArrayData(): Unit = {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org