This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6d29c72f6c2b [SPARK-46875][SQL] When the `mode` is null, a `NullPointException` should `not` be thrown 6d29c72f6c2b is described below commit 6d29c72f6c2bdf534c2b079f9a3563bf8cfc8dab Author: panbingkun <panbing...@baidu.com> AuthorDate: Sat Jan 27 10:53:39 2024 +0300 [SPARK-46875][SQL] When the `mode` is null, a `NullPointException` should `not` be thrown ### What changes were proposed in this pull request? This PR aims to provide a friendlier message when the `mode` option is null. ### Why are the changes needed? In the original logic, if the mode is null, Spark will throw a `NullPointerException`, which is obviously unfriendly to the user. ``` val cars = spark.read .format("csv") .options(Map("header" -> "true", "mode" -> null)) .load(testFile(carsFile)) cars.show(false) ``` Before: ``` Cannot invoke "String.toUpperCase(java.util.Locale)" because "mode" is null java.lang.NullPointerException: Cannot invoke "String.toUpperCase(java.util.Locale)" because "mode" is null at org.apache.spark.sql.catalyst.util.ParseMode$.fromString(ParseMode.scala:50) at org.apache.spark.sql.catalyst.csv.CSVOptions.$anonfun$parseMode$1(CSVOptions.scala:105) at scala.Option.map(Option.scala:242) at org.apache.spark.sql.catalyst.csv.CSVOptions.<init>(CSVOptions.scala:105) at org.apache.spark.sql.catalyst.csv.CSVOptions.<init>(CSVOptions.scala:49) at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat.inferSchema(CSVFileFormat.scala:60) ``` After: It will fall back to `PermissiveMode` and then display the data normally, as shown below: ``` 18:54:06.727 WARN org.apache.spark.sql.catalyst.util.ParseMode: mode is null and not a valid parse mode. Using PERMISSIVE. 
+----+-----+-----+----------------------------------+-----+ |year|make |model|comment |blank| +----+-----+-----+----------------------------------+-----+ |2012|Tesla|S |No comment |NULL | |1997|Ford |E350 |Go get one now they are going fast|NULL | |2015|Chevy|Volt |NULL |NULL | +----+-----+-----+----------------------------------+-----+ ``` ### Does this PR introduce _any_ user-facing change? Yes. When `mode` is null, it falls back to `PermissiveMode` instead of throwing a `NullPointerException`. ### How was this patch tested? - Add new UT. - Pass GA. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44900 from panbingkun/SPARK-46875. Authored-by: panbingkun <panbing...@baidu.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../apache/spark/sql/catalyst/util/ParseMode.scala | 19 ++++++++++++------- .../sql/execution/datasources/csv/CSVSuite.scala | 10 ++++++++++ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala index 2beb875d1751..b35da8e2c80f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala @@ -47,12 +47,17 @@ object ParseMode extends Logging { /** * Returns the parse mode from the given string. */ - def fromString(mode: String): ParseMode = mode.toUpperCase(Locale.ROOT) match { - case PermissiveMode.name => PermissiveMode - case DropMalformedMode.name => DropMalformedMode - case FailFastMode.name => FailFastMode - case _ => - logWarning(s"$mode is not a valid parse mode. 
Using ${PermissiveMode.name}.") - PermissiveMode + def fromString(mode: String): ParseMode = Option(mode).map { + v => v.toUpperCase(Locale.ROOT) match { + case PermissiveMode.name => PermissiveMode + case DropMalformedMode.name => DropMalformedMode + case FailFastMode.name => FailFastMode + case _ => + logWarning(s"$v is not a valid parse mode. Using ${PermissiveMode.name}.") + PermissiveMode + } + }.getOrElse { + logWarning(s"mode is null and not a valid parse mode. Using ${PermissiveMode.name}.") + PermissiveMode } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 8e6282bd5a42..2ec9e1086b92 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -348,6 +348,16 @@ abstract class CSVSuite } } + test("when mode is null, will fall back to PermissiveMode mode") { + val cars = spark.read + .format("csv") + .options(Map("header" -> "true", "mode" -> null)) + .load(testFile(carsFile)) + assert(cars.collect().length == 3) + assert(cars.select("make").collect() sameElements + Array(Row("Tesla"), Row("Ford"), Row("Chevy"))) + } + test("test for blank column names on read and select columns") { val cars = spark.read .format("csv") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org