This is an automated email from the ASF dual-hosted git repository. holden pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d841b33b [SPARK-25153][SQL] Improve error messages for columns with dots/periods d841b33b is described below commit d841b33ba3a9b0504597dbccd4b0d11fa810abf3 Author: Jeff Evans <jeffrey.wayne.ev...@gmail.com> AuthorDate: Mon Sep 30 18:34:44 2019 -0700 [SPARK-25153][SQL] Improve error messages for columns with dots/periods ### What changes were proposed in this pull request? Check schema fields to see if they contain the exact column name, add to error message in Dataset#resolve. Add test for extra error message piece. Adds an additional check in `Dataset#resolve`, in the else clause (i.e. column not resolved), that appends a suffix to the error message for the `AnalysisException` if that column name is literally found in the schema fields, to suggest to the user that it might need to be quoted via backticks. ### Why are the changes needed? Forgetting to quote such column names is a common occurrence for new Spark users. ### Does this PR introduce any user-facing change? No (other than the extra suffix on the error message). ### How was this patch tested? `test` was run for `core` in `sbt`, and passed. Closes #25807 from jeff303/SPARK-25153. 
Authored-by: Jeff Evans <jeffrey.wayne.ev...@gmail.com> Signed-off-by: Holden Karau <hka...@apple.com> --- .../src/main/scala/org/apache/spark/sql/Dataset.scala | 12 +++++++++--- .../scala/org/apache/spark/sql/DatasetSuite.scala | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 9a2d800..076270a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -254,10 +254,16 @@ class Dataset[T] private[sql]( @transient lazy val sqlContext: SQLContext = sparkSession.sqlContext private[sql] def resolve(colName: String): NamedExpression = { - queryExecution.analyzed.resolveQuoted(colName, sparkSession.sessionState.analyzer.resolver) + val resolver = sparkSession.sessionState.analyzer.resolver + queryExecution.analyzed.resolveQuoted(colName, resolver) .getOrElse { - throw new AnalysisException( - s"""Cannot resolve column name "$colName" among (${schema.fieldNames.mkString(", ")})""") + val fields = schema.fieldNames + val extraMsg = if (fields.exists(resolver(_, colName))) { + s"; did you mean to quote the `$colName` column?" 
+ } else "" + val fieldsStr = fields.mkString(", ") + val errorMsg = s"""Cannot resolve column name "$colName" among (${fieldsStr})${extraMsg}""" + throw new AnalysisException(errorMsg) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 42e5ee5..9c50e37 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -21,6 +21,7 @@ import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.sql.{Date, Timestamp} import org.scalatest.exceptions.TestFailedException +import org.scalatest.prop.TableDrivenPropertyChecks._ import org.apache.spark.{SparkException, TaskContext} import org.apache.spark.sql.catalyst.ScroogeLikeExample @@ -1841,6 +1842,24 @@ class DatasetSuite extends QueryTest with SharedSparkSession { val instant = java.time.Instant.parse("2019-03-30T09:54:00Z") assert(spark.range(1).map { _ => instant }.head === instant) } + + val dotColumnTestModes = Table( + ("caseSensitive", "colName"), + ("true", "field.1"), + ("false", "Field.1") + ) + + test("SPARK-25153: Improve error messages for columns with dots/periods") { + forAll(dotColumnTestModes) { (caseSensitive, colName) => + val ds = Seq(SpecialCharClass("1", "2")).toDS + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) { + val errorMsg = intercept[AnalysisException] { + ds(colName) + } + assert(errorMsg.getMessage.contains(s"did you mean to quote the `$colName` column?")) + } + } + } } object AssertExecutionId { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org