This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 936e98dbc07 [SPARK-45574][SQL] Add :: syntax as a shorthand for casting 936e98dbc07 is described below commit 936e98dbc073fcfcb7e6c40720d55dac63a73d51 Author: Ivan Mitic <ivan.mi...@databricks.com> AuthorDate: Sun Oct 22 11:23:01 2023 +0500 [SPARK-45574][SQL] Add :: syntax as a shorthand for casting ### What changes were proposed in this pull request? Adds the `::` syntax as syntactic sugar for casting columns. This is a pretty common syntax across many industry databases. ### Does this PR introduce _any_ user-facing change? Yes, new casting syntax. ### How was this patch tested? Unit tests. SQL tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43430 from mitkedb/master. Authored-by: Ivan Mitic <ivan.mi...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 + .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 1 + .../spark/sql/catalyst/expressions/Cast.scala | 5 +- .../spark/sql/catalyst/parser/AstBuilder.scala | 11 + .../sql/catalyst/parser/CastingSyntaxSuite.scala | 103 ++++++ .../sql-tests/analyzer-results/ansi/cast.sql.out | 234 +++++++++++++ .../sql-tests/analyzer-results/cast.sql.out | 217 ++++++++++++ .../src/test/resources/sql-tests/inputs/cast.sql | 31 ++ .../resources/sql-tests/results/ansi/cast.sql.out | 385 +++++++++++++++++++++ .../test/resources/sql-tests/results/cast.sql.out | 245 +++++++++++++ .../spark/sql/errors/QueryParsingErrorsSuite.scala | 4 +- .../sql/expressions/ExpressionInfoSuite.scala | 4 +- 12 files changed, 1237 insertions(+), 4 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index d9128de0f5d..e8b5cb012fc 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -447,6 +447,7 @@ PIPE: '|'; CONCAT_PIPE: '||'; HAT: '^'; COLON: ':'; +DOUBLE_COLON: '::'; ARROW: '->'; FAT_ARROW : '=>'; HENT_START: '/*+'; diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 77a9108e063..84a31dafed9 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -957,6 +957,7 @@ primaryExpression | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast + | primaryExpression DOUBLE_COLON dataType #castByColon | STRUCT LEFT_PAREN (argument+=namedExpression (COMMA argument+=namedExpression)*)? RIGHT_PAREN #struct | FIRST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #first | ANY_VALUE LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #any_value diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index b975dc3c7a5..99117d81b34 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -438,11 +438,14 @@ object Cast extends QueryErrorsBase { * session local timezone by an analyzer [[ResolveTimeZone]]. */ @ExpressionDescription( - usage = "_FUNC_(expr AS type) - Casts the value `expr` to the target data type `type`.", + usage = "_FUNC_(expr AS type) - Casts the value `expr` to the target data type `type`." + + " `expr` :: `type` alternative casting syntax is also supported.", examples = """ Examples: > SELECT _FUNC_('10' as int); 10 + > SELECT '10' :: int; + 10 """, since = "1.0.0", group = "conversion_funcs") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 8ce58ef7688..7e0aafca31c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2128,6 +2128,17 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } } + /** + * Create a [[Cast]] expression for '::' syntax. + */ + override def visitCastByColon(ctx: CastByColonContext): Expression = withOrigin(ctx) { + val rawDataType = typedVisit[DataType](ctx.dataType()) + val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) + val cast = Cast(expression(ctx.primaryExpression), dataType) + cast.setTagValue(Cast.USER_SPECIFIED_CAST, ()) + cast + } + /** * Create a [[CreateStruct]] expression. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CastingSyntaxSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CastingSyntaxSuite.scala new file mode 100644 index 00000000000..7f3bb74b292 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CastingSyntaxSuite.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.parser; + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedFunction, UnresolvedStar} +import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Expression, Literal} +import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException, ParserInterface} +import org.apache.spark.sql.types.{DoubleType, IntegerType} + +class CastingSyntaxSuite extends AnalysisTest { + import org.apache.spark.sql.catalyst.dsl.expressions._ + val defaultParser = CatalystSqlParser + + def assertEqual( + sqlCommand: String, + e: Expression, + parser: ParserInterface = defaultParser): Unit = { + compareExpressions(parser.parseExpression(sqlCommand), e) + } + + def assertFails(sql: String, errorMsg: String): Unit = { + val e = intercept[ParseException](defaultParser.parseExpression(sql)) + assert(e.getMessage.contains(errorMsg)) + } + + test("literals") { + assertEqual("123::double", Cast(Literal(123), DoubleType)) + assertEqual("'123'::double", Cast(Literal("123"), DoubleType)) + assertEqual("'123'::int", Cast(Literal("123"), IntegerType)) + assertEqual("'123.0'::double", Cast(Literal("123.0"), DoubleType)) + assertEqual("'123.0' :: double", Cast(Literal("123.0"), DoubleType)) + assertEqual("`123`::double", Cast(UnresolvedAttribute(Seq("123")), DoubleType)) + + assertEqual("`123::double`", UnresolvedAttribute(Seq("123::double"))) + } + + test("named expressions") { + assertEqual("123::double as v", Alias(Cast(Literal(123), DoubleType), "v")()) + assertEqual("123::double v", Alias(Cast(Literal(123), DoubleType), "v")()) + assertEqual("123 :: double v", Alias(Cast(Literal(123), DoubleType), "v")()) + assertEqual("abc::double v", Alias(Cast(UnresolvedAttribute("abc"), DoubleType), "v")()) + assertEqual("`abc`::double v", Alias(Cast(UnresolvedAttribute("abc"), DoubleType), "v")()) + assertEqual("abc.def::double v", + Alias(Cast(UnresolvedAttribute(Seq("abc", "def")), DoubleType), "v")()) + assertEqual("`abc.def`::double v", + Alias(Cast(UnresolvedAttribute(Seq("abc.def")), DoubleType), "v")()) + } + + test("boolean expressions") { + assertEqual("(a and b) :: int", Cast('a && 'b, IntegerType)) + assertEqual("(a or b) :: int", Cast('a || 'b, IntegerType)) + } + + test("arithmetic expressions") { + assertEqual("(a - b) :: int", Cast('a - 'b, IntegerType)) + assertEqual("(a * b) :: int", Cast('a * 'b, IntegerType)) + assertEqual("a + b :: int", 'a + Cast('b, IntegerType)) + } + + test("star expansion") { + // While these don't make sense, they're not against the parser. Should they be? They work + // with normal casting too + assertEqual("* :: int", Cast(UnresolvedStar(None), IntegerType)) + assertEqual("str.* :: int", Cast(UnresolvedStar(Some(Seq("str"))), IntegerType)) + } + + test("functions") { + assertEqual( + "get_json_object(blob, '$.field')::int", + Cast(UnresolvedFunction("get_json_object", + Seq(UnresolvedAttribute("blob"), Literal("$.field")), + isDistinct = false), IntegerType)) + + assertEqual( + "max(value::double)", + UnresolvedFunction("max", + Seq(Cast(UnresolvedAttribute("value"), DoubleType)), + isDistinct = false)) + + assertEqual( + "cast(value::int as double)", + Cast(Cast(UnresolvedAttribute("value"), IntegerType), DoubleType)) + + assertEqual( + "value::int::double", + Cast(Cast(UnresolvedAttribute("value"), IntegerType), DoubleType)) + } +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/cast.sql.out index caa324b335c..643dfd3771f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/cast.sql.out @@ -879,3 +879,237 @@ select cast(10.654321BD as interval month) -- !query analysis Project [cast(10.654321 as interval month) AS CAST(10.654321 AS INTERVAL MONTH)#x] +- OneRowRelation + + +-- !query +SELECT '1.23' :: int +-- !query analysis +Project [cast(1.23 as int) AS CAST(1.23 AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT 'abc' :: int +-- !query analysis +Project [cast(abc as int) AS CAST(abc AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT '12345678901234567890123' :: long +-- !query analysis +Project [cast(12345678901234567890123 as bigint) AS CAST(12345678901234567890123 AS BIGINT)#xL] ++- OneRowRelation + + +-- !query +SELECT '' :: int +-- !query analysis +Project [cast( as int) AS CAST( AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT NULL :: int +-- !query analysis +Project [cast(null as int) AS CAST(NULL AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT '123.a' :: int +-- !query analysis +Project [cast(123.a as int) AS CAST(123.a AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT '-2147483648' :: int +-- !query analysis +Project [cast(-2147483648 as int) AS CAST(-2147483648 AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT HEX('abc' :: binary) +-- !query analysis +Project [hex(cast(abc as binary)) AS hex(CAST(abc AS BINARY))#x] ++- OneRowRelation + + +-- !query +SELECT HEX((123 :: byte) :: binary) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION", + "sqlState" : "42K09", + "messageParameters" : { + "config" : "\"spark.sql.ansi.enabled\"", + "configVal" : "'false'", + "sqlExpr" : "\"CAST(CAST(123 AS TINYINT) AS BINARY)\"", + "srcType" : "\"TINYINT\"", + "targetType" : "\"BINARY\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 12, + "stopIndex" : 34, + "fragment" : "(123 :: byte) :: binary" + } ] +} + + +-- !query +SELECT 'interval 3 month 1 hour' :: interval +-- !query analysis +Project [cast(interval 3 month 1 hour as interval) AS CAST(interval 3 month 1 hour AS INTERVAL)#x] ++- OneRowRelation + + +-- !query +SELECT interval 3 day 1 second :: string +-- !query analysis +Project [cast(INTERVAL '3 00:00:01' DAY TO SECOND as string) AS CAST(INTERVAL '3 00:00:01' DAY TO SECOND AS STRING)#x] ++- OneRowRelation + + +-- !query +select ' 1 ' :: DOUBLE +-- !query analysis +Project [cast( 1 as double) AS CAST( 1 AS DOUBLE)#x] ++- OneRowRelation + + +-- !query +select '1.0 ' :: DEC +-- !query analysis +Project [cast(1.0 as decimal(10,0)) AS CAST(1.0 AS DECIMAL(10,0))#x] ++- OneRowRelation + + +-- !query +select '\t\t true \n\r ' :: boolean +-- !query analysis +Project [cast( true + as boolean) AS CAST( true + AS BOOLEAN)#x] ++- OneRowRelation + + +-- !query +select '2022-01-01 00:00:00' :: timestamp +-- !query analysis +Project [cast(2022-01-01 00:00:00 as timestamp) AS CAST(2022-01-01 00:00:00 AS TIMESTAMP)#x] ++- OneRowRelation + + +-- !query +select interval '-10-2' year to month :: smallint +-- !query analysis +Project [cast(INTERVAL '-10-2' YEAR TO MONTH as smallint) AS CAST(INTERVAL '-10-2' YEAR TO MONTH AS SMALLINT)#x] ++- OneRowRelation + + +-- !query +select -10L :: interval second +-- !query analysis +Project [cast(-10 as interval second) AS CAST(-10 AS INTERVAL SECOND)#x] ++- OneRowRelation + + +-- !query +select interval '08:11:10.001' hour to second :: decimal(10, 4) +-- !query analysis +Project [cast(INTERVAL '08:11:10.001' HOUR TO SECOND as decimal(10,4)) AS CAST(INTERVAL '08:11:10.001' HOUR TO SECOND AS DECIMAL(10,4))#x] ++- OneRowRelation + + +-- !query +select 10.123456BD :: interval day to second +-- !query analysis +Project [cast(10.123456 as interval day to second) AS CAST(10.123456 AS INTERVAL DAY TO SECOND)#x] ++- OneRowRelation + + +-- !query +SELECT '1.23' :: int :: long +-- !query analysis +Project [cast(cast(1.23 as int) as bigint) AS CAST(CAST(1.23 AS INT) AS BIGINT)#xL] ++- OneRowRelation + + +-- !query +SELECT '2147483648' :: long :: int +-- !query analysis +Project [cast(cast(2147483648 as bigint) as int) AS CAST(CAST(2147483648 AS BIGINT) AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2147483648' :: long AS int) +-- !query analysis +Project [cast(cast(2147483648 as bigint) as int) AS CAST(CAST(2147483648 AS BIGINT) AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT map(1, '123', 2, '456')[1] :: int +-- !query analysis +Project [cast(map(1, 123, 2, 456)[1] as int) AS CAST(map(1, 123, 2, 456)[1] AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT '2147483648' :: BINT +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "UNSUPPORTED_DATATYPE", + "sqlState" : "0A000", + "messageParameters" : { + "typeName" : "\"BINT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 24, + "stopIndex" : 27, + "fragment" : "BINT" + } ] +} + + +-- !query +SELECT '2147483648' :: SELECT +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "UNSUPPORTED_DATATYPE", + "sqlState" : "0A000", + "messageParameters" : { + "typeName" : "\"SELECT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 24, + "stopIndex" : 29, + "fragment" : "SELECT" + } ] +} + + +-- !query +SELECT FALSE IS NOT NULL :: string +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'::'", + "hint" : "" + } +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out index ad9b63e0fed..e0687b564d3 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out @@ -743,3 +743,220 @@ select cast(10.654321BD as interval month) -- !query analysis Project [cast(10.654321 as interval month) AS CAST(10.654321 AS INTERVAL MONTH)#x] +- OneRowRelation + + +-- !query +SELECT '1.23' :: int +-- !query analysis +Project [cast(1.23 as int) AS CAST(1.23 AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT 'abc' :: int +-- !query analysis +Project [cast(abc as int) AS CAST(abc AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT '12345678901234567890123' :: long +-- !query analysis +Project [cast(12345678901234567890123 as bigint) AS CAST(12345678901234567890123 AS BIGINT)#xL] ++- OneRowRelation + + +-- !query +SELECT '' :: int +-- !query analysis +Project [cast( as int) AS CAST( AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT NULL :: int +-- !query analysis +Project [cast(null as int) AS CAST(NULL AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT '123.a' :: int +-- !query analysis +Project [cast(123.a as int) AS CAST(123.a AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT '-2147483648' :: int +-- !query analysis +Project [cast(-2147483648 as int) AS CAST(-2147483648 AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT HEX('abc' :: binary) +-- !query analysis +Project [hex(cast(abc as binary)) AS hex(CAST(abc AS BINARY))#x] ++- OneRowRelation + + +-- !query +SELECT HEX((123 :: byte) :: binary) +-- !query analysis +Project [hex(cast(cast(123 as tinyint) as binary)) AS hex(CAST(CAST(123 AS TINYINT) AS BINARY))#x] ++- OneRowRelation + + +-- !query +SELECT 'interval 3 month 1 hour' :: interval +-- !query analysis +Project [cast(interval 3 month 1 hour as interval) AS CAST(interval 3 month 1 hour AS INTERVAL)#x] ++- OneRowRelation + + +-- !query +SELECT interval 3 day 1 second :: string +-- !query analysis +Project [cast(INTERVAL '3 00:00:01' DAY TO SECOND as string) AS CAST(INTERVAL '3 00:00:01' DAY TO SECOND AS STRING)#x] ++- OneRowRelation + + +-- !query +select ' 1 ' :: DOUBLE +-- !query analysis +Project [cast( 1 as double) AS CAST( 1 AS DOUBLE)#x] ++- OneRowRelation + + +-- !query +select '1.0 ' :: DEC +-- !query analysis +Project [cast(1.0 as decimal(10,0)) AS CAST(1.0 AS DECIMAL(10,0))#x] ++- OneRowRelation + + +-- !query +select '\t\t true \n\r ' :: boolean +-- !query analysis +Project [cast( true + as boolean) AS CAST( true + AS BOOLEAN)#x] ++- OneRowRelation + + +-- !query +select '2022-01-01 00:00:00' :: timestamp +-- !query analysis +Project [cast(2022-01-01 00:00:00 as timestamp) AS CAST(2022-01-01 00:00:00 AS TIMESTAMP)#x] ++- OneRowRelation + + +-- !query +select interval '-10-2' year to month :: smallint +-- !query analysis +Project [cast(INTERVAL '-10-2' YEAR TO MONTH as smallint) AS CAST(INTERVAL '-10-2' YEAR TO MONTH AS SMALLINT)#x] ++- OneRowRelation + + +-- !query +select -10L :: interval second +-- !query analysis +Project [cast(-10 as interval second) AS CAST(-10 AS INTERVAL SECOND)#x] ++- OneRowRelation + + +-- !query +select interval '08:11:10.001' hour to second :: decimal(10, 4) +-- !query analysis +Project [cast(INTERVAL '08:11:10.001' HOUR TO SECOND as decimal(10,4)) AS CAST(INTERVAL '08:11:10.001' HOUR TO SECOND AS DECIMAL(10,4))#x] ++- OneRowRelation + + +-- !query +select 10.123456BD :: interval day to second +-- !query analysis +Project [cast(10.123456 as interval day to second) AS CAST(10.123456 AS INTERVAL DAY TO SECOND)#x] ++- OneRowRelation + + +-- !query +SELECT '1.23' :: int :: long +-- !query analysis +Project [cast(cast(1.23 as int) as bigint) AS CAST(CAST(1.23 AS INT) AS BIGINT)#xL] ++- OneRowRelation + + +-- !query +SELECT '2147483648' :: long :: int +-- !query analysis +Project [cast(cast(2147483648 as bigint) as int) AS CAST(CAST(2147483648 AS BIGINT) AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2147483648' :: long AS int) +-- !query analysis +Project [cast(cast(2147483648 as bigint) as int) AS CAST(CAST(2147483648 AS BIGINT) AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT map(1, '123', 2, '456')[1] :: int +-- !query analysis +Project [cast(map(1, 123, 2, 456)[1] as int) AS CAST(map(1, 123, 2, 456)[1] AS INT)#x] ++- OneRowRelation + + +-- !query +SELECT '2147483648' :: BINT +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "UNSUPPORTED_DATATYPE", + "sqlState" : "0A000", + "messageParameters" : { + "typeName" : "\"BINT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 24, + "stopIndex" : 27, + "fragment" : "BINT" + } ] +} + + +-- !query +SELECT '2147483648' :: SELECT +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "UNSUPPORTED_DATATYPE", + "sqlState" : "0A000", + "messageParameters" : { + "typeName" : "\"SELECT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 24, + "stopIndex" : 29, + "fragment" : "SELECT" + } ] +} + + +-- !query +SELECT FALSE IS NOT NULL :: string +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'::'", + "hint" : "" + } +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/cast.sql index 46ce9fb9aac..2bf53f9730b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql @@ -146,3 +146,34 @@ select cast(10.123456BD as interval day to second); select cast(80.654321BD as interval hour to minute); select cast(-10.123456BD as interval year to month); select cast(10.654321BD as interval month); + +-- cast double colon syntax tests +SELECT '1.23' :: int; +SELECT 'abc' :: int; +SELECT '12345678901234567890123' :: long; +SELECT '' :: int; +SELECT NULL :: int; +SELECT '123.a' :: int; +SELECT '-2147483648' :: int; +SELECT HEX('abc' :: binary); +SELECT HEX((123 :: byte) :: binary); +SELECT 'interval 3 month 1 hour' :: interval; +SELECT interval 3 day 1 second :: string; +select ' 1 ' :: DOUBLE; +select '1.0 ' :: DEC; +select '\t\t true \n\r ' :: boolean; +select '2022-01-01 00:00:00' :: timestamp; +select interval '-10-2' year to month :: smallint; +select -10L :: interval second; +select interval '08:11:10.001' hour to second :: decimal(10, 4); +select 10.123456BD :: interval day to second; + +SELECT '1.23' :: int :: long; +SELECT '2147483648' :: long :: int; +SELECT CAST('2147483648' :: long AS int); +SELECT map(1, '123', 2, '456')[1] :: int; + +-- cast double colon syntax negative tests +SELECT '2147483648' :: BINT; +SELECT '2147483648' :: SELECT; +SELECT FALSE IS NOT NULL :: string; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 355b65f853f..4a60af0411a 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -1659,3 +1659,388 @@ select cast(10.654321BD as interval month) struct<CAST(10.654321 AS INTERVAL MONTH):interval month> -- !query output 0-11 + + +-- !query +SELECT '1.23' :: int +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'1.23'", + "sourceType" : "\"STRING\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 20, + "fragment" : "'1.23' :: int" + } ] +} + + +-- !query +SELECT 'abc' :: int +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'abc'", + "sourceType" : "\"STRING\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 19, + "fragment" : "'abc' :: int" + } ] +} + + +-- !query +SELECT '12345678901234567890123' :: long +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'12345678901234567890123'", + "sourceType" : "\"STRING\"", + "targetType" : "\"BIGINT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 40, + "fragment" : "'12345678901234567890123' :: long" + } ] +} + + +-- !query +SELECT '' :: int +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "''", + "sourceType" : "\"STRING\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 16, + "fragment" : "'' :: int" + } ] +} + + +-- !query +SELECT NULL :: int +-- !query schema +struct<CAST(NULL AS INT):int> +-- !query output +NULL + + +-- !query +SELECT '123.a' :: int +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'123.a'", + "sourceType" : "\"STRING\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 21, + "fragment" : "'123.a' :: int" + } ] +} + + +-- !query +SELECT '-2147483648' :: int +-- !query schema +struct<CAST(-2147483648 AS INT):int> +-- !query output +-2147483648 + + +-- !query +SELECT HEX('abc' :: binary) +-- !query schema +struct<hex(CAST(abc AS BINARY)):string> +-- !query output +616263 + + +-- !query +SELECT HEX((123 :: byte) :: binary) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION", + "sqlState" : "42K09", + "messageParameters" : { + "config" : "\"spark.sql.ansi.enabled\"", + "configVal" : "'false'", + "sqlExpr" : "\"CAST(CAST(123 AS TINYINT) AS BINARY)\"", + "srcType" : "\"TINYINT\"", + "targetType" : "\"BINARY\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 12, + "stopIndex" : 34, + "fragment" : "(123 :: byte) :: binary" + } ] +} + + +-- !query +SELECT 'interval 3 month 1 hour' :: interval +-- !query schema +struct<CAST(interval 3 month 1 hour AS INTERVAL):interval> +-- !query output +3 months 1 hours + + +-- !query +SELECT interval 3 day 1 second :: string +-- !query schema +struct<CAST(INTERVAL '3 00:00:01' DAY TO SECOND AS STRING):string> +-- !query output +INTERVAL '3 00:00:01' DAY TO SECOND + + +-- !query +select ' 1 ' :: DOUBLE +-- !query schema +struct<CAST( 1 AS DOUBLE):double> +-- !query output +1.0 + + +-- !query +select '1.0 ' :: DEC +-- !query schema +struct<CAST(1.0 AS DECIMAL(10,0)):decimal(10,0)> +-- !query output +1 + + +-- !query +select '\t\t true \n\r ' :: boolean +-- !query schema +struct<CAST( true + AS BOOLEAN):boolean> +-- !query output +true + + +-- !query +select '2022-01-01 00:00:00' :: timestamp +-- !query schema +struct<CAST(2022-01-01 00:00:00 AS TIMESTAMP):timestamp> +-- !query output +2022-01-01 00:00:00 + + +-- !query +select interval '-10-2' year to month :: smallint +-- !query schema +struct<CAST(INTERVAL '-10-2' YEAR TO MONTH AS SMALLINT):smallint> +-- !query output +-122 + + +-- !query +select -10L :: interval second +-- !query schema +struct<CAST(-10 AS INTERVAL SECOND):interval second> +-- !query output +-0 00:00:10.000000000 + + +-- !query +select interval '08:11:10.001' hour to second :: decimal(10, 4) +-- !query schema +struct<CAST(INTERVAL '08:11:10.001' HOUR TO SECOND AS DECIMAL(10,4)):decimal(10,4)> +-- !query output +29470.0010 + + +-- !query +select 10.123456BD :: interval day to second +-- !query schema +struct<CAST(10.123456 AS INTERVAL DAY TO SECOND):interval day to second> +-- !query output +0 00:00:10.123456000 + + +-- !query +SELECT '1.23' :: int :: long +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'1.23'", + "sourceType" : "\"STRING\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 20, + "fragment" : "'1.23' :: int" + } ] +} + + +-- !query +SELECT '2147483648' :: long :: int +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +{ + "errorClass" : "CAST_OVERFLOW", + "sqlState" : "22003", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "sourceType" : "\"BIGINT\"", + "targetType" : "\"INT\"", + "value" : "2147483648L" + } +} + + +-- !query +SELECT CAST('2147483648' :: long AS int) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +{ + "errorClass" : "CAST_OVERFLOW", + "sqlState" : "22003", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "sourceType" : "\"BIGINT\"", + "targetType" : "\"INT\"", + "value" : "2147483648L" + } +} + + +-- !query +SELECT map(1, '123', 2, '456')[1] :: int +-- !query schema +struct<CAST(map(1, 123, 2, 456)[1] AS INT):int> +-- !query output +123 + + +-- !query +SELECT '2147483648' :: BINT +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "UNSUPPORTED_DATATYPE", + "sqlState" : "0A000", + "messageParameters" : { + "typeName" : "\"BINT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 24, + "stopIndex" : 27, + "fragment" : "BINT" + } ] +} + + +-- !query +SELECT '2147483648' :: SELECT +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "UNSUPPORTED_DATATYPE", + "sqlState" : "0A000", + "messageParameters" : { + "typeName" : "\"SELECT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 24, + "stopIndex" : 29, + "fragment" : "SELECT" + } ] +} + + +-- !query +SELECT FALSE IS NOT NULL :: string +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'::'", + "hint" : "" + } +} diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out index 75c2470e61d..8536a564d80 100644 --- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out @@ -914,3 +914,248 @@ select cast(10.654321BD as interval month) struct<CAST(10.654321 AS INTERVAL MONTH):interval month> -- !query output 0-11 + + +-- !query +SELECT '1.23' :: int +-- !query schema +struct<CAST(1.23 AS INT):int> +-- !query output +1 + + +-- !query +SELECT 'abc' :: int +-- !query schema +struct<CAST(abc AS INT):int> +-- !query output +NULL + + +-- !query +SELECT '12345678901234567890123' :: long +-- !query schema +struct<CAST(12345678901234567890123 AS BIGINT):bigint> +-- !query output +NULL + + +-- !query +SELECT '' :: int +-- !query schema +struct<CAST( AS INT):int> +-- !query output +NULL + + +-- !query +SELECT NULL :: int +-- !query schema +struct<CAST(NULL AS INT):int> +-- !query output +NULL + + +-- !query +SELECT '123.a' :: int +-- !query schema +struct<CAST(123.a AS INT):int> +-- !query output +NULL + + +-- !query +SELECT '-2147483648' :: int +-- !query schema +struct<CAST(-2147483648 AS INT):int> +-- !query output +-2147483648 + + +-- !query +SELECT HEX('abc' :: binary) +-- !query schema +struct<hex(CAST(abc AS BINARY)):string> +-- !query output +616263 + + +-- !query +SELECT HEX((123 :: byte) :: binary) +-- !query schema +struct<hex(CAST(CAST(123 AS TINYINT) AS BINARY)):string> +-- !query output +7B + + +-- !query +SELECT 'interval 3 month 1 hour' :: interval +-- !query schema +struct<CAST(interval 3 month 1 hour AS INTERVAL):interval> +-- !query output +3 months 1 hours + + +-- !query +SELECT interval 3 day 1 second :: string +-- !query schema +struct<CAST(INTERVAL '3 00:00:01' DAY TO SECOND AS STRING):string> +-- !query output +INTERVAL '3 00:00:01' DAY TO SECOND + + +-- !query +select ' 1 ' :: DOUBLE +-- !query schema +struct<CAST( 1 AS DOUBLE):double> +-- !query output +1.0 + + +-- !query +select '1.0 ' :: DEC +-- !query schema +struct<CAST(1.0 AS DECIMAL(10,0)):decimal(10,0)> +-- !query output +1 + + +-- !query +select '\t\t true \n\r ' :: boolean +-- !query schema +struct<CAST( true + AS BOOLEAN):boolean> +-- !query output +true + + +-- !query +select '2022-01-01 00:00:00' :: timestamp +-- !query schema +struct<CAST(2022-01-01 00:00:00 AS TIMESTAMP):timestamp> +-- !query output +2022-01-01 00:00:00 + + +-- !query +select interval '-10-2' year to month :: smallint +-- !query schema +struct<CAST(INTERVAL '-10-2' YEAR TO MONTH AS SMALLINT):smallint> +-- !query output +-122 + + +-- !query +select -10L :: interval second +-- !query schema +struct<CAST(-10 AS INTERVAL SECOND):interval second> +-- !query output +-0 00:00:10.000000000 + + +-- !query +select interval '08:11:10.001' hour to second :: decimal(10, 4) +-- !query schema +struct<CAST(INTERVAL '08:11:10.001' HOUR TO SECOND AS DECIMAL(10,4)):decimal(10,4)> +-- !query output +29470.0010 + + +-- !query +select 10.123456BD :: interval day to second +-- !query schema +struct<CAST(10.123456 AS INTERVAL DAY TO SECOND):interval day to second> +-- !query output +0 00:00:10.123456000 + + +-- !query +SELECT '1.23' :: int :: long +-- !query schema +struct<CAST(CAST(1.23 AS INT) AS BIGINT):bigint> +-- !query output +1 + + +-- !query +SELECT '2147483648' :: long :: int +-- !query schema +struct<CAST(CAST(2147483648 AS BIGINT) AS INT):int> +-- !query output +-2147483648 + + +-- !query +SELECT CAST('2147483648' :: long AS int) +-- !query schema +struct<CAST(CAST(2147483648 AS BIGINT) AS INT):int> +-- !query output +-2147483648 + + +-- !query +SELECT map(1, '123', 2, '456')[1] :: int +-- !query schema +struct<CAST(map(1, 123, 2, 456)[1] AS INT):int> +-- !query output +123 + + +-- !query +SELECT '2147483648' :: BINT +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "UNSUPPORTED_DATATYPE", + "sqlState" : "0A000", + "messageParameters" : { + "typeName" : "\"BINT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 24, + "stopIndex" : 27, + "fragment" : "BINT" + } ] +} + + +-- !query +SELECT '2147483648' :: SELECT +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "UNSUPPORTED_DATATYPE", + "sqlState" : "0A000", + "messageParameters" : { + "typeName" : "\"SELECT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 24, + "stopIndex" : 29, + "fragment" : "SELECT" + } ] +} + + +-- !query +SELECT FALSE IS NOT NULL :: string +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'::'", + "hint" : "" + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 7ebb677b121..97d66095f64 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -630,7 +630,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession with SQL exception = parseException("SELECT CAST(struct(1,2,3) AS STRUCT<INT>)"), errorClass = "PARSE_SYNTAX_ERROR", sqlState = "42601", - parameters = Map("error" -> "'>'", "hint" -> "")) + parameters = Map("error" -> "'<'", "hint" -> ": missing ')'")) } test("INCOMPLETE_TYPE_DEFINITION: map type definition is incomplete") { @@ -651,7 +651,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession with SQL exception = parseException("SELECT CAST(map('1',2) AS MAP<STRING>)"), errorClass = "PARSE_SYNTAX_ERROR", sqlState = "42601", - parameters = Map("error" -> "'>'", "hint" -> "")) + parameters = Map("error" -> "'<'", "hint" -> ": missing ')'")) } test("INVALID_ESC: Escape string must contain only one character") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index 1d522718116..fd6f0adccf7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -115,7 +115,9 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { // _FUNC_ is replaced by `%` which causes a parsing error on `SELECT %(2, 1.8)` "org.apache.spark.sql.catalyst.expressions.Remainder", // Examples demonstrate alternative names, see SPARK-20749 - "org.apache.spark.sql.catalyst.expressions.Length") + "org.apache.spark.sql.catalyst.expressions.Length", + // Examples demonstrate alternative syntax, see SPARK-45574 + "org.apache.spark.sql.catalyst.expressions.Cast") spark.sessionState.functionRegistry.listFunction().foreach { funcId => val info = spark.sessionState.catalog.lookupFunctionInfo(funcId) val className = info.getClassName --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org