This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new e8dfcd3081ab [SPARK-46515] Add MONTHNAME function e8dfcd3081ab is described below commit e8dfcd3081abe16b2115bb2944a2b1cb547eca8e Author: Stefan Kandic <stefan.kan...@databricks.com> AuthorDate: Fri Jan 5 23:20:51 2024 +0300 [SPARK-46515] Add MONTHNAME function ### What changes were proposed in this pull request? Added MONTHNAME function which returns three-letter abbreviated month name for a given date to: - Scala API - Python API - R API - Spark Connect Scala Client - Spark Connect Python Client ### Why are the changes needed? for parity with Snowflake ### Does this PR introduce _any_ user-facing change? Yes, new MONTHNAME function ### How was this patch tested? With newly added unit tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #44483 from stefankandic/monthname-function. Authored-by: Stefan Kandic <stefan.kan...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- R/pkg/R/functions.R | 14 ++++++++++ R/pkg/R/generics.R | 4 +++ R/pkg/tests/fulltests/test_sparkSQL.R | 1 + .../scala/org/apache/spark/sql/functions.scala | 9 +++++++ .../apache/spark/sql/PlanGenerationTestSuite.scala | 4 +++ .../explain-results/function_monthname.explain | 2 ++ .../query-tests/queries/function_monthname.json | 25 +++++++++++++++++ .../queries/function_monthname.proto.bin | Bin 0 -> 123 bytes .../source/reference/pyspark.sql/functions.rst | 1 + python/pyspark/sql/connect/functions/builtin.py | 7 +++++ python/pyspark/sql/functions/builtin.py | 30 +++++++++++++++++++++ python/pyspark/sql/tests/test_functions.py | 6 +++++ .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../catalyst/expressions/datetimeExpressions.scala | 17 ++++++++++++ .../spark/sql/catalyst/util/DateTimeUtils.scala | 12 +++++++++ .../expressions/DateExpressionsSuite.scala | 11 ++++++++ .../scala/org/apache/spark/sql/functions.scala | 9 +++++++ .../sql-functions/sql-expression-schema.md | 1 + .../org/apache/spark/sql/DateFunctionsSuite.scala | 12 +++++++++ 19 files changed, 166 insertions(+) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 14b4cb8cbdaa..0db25ad8739f 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -1091,6 +1091,20 @@ setMethod("dayofyear", column(jc) }) +#' @details +#' \code{monthname}: Extracts the three-letter abbreviated month name from a +#' given date/timestamp/string. +#' +#' @rdname column_datetime_functions +#' @aliases monthname monthname,Column-method +#' @note monthname since 4.0.0 +setMethod("monthname", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "monthname", x@jc) + column(jc) + }) + #' @details #' \code{decode}: Computes the first argument into a string from a binary using the provided #' character set. diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 2f46f63ba38a..92febc02710d 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1020,6 +1020,10 @@ setGeneric("dayofweek", function(x) { standardGeneric("dayofweek") }) #' @name NULL setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") }) +#' @rdname column_datetime_functions +#' @name NULL +setGeneric("monthname", function(x) { standardGeneric("monthname") }) + #' @rdname column_string_functions #' @name NULL setGeneric("decode", function(x, charset) { standardGeneric("decode") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index c1a5292195af..118c853a00df 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2062,6 +2062,7 @@ test_that("date functions on a DataFrame", { expect_equal(collect(select(df, weekofyear(df$b)))[, 1], c(50, 50, 51)) expect_equal(collect(select(df, year(df$b)))[, 1], c(2012, 2013, 2014)) expect_equal(collect(select(df, month(df$b)))[, 1], c(12, 12, 12)) + expect_equal(collect(select(df, monthname(df$b)))[, 1], c("Dec", "Dec", "Dec")) expect_equal(collect(select(df, last_day(df$b)))[, 1], c(as.Date("2012-12-31"), as.Date("2013-12-31"), as.Date("2014-12-31"))) expect_equal(collect(select(df, next_day(df$b, "MONDAY")))[, 1], diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index 061fca276a3d..9191633171f7 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -5944,6 +5944,15 @@ object functions { def to_unix_timestamp(timeExp: Column): Column = Column.fn("to_unix_timestamp", timeExp) + /** + * Extracts the three-letter abbreviated month name from a given date/timestamp/string. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def monthname(timeExp: Column): Column = + Column.fn("monthname", timeExp) + ////////////////////////////////////////////////////////////////////////////////////////////// // Collection functions ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index 0a7768aa488b..3e6e4b7a3a02 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -2125,6 +2125,10 @@ class PlanGenerationTestSuite fn.months_between(fn.current_date(), fn.col("d"), roundOff = true) } + temporalFunctionTest("monthname") { + fn.monthname(fn.col("d")) + } + temporalFunctionTest("next_day") { fn.next_day(fn.col("d"), "Mon") } diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_monthname.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_monthname.explain new file mode 100644 index 000000000000..672d076ef8bf --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_monthname.explain @@ -0,0 +1,2 @@ +Project [monthname(d#0) AS monthname(d)#0] ++- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_monthname.json b/connector/connect/common/src/test/resources/query-tests/queries/function_monthname.json new file mode 100644 index 000000000000..c5ad3485252f --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_monthname.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "monthname", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "d" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_monthname.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_monthname.proto.bin new file mode 100644 index 000000000000..4518bb8d7425 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_monthname.proto.bin differ diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst index d1dba5f2bede..cbdadd9cd2bd 100644 --- a/python/docs/source/reference/pyspark.sql/functions.rst +++ b/python/docs/source/reference/pyspark.sql/functions.rst @@ -270,6 +270,7 @@ Date and Timestamp Functions make_ym_interval minute month + monthname months_between next_day now diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 461694362612..c9bf5fadd91c 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -2980,6 +2980,13 @@ def weekday(col: "ColumnOrName") -> Column: weekday.__doc__ = pysparkfuncs.weekday.__doc__ +def monthname(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("monthname", col) + + +monthname.__doc__ = pysparkfuncs.monthname.__doc__ + + def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column: return _invoke_function_over_columns("extract", field, source) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index bd151050e77f..bac59808cd3f 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -7273,6 +7273,36 @@ def weekday(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("weekday", col) +@_try_remote_functions +def monthname(col: "ColumnOrName") -> Column: + """ + Returns the three-letter abbreviated month name from the given date. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target date/timestamp column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + the three-letter abbreviation of month name for date/timestamp (Jan, Feb, Mar...) + + Examples + -------- + >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) + >>> df.select(monthname('dt').alias('month')).show() + +-----+ + |month| + +-----+ + | Apr| + +-----+ + """ + return _invoke_function_over_columns("monthname", col) + + @_try_remote_functions def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column: """ diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index aaf58136508a..c67ddcde44ea 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -415,6 +415,12 @@ class FunctionsTestsMixin: row = df.select(F.dayofweek(df.date)).first() self.assertEqual(row[0], 2) + def test_monthname(self): + dt = datetime.datetime(2017, 11, 6) + df = self.spark.createDataFrame([Row(date=dt)]) + row = df.select(F.monthname(df.date)).first() + self.assertEqual(row[0], "Nov") + # Test added for SPARK-37738; change Python API to accept both col & int as input def test_date_add_function(self): dt = datetime.date(2021, 12, 27) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 3f61dfc5f2ed..a9b1178a8dba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -649,6 +649,7 @@ object FunctionRegistry { expression[WindowTime]("window_time"), expression[MakeDate]("make_date"), expression[MakeTimestamp]("make_timestamp"), + expression[MonthName]("monthname"), // We keep the 2 expression builders below to have different function docs. expressionBuilder("make_timestamp_ntz", MakeTimestampNTZExpressionBuilder, setAlias = true), expressionBuilder("make_timestamp_ltz", MakeTimestampLTZExpressionBuilder, setAlias = true), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 50a9dbf7f899..9be260a9f3da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -911,6 +911,23 @@ case class WeekOfYear(child: Expression) extends GetDateField { copy(child = newChild) } +@ExpressionDescription( + usage = "_FUNC_(date) - Returns the three-letter abbreviated month name from the given date.", + examples = """ + Examples: + > SELECT _FUNC_('2008-02-20'); + Feb + """, + group = "datetime_funcs", + since = "4.0.0") +case class MonthName(child: Expression) extends GetDateField { + override val func = DateTimeUtils.getMonthName + override val funcName = "getMonthName" + override def dataType: DataType = StringType + override protected def withNewChildInternal(newChild: Expression): MonthName = + copy(child = newChild) +} + // scalastyle:off line.size.limit @ExpressionDescription( usage = "_FUNC_(timestamp, fmt) - Converts `timestamp` to a value of string in the format specified by the date format `fmt`.", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 128582f71d11..cb93814e90e5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.util import java.time._ +import java.time.format.TextStyle import java.time.temporal.{ChronoField, ChronoUnit, IsoFields, Temporal} import java.util.Locale import java.util.concurrent.TimeUnit._ @@ -196,6 +197,17 @@ object DateTimeUtils extends SparkDateTimeUtils { localDateToDays(daysToLocalDate(days).plusMonths(months)) } + /** + * Returns the three-letter abbreviated month name for the given number of days since 1970-01-01. + */ + def getMonthName(days: Int): UTF8String = { + val monthName = Month + .of(getMonth(days)) + .getDisplayName(TextStyle.SHORT, DateFormatter.defaultLocale) + + UTF8String.fromString(monthName) + } + /** * Adds months to a timestamp at the given time zone. It converts the input timestamp to a local * timestamp at the given time zone, adds months, and converts the resulted local timestamp diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index d20101026903..09c2b6f5cc9b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -285,6 +285,17 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegen(WeekOfYear, DateType) } + test("MonthName") { + checkEvaluation(MonthName(Literal.create(null, DateType)), null) + checkEvaluation(MonthName(Literal(d)), "Apr") + checkEvaluation(MonthName(Cast(Literal(date), DateType, UTC_OPT)), "Apr") + checkEvaluation(MonthName(Cast(Literal(ts), DateType, UTC_OPT)), "Nov") + checkEvaluation(MonthName(Cast(Literal("2011-05-06"), DateType, UTC_OPT)), "May") + checkEvaluation(MonthName(Literal(new Date(toMillis("2017-01-27 13:10:15")))), "Jan") + checkEvaluation(MonthName(Literal(new Date(toMillis("1582-12-15 13:10:15")))), "Dec") + checkConsistencyBetweenInterpretedAndCodegen(MonthName, DateType) + } + test("DateFormat") { Seq("legacy", "corrected").foreach { legacyParserPolicy => withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index d8b5a4b416c9..97963c4ed924 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -5738,6 +5738,15 @@ object functions { def to_unix_timestamp(timeExp: Column): Column = Column.fn("to_unix_timestamp", timeExp) + /** + * Extracts the three-letter abbreviated month name from a given date/timestamp/string. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def monthname(timeExp: Column): Column = + Column.fn("monthname", timeExp) + ////////////////////////////////////////////////////////////////////////////////////////////// // Collection functions ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 1a04fd57090d..cff7921db0e5 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -221,6 +221,7 @@ | org.apache.spark.sql.catalyst.expressions.Minute | minute | SELECT minute('2009-07-30 12:58:59') | struct<minute(2009-07-30 12:58:59):int> | | org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID | monotonically_increasing_id | SELECT monotonically_increasing_id() | struct<monotonically_increasing_id():bigint> | | org.apache.spark.sql.catalyst.expressions.Month | month | SELECT month('2016-07-30') | struct<month(2016-07-30):int> | +| org.apache.spark.sql.catalyst.expressions.MonthName | monthname | SELECT monthname('2008-02-20') | struct<monthname(2008-02-20):string> | | org.apache.spark.sql.catalyst.expressions.MonthsBetween | months_between | SELECT months_between('1997-02-28 10:30:00', '1996-10-30') | struct<months_between(1997-02-28 10:30:00, 1996-10-30, true):double> | | org.apache.spark.sql.catalyst.expressions.Multiply | * | SELECT 2 * 3 | struct<(2 * 3):int> | | org.apache.spark.sql.catalyst.expressions.Murmur3Hash | hash | SELECT hash('Spark', array(123), 2) | struct<hash(Spark, array(123), 2):int> | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index a8c304ff66d0..7d608f4a08c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -267,6 +267,18 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { Row(2, 2, 0)) } + test("monthname") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(monthname($"a"), monthname($"b"), monthname($"c")), + Row("Apr", "Apr", "Apr")) + + checkAnswer( + df.selectExpr("monthname(a)", "monthname(b)", "monthname(c)"), + Row("Apr", "Apr", "Apr")) + } + test("extract") { val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org