This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 3224cddcf9da [SPARK-46324][SQL][PYTHON] Fix the output name of pyspark.sql.functions.user and session_user 3224cddcf9da is described below commit 3224cddcf9da913c964b775b5912a67cd1e968b2 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Fri Dec 8 12:48:24 2023 -0800 [SPARK-46324][SQL][PYTHON] Fix the output name of pyspark.sql.functions.user and session_user ### What changes were proposed in this pull request? This PR proposes to fix `user()` and `session_user()` so that each function's output name matches the function name. ### Why are the changes needed? To show the correct name of the functions being used. ### Does this PR introduce _any_ user-facing change? Yes. ```scala spark.range(1).select(user(), session_user()).show() ``` Before: ``` +--------------+--------------+ |current_user()|current_user()| +--------------+--------------+ | hyukjin.kwon| hyukjin.kwon| +--------------+--------------+ ``` After: ``` +--------------+--------------+ | user()|session_user()| +--------------+--------------+ | hyukjin.kwon| hyukjin.kwon| +--------------+--------------+ ``` ### How was this patch tested? Manually tested, and unit tests were added. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44253 from HyukjinKwon/user-name. 
Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../query-tests/explain-results/function_session_user.explain | 2 +- .../resources/query-tests/explain-results/function_user.explain | 2 +- python/pyspark/sql/functions/builtin.py | 4 ++-- python/pyspark/sql/tests/test_functions.py | 9 +++++++++ .../scala/org/apache/spark/sql/catalyst/expressions/misc.scala | 5 +++-- .../src/test/resources/sql-functions/sql-expression-schema.md | 6 +++--- 6 files changed, 19 insertions(+), 9 deletions(-) diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain index 82f5d2adcec0..b6205d9fb56c 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain @@ -1,2 +1,2 @@ -Project [current_user() AS current_user()#0] +Project [session_user() AS session_user()#0] +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain index 82f5d2adcec0..52746c58c000 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain @@ -1,2 +1,2 @@ -Project [current_user() AS current_user()#0] +Project [user() AS user()#0] +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 997b641080cf..e1cffff01018 100644 --- a/python/pyspark/sql/functions/builtin.py +++ 
b/python/pyspark/sql/functions/builtin.py @@ -8967,7 +8967,7 @@ def user() -> Column: >>> import pyspark.sql.functions as sf >>> spark.range(1).select(sf.user()).show() # doctest: +SKIP +--------------+ - |current_user()| + | user()| +--------------+ | ruifeng.zheng| +--------------+ @@ -8986,7 +8986,7 @@ def session_user() -> Column: >>> import pyspark.sql.functions as sf >>> spark.range(1).select(sf.session_user()).show() # doctest: +SKIP +--------------+ - |current_user()| + |session_user()| +--------------+ | ruifeng.zheng| +--------------+ diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index b59417d8a310..5352ee04d7fe 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -1355,6 +1355,15 @@ class FunctionsTestsMixin: message_parameters={"arg_name": "gapDuration", "arg_type": "int"}, ) + def test_current_user(self): + df = self.spark.range(1).select(F.current_user()) + self.assertIsInstance(df.first()[0], str) + self.assertEqual(df.schema.names[0], "current_user()") + df = self.spark.range(1).select(F.user()) + self.assertEqual(df.schema.names[0], "user()") + df = self.spark.range(1).select(F.session_user()) + self.assertEqual(df.schema.names[0], "session_user()") + def test_bucket(self): with self.assertRaises(PySparkTypeError) as pe: F.bucket("5", "id") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 1ae8b19ff63e..8816e84490da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.{SPARK_REVISION, SPARK_VERSION_SHORT} import org.apache.spark.sql.catalyst.InternalRow -import 
org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, UnresolvedSeed} +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, UnresolvedSeed} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke @@ -336,7 +336,8 @@ case class TypeOf(child: Expression) extends UnaryExpression { case class CurrentUser() extends LeafExpression with Unevaluable { override def nullable: Boolean = false override def dataType: DataType = StringType - override def prettyName: String = "current_user" + override def prettyName: String = + getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_user") final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE) } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 1cdd061e1d3d..053b3c56b29e 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -104,8 +104,8 @@ | org.apache.spark.sql.catalyst.expressions.CurrentTimeZone | current_timezone | SELECT current_timezone() | struct<current_timezone():string> | | org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | SELECT current_timestamp() | struct<current_timestamp():timestamp> | | org.apache.spark.sql.catalyst.expressions.CurrentUser | current_user | SELECT current_user() | struct<current_user():string> | -| org.apache.spark.sql.catalyst.expressions.CurrentUser | session_user | SELECT session_user() | struct<current_user():string> | -| org.apache.spark.sql.catalyst.expressions.CurrentUser | user | SELECT user() | struct<current_user():string> | +| org.apache.spark.sql.catalyst.expressions.CurrentUser | session_user | SELECT session_user() | struct<session_user():string> | +| 
org.apache.spark.sql.catalyst.expressions.CurrentUser | user | SELECT user() | struct<user():string> | | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct<date_add(2016-07-30, 1):date> | | org.apache.spark.sql.catalyst.expressions.DateAdd | dateadd | SELECT dateadd('2016-07-30', 1) | struct<date_add(2016-07-30, 1):date> | | org.apache.spark.sql.catalyst.expressions.DateDiff | date_diff | SELECT date_diff('2009-07-31', '2009-07-30') | struct<date_diff(2009-07-31, 2009-07-30):int> | @@ -254,7 +254,7 @@ | org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP_LIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> | | org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<RLIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> | | org.apache.spark.sql.catalyst.expressions.RPadExpressionBuilder | rpad | SELECT rpad('hi', 5, '??') | struct<rpad(hi, 5, ??):string> | -| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct<raise_error(USER_RAISED_EXCEPTION, map(errorMessage, custom error message)):void> | +| org.apache.spark.sql.catalyst.expressions.RaiseErrorExpressionBuilder | raise_error | SELECT raise_error('custom error message') | struct<raise_error(USER_RAISED_EXCEPTION, map(errorMessage, custom error message)):void> | | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct<rand():double> | | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct<rand():double> | | org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct<randn():double> | --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional 
commands, e-mail: commits-h...@spark.apache.org