(spark) branch master updated: [SPARK-46337][SQL][CONNECT][PYTHON] Make `CTESubstitution` retain the `PLAN_ID_TAG`

2023-12-08 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 79d1cded8555 [SPARK-46337][SQL][CONNECT][PYTHON] Make 
`CTESubstitution` retain the `PLAN_ID_TAG`
79d1cded8555 is described below

commit 79d1cded8555c5a0cc97b76747753785477eab8f
Author: Ruifeng Zheng 
AuthorDate: Sat Dec 9 16:12:14 2023 +0900

[SPARK-46337][SQL][CONNECT][PYTHON] Make `CTESubstitution` retain the 
`PLAN_ID_TAG`

### What changes were proposed in this pull request?
Make `CTESubstitution` retain the `PLAN_ID_TAG`

### Why are the changes needed?
Before this PR, the following query failed to resolve:
```
df1 = spark.range(10)
df2 = spark.sql("with dt as (select 1 as ida) select ida as id from dt")
df1.join(df2, df1.id == df2.id)

AnalysisException: When resolving 'id, fail to find subplan with plan_id=2 
in 'Join Inner, '`==`('id, 'id)
:- Range (0, 10, step=1, splits=Some(12))
+- WithCTE
   :- CTERelationDef 4, false
   :  +- SubqueryAlias dt
   :     +- Project [1 AS ida#22]
   :        +- OneRowRelation
   +- Project [ida#22 AS id#21]
      +- SubqueryAlias dt
         +- CTERelationRef 4, true, [ida#22], false
```
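
After this change the `PLAN_ID_TAG` survives `CTESubstitution`, so the same join resolves. A minimal illustrative sketch (the shown output assumes exactly the query above):
```python
df1 = spark.range(10)
df2 = spark.sql("with dt as (select 1 as ida) select ida as id from dt")

# With the plan ID retained, df1.id and df2.id now resolve against the correct
# subplans instead of raising AnalysisException.
df1.join(df2, df1.id == df2.id).show()
# +---+---+
# | id| id|
# +---+---+
# |  1|  1|
# +---+---+
```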

### Does this PR introduce _any_ user-facing change?
Yes, the failing query above now works.

### How was this patch tested?
Added a unit test (`test_join_with_cte`).

### Was this patch authored or co-authored using generative AI tooling?
no

Closes #44268 from zhengruifeng/connect_plan_id_cte.

Authored-by: Ruifeng Zheng 
Signed-off-by: Hyukjin Kwon 
---
 python/pyspark/sql/tests/connect/test_connect_basic.py | 14 ++
 .../spark/sql/catalyst/analysis/CTESubstitution.scala  |  9 ++---
 .../sql/catalyst/analysis/ColumnResolutionHelper.scala |  2 +-
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py 
b/python/pyspark/sql/tests/connect/test_connect_basic.py
index 2431b948f9da..32cd4ed62495 100755
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -515,6 +515,20 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
 self.assertEqual(cdf7.schema, sdf7.schema)
 self.assertEqual(cdf7.collect(), sdf7.collect())
 
+def test_join_with_cte(self):
+cte_query = "with dt as (select 1 as ida) select ida as id from dt"
+
+sdf1 = self.spark.range(10)
+sdf2 = self.spark.sql(cte_query)
+sdf3 = sdf1.join(sdf2, sdf1.id == sdf2.id)
+
+cdf1 = self.connect.range(10)
+cdf2 = self.connect.sql(cte_query)
+cdf3 = cdf1.join(cdf2, cdf1.id == cdf2.id)
+
+self.assertEqual(sdf3.schema, cdf3.schema)
+self.assertEqual(sdf3.collect(), cdf3.collect())
+
 def test_invalid_column(self):
 # SPARK-41812: fail df1.select(df2.col)
 data1 = [Row(a=1, b=2, c=3)]
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala
index 2982d8477fcc..173c9d44a2b3 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala
@@ -149,10 +149,12 @@ object CTESubstitution extends Rule[LogicalPlan] {
   plan: LogicalPlan,
   cteDefs: ArrayBuffer[CTERelationDef]): LogicalPlan = {
 plan.resolveOperatorsUp {
-  case UnresolvedWith(child, relations) =>
+  case cte @ UnresolvedWith(child, relations) =>
 val resolvedCTERelations =
   resolveCTERelations(relations, isLegacy = true, forceInline = false, 
Seq.empty, cteDefs)
-substituteCTE(child, alwaysInline = true, resolvedCTERelations)
+val substituted = substituteCTE(child, alwaysInline = true, 
resolvedCTERelations)
+substituted.copyTagsFrom(cte)
+substituted
 }
   }
 
@@ -202,7 +204,7 @@ object CTESubstitution extends Rule[LogicalPlan] {
 var firstSubstituted: Option[LogicalPlan] = None
 val newPlan = plan.resolveOperatorsDownWithPruning(
 _.containsAnyPattern(UNRESOLVED_WITH, PLAN_EXPRESSION)) {
-  case UnresolvedWith(child: LogicalPlan, relations) =>
+  case cte @ UnresolvedWith(child: LogicalPlan, relations) =>
 val resolvedCTERelations =
   resolveCTERelations(relations, isLegacy = false, forceInline, 
outerCTEDefs, cteDefs) ++
 outerCTEDefs
@@ -213,6 +215,7 @@ object CTESubstitution extends Rule[LogicalPlan] {
 if (firstSubstituted.isEmpty) {
   firstSubstituted = Some(substituted)
 }
+substituted.copyTagsFrom(cte)
 substituted
 
   case other =>
diff --git 
a/sql/catalyst/src/main/sc

(spark) branch master updated: [SPARK-46334][INFRA][PS] Upgrade `Pandas` to 2.1.4

2023-12-08 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 48e773451eeb [SPARK-46334][INFRA][PS] Upgrade `Pandas` to 2.1.4
48e773451eeb is described below

commit 48e773451eeb7feb84b3f11883bd76217f2233c4
Author: Bjørn Jørgensen 
AuthorDate: Fri Dec 8 18:39:10 2023 -0800

[SPARK-46334][INFRA][PS] Upgrade `Pandas` to 2.1.4

### What changes were proposed in this pull request?
Upgrade pandas from 2.1.3 to 2.1.4

### Why are the changes needed?
[Release notes](https://pandas.pydata.org/docs/whatsnew/v2.1.4.html)

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Pass GA

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #44266 from bjornjorgensen/pandas2.1.4.

Authored-by: Bjørn Jørgensen 
Signed-off-by: Dongjoon Hyun 
---
 dev/infra/Dockerfile   | 4 ++--
 python/pyspark/pandas/supported_api_gen.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index 3e449bcb6c82..cade845d911b 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -92,10 +92,10 @@ RUN mkdir -p /usr/local/pypy/pypy3.8 && \
 ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3.8 && \
 ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
-RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas<=2.1.3' scipy coverage 
matplotlib
+RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas<=2.1.4' scipy coverage 
matplotlib
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=14.0.0 six==1.16.0 pandas<=2.1.3 scipy 
unittest-xml-reporting plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl 
memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=14.0.0 six==1.16.0 pandas<=2.1.4 scipy 
unittest-xml-reporting plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl 
memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.59.3 grpcio-status==1.59.3 protobuf==4.25.1 
googleapis-common-protos==1.56.4"
 
diff --git a/python/pyspark/pandas/supported_api_gen.py 
b/python/pyspark/pandas/supported_api_gen.py
index 1f893520d2ce..a598fc816d96 100644
--- a/python/pyspark/pandas/supported_api_gen.py
+++ b/python/pyspark/pandas/supported_api_gen.py
@@ -37,7 +37,7 @@ from pyspark.pandas.exceptions import 
PandasNotImplementedError
 MAX_MISSING_PARAMS_SIZE = 5
 COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
 MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
-PANDAS_LATEST_VERSION = "2.1.3"
+PANDAS_LATEST_VERSION = "2.1.4"
 
 RST_HEADER = """
 =





(spark) branch master updated: [SPARK-46335][BUILD] Upgrade Maven to 3.9.6 for MNG-7913

2023-12-08 Thread yangjie01
This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new b28988dbc6ff [SPARK-46335][BUILD] Upgrade Maven to 3.9.6 for MNG-7913
b28988dbc6ff is described below

commit b28988dbc6ff0d9b95059825a0a77fa6ce5e5c75
Author: Dongjoon Hyun 
AuthorDate: Sat Dec 9 10:12:14 2023 +0800

[SPARK-46335][BUILD] Upgrade Maven to 3.9.6 for MNG-7913

### What changes were proposed in this pull request?

This PR aims to upgrade `Apache Maven` to 3.9.6.

### Why are the changes needed?

Apache Maven 3.9.6 includes MNG-7913, which improves Java support. Although it only covers up to Java 19, this is better than the previous state (up to Java 14).
- https://github.com/apache/maven/pull/1286

> To version that supports gleaning JSR330 component classes up to Java 19. The reasoning of this change is only to allow 3rd party Maven plugins/Maven extensions/other JSR330 components (managed by Maven embedded Sisu) to use bytecode newer than Java 14 (Sisu 0.3.5).

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #44267 from dongjoon-hyun/SPARK-46335.

Authored-by: Dongjoon Hyun 
Signed-off-by: yangjie01 
---
 dev/appveyor-install-dependencies.ps1 | 2 +-
 docs/building-spark.md| 2 +-
 pom.xml   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dev/appveyor-install-dependencies.ps1 
b/dev/appveyor-install-dependencies.ps1
index d60ec8a47d70..b37f1ee45f30 100644
--- a/dev/appveyor-install-dependencies.ps1
+++ b/dev/appveyor-install-dependencies.ps1
@@ -81,7 +81,7 @@ if (!(Test-Path $tools)) {
 # == Maven
 # Push-Location $tools
 #
-# $mavenVer = "3.9.5"
+# $mavenVer = "3.9.6"
 # Start-FileDownload 
"https://archive.apache.org/dist/maven/maven-3/$mavenVer/binaries/apache-maven-$mavenVer-bin.zip";
 "maven.zip"
 #
 # # extract
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 23d6f49a4fe8..9fea22426978 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -27,7 +27,7 @@ license: |
 ## Apache Maven
 
 The Maven-based build is the build of reference for Apache Spark.
-Building Spark using Maven requires Maven 3.9.5 and Java 17/21.
+Building Spark using Maven requires Maven 3.9.6 and Java 17/21.
 Spark requires Scala 2.13; support for Scala 2.12 was removed in Spark 4.0.0.
 
 ### Setting up Maven's Memory Usage
diff --git a/pom.xml b/pom.xml
index 33cb6cf257f6..c97c74ce5707 100644
--- a/pom.xml
+++ b/pom.xml
@@ -115,7 +115,7 @@
 17
 ${java.version}
 ${java.version}
-3.9.5
+3.9.6
 3.1.0
 spark
 9.6





(spark) branch branch-3.4 updated: [SPARK-46275][3.4] Protobuf: Return null in permissive mode when deserialization fails

2023-12-08 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
 new 4745138601b7 [SPARK-46275][3.4] Protobuf: Return null in permissive 
mode when deserialization fails
4745138601b7 is described below

commit 4745138601b74e805459bd240f748fcf3e7ddec2
Author: Raghu Angadi 
AuthorDate: Fri Dec 8 14:40:03 2023 -0800

[SPARK-46275][3.4] Protobuf: Return null in permissive mode when 
deserialization fails

This is a cherry-pick of #44214 into 3.4 branch.

From the original PR:

### What changes were proposed in this pull request?
This updates the behavior of the `from_protobuf()` built-in function when the
underlying record fails to deserialize.

  * **Current behavior**:
    * By default, this throws an error and the query fails. [This part is not changed in the PR]
    * When `mode` is set to 'PERMISSIVE' it returns a non-null struct with each of the inner fields set to null, e.g. `{ "field_a": null, "field_b": null }` etc.
      * This is not very convenient for users. They cannot tell whether the result came from a malformed record or from a null in the input itself, and checking every field for null in a SQL query is tedious (imagine a query over a struct with 10 fields).

  * **New behavior**:
    * When `mode` is set to 'PERMISSIVE' it simply returns `null`.

### Why are the changes needed?
This makes it easier for users to detect and handle malformed records.
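
A hedged PySpark sketch of the new behavior (the message name, descriptor path, and column names below are placeholders for illustration, not taken from this PR):
```python
from pyspark.sql.protobuf.functions import from_protobuf

# Hypothetical input: `df` has a binary column `payload` holding protobuf-encoded
# bytes, some of them malformed; `Person` and the descriptor path are placeholders.
parsed = df.select(
    from_protobuf(
        df.payload, "Person", descFilePath="/tmp/person.desc",
        options={"mode": "PERMISSIVE"},
    ).alias("person")
)

# Old behavior: a corrupt record produced a struct of all-null fields.
# New behavior: the whole `person` struct is null, which is easy to filter out.
parsed.filter("person IS NULL").show()
```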

### Does this PR introduce _any_ user-facing change?
Yes, but this does not change the contract. In fact, it clarifies it.

### How was this patch tested?
 - Unit tests are updated.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #44265 from rangadi/protobuf-null-3.4.

Authored-by: Raghu Angadi 
Signed-off-by: Dongjoon Hyun 
---
 .../sql/protobuf/ProtobufDataToCatalyst.scala  | 31 --
 .../ProtobufCatalystDataConversionSuite.scala  | 13 +
 2 files changed, 6 insertions(+), 38 deletions(-)

diff --git 
a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDataToCatalyst.scala
 
b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDataToCatalyst.scala
index da44f94d5eac..78e995190045 100644
--- 
a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDataToCatalyst.scala
+++ 
b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDataToCatalyst.scala
@@ -21,12 +21,12 @@ import scala.util.control.NonFatal
 
 import com.google.protobuf.DynamicMessage
 
-import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, 
Expression, SpecificInternalRow, UnaryExpression}
+import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, 
Expression, UnaryExpression}
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
CodeGenerator, ExprCode}
 import org.apache.spark.sql.catalyst.util.{FailFastMode, ParseMode, 
PermissiveMode}
 import org.apache.spark.sql.errors.{QueryCompilationErrors, 
QueryExecutionErrors}
 import org.apache.spark.sql.protobuf.utils.{ProtobufOptions, ProtobufUtils, 
SchemaConverters}
-import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType, 
StructType}
+import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType}
 
 private[protobuf] case class ProtobufDataToCatalyst(
 child: Expression,
@@ -38,16 +38,8 @@ private[protobuf] case class ProtobufDataToCatalyst(
 
   override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
 
-  override lazy val dataType: DataType = {
-val dt = SchemaConverters.toSqlType(messageDescriptor, 
protobufOptions).dataType
-parseMode match {
-  // With PermissiveMode, the output Catalyst row might contain columns of 
null values for
-  // corrupt records, even if some of the columns are not nullable in the 
user-provided schema.
-  // Therefore we force the schema to be all nullable here.
-  case PermissiveMode => dt.asNullable
-  case _ => dt
-}
-  }
+  override lazy val dataType: DataType =
+SchemaConverters.toSqlType(messageDescriptor, protobufOptions).dataType
 
   override def nullable: Boolean = true
 
@@ -75,22 +67,9 @@ private[protobuf] case class ProtobufDataToCatalyst(
 mode
   }
 
-  @transient private lazy val nullResultRow: Any = dataType match {
-case st: StructType =>
-  val resultRow = new SpecificInternalRow(st.map(_.dataType))
-  for (i <- 0 until st.length) {
-resultRow.setNullAt(i)
-  }
-  resultRow
-
-case _ =>
-  null
-  }
-
   private def handleException(e: Throwable): Any = {
 parseMode match {
-  case PermissiveMode =>
-nullResultRow
+  case PermissiveMode

(spark) branch master updated: [SPARK-46325][CONNECT] Remove unnecessary override functions when constructing `WrappedCloseableIterator` in `ResponseValidator#wrapIterator`

2023-12-08 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 75cb37011fa2 [SPARK-46325][CONNECT] Remove unnecessary override 
functions when constructing `WrappedCloseableIterator` in 
`ResponseValidator#wrapIterator`
75cb37011fa2 is described below

commit 75cb37011fa2a1cb00e633fac312eb15ab412df9
Author: yangjie01 
AuthorDate: Fri Dec 8 14:00:28 2023 -0800

[SPARK-46325][CONNECT] Remove unnecessary override functions when 
constructing `WrappedCloseableIterator` in `ResponseValidator#wrapIterator`

### What changes were proposed in this pull request?
This PR removes the overridden `hasNext` and `close` functions in the
construction of `WrappedCloseableIterator` in `ResponseValidator#wrapIterator`, 
as these functions are identical to those defined in `WrappedCloseableIterator`.

- WrappedCloseableIterator


https://github.com/apache/spark/blob/9ffdcc398ed5560f34778d005da697f6ad0a15ee/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala#L30-L42

- ResponseValidator#wrapIterator


https://github.com/apache/spark/blob/9ffdcc398ed5560f34778d005da697f6ad0a15ee/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ResponseValidator.scala#L62-L85

### Why are the changes needed?
Remove unnecessary override functions.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Pass GitHub Actions

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #44255 from LuciferYang/SPARK-46325.

Authored-by: yangjie01 
Signed-off-by: Dongjoon Hyun 
---
 .../apache/spark/sql/connect/client/ResponseValidator.scala   | 11 ---
 1 file changed, 11 deletions(-)

diff --git 
a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ResponseValidator.scala
 
b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ResponseValidator.scala
index 2081196d4671..67f29c727ef4 100644
--- 
a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ResponseValidator.scala
+++ 
b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ResponseValidator.scala
@@ -65,22 +65,11 @@ class ResponseValidator extends Logging {
 
   override def innerIterator: Iterator[T] = inner
 
-  override def hasNext: Boolean = {
-innerIterator.hasNext
-  }
-
   override def next(): T = {
 verifyResponse {
   innerIterator.next()
 }
   }
-
-  override def close(): Unit = {
-innerIterator match {
-  case it: CloseableIterator[T] => it.close()
-  case _ => // nothing
-}
-  }
 }
   }
 





(spark) branch master updated: [SPARK-46332][SQL] Migrate `CatalogNotFoundException` to the error class `CATALOG_NOT_FOUND`

2023-12-08 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new d9f0fccd967b [SPARK-46332][SQL] Migrate `CatalogNotFoundException` to 
the error class `CATALOG_NOT_FOUND`
d9f0fccd967b is described below

commit d9f0fccd967b5c8686353d524d2b31e27b7a473b
Author: Max Gekk 
AuthorDate: Fri Dec 8 12:54:20 2023 -0800

[SPARK-46332][SQL] Migrate `CatalogNotFoundException` to the error class 
`CATALOG_NOT_FOUND`

### What changes were proposed in this pull request?
In the PR, I propose to migrate the `CatalogNotFoundException` exception to 
the new error class `CATALOG_NOT_FOUND`, improve the format of the exception 
message, and prohibit creation of the exception without the error class.

### Why are the changes needed?
This is part of the migration onto error classes and the new error framework. The changes improve the user experience with Spark SQL and make `CatalogNotFoundException` consistent with other Spark exceptions.
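
For example, a hedged sketch from PySpark (the catalog name is arbitrary and the exact message text may differ):
```python
try:
    spark.catalog.setCurrentCatalog("zxwtyswklpf")
except Exception as e:
    # Before: "...CatalogNotFoundException: Catalog 'zxwtyswklpf' plugin class
    #          not found: spark.sql.catalog.zxwtyswklpf is not defined"
    # After:  the message starts with the error class, e.g.
    #         "[CATALOG_NOT_FOUND] The catalog `zxwtyswklpf` not found. ..."
    print(e)
```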

### Does this PR introduce _any_ user-facing change?
Yes, if user's code depends on the error message format of 
`CatalogNotFoundException`.

### How was this patch tested?
By running the affected test suites:
```
$ build/sbt "core/testOnly *SparkThrowableSuite"
```

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #44259 from MaxGekk/catalog-plugin-not-found.

Authored-by: Max Gekk 
Signed-off-by: Dongjoon Hyun 
---
 R/pkg/tests/fulltests/test_sparkSQL.R  |  5 +
 common/utils/src/main/resources/error/error-classes.json   |  6 ++
 .../jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala |  6 +++---
 docs/sql-error-conditions.md   |  6 ++
 .../spark/sql/connector/catalog/CatalogNotFoundException.scala | 10 +++---
 .../org/apache/spark/sql/connector/catalog/Catalogs.scala  |  2 +-
 .../org/apache/spark/sql/errors/QueryExecutionErrors.scala |  7 +--
 .../spark/sql/connector/catalog/CatalogLoadingSuite.java   |  7 ++-
 .../spark/sql/catalyst/analysis/TableLookupCacheSuite.scala|  6 +++---
 .../spark/sql/connector/catalog/LookupCatalogSuite.scala   |  5 +++--
 .../org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala  |  6 --
 .../sql/execution/command/AlignAssignmentsSuiteBase.scala  |  5 +++--
 .../spark/sql/execution/command/PlanResolutionSuite.scala  |  9 -
 13 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R 
b/R/pkg/tests/fulltests/test_sparkSQL.R
index f2bef7a00446..0d96f708a544 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -4103,10 +4103,7 @@ test_that("catalog APIs, listCatalogs, 
setCurrentCatalog, currentCatalog", {
   expect_equal(currentCatalog(), "spark_catalog")
   expect_error(setCurrentCatalog("spark_catalog"), NA)
   expect_error(setCurrentCatalog("zxwtyswklpf"),
-   paste0("Error in setCurrentCatalog : ",
-   
"org.apache.spark.sql.connector.catalog.CatalogNotFoundException: ",
-   "Catalog 'zxwtyswklpf' plugin class not found: ",
-   "spark.sql.catalog.zxwtyswklpf is not defined"))
+   "[CATALOG_NOT_FOUND]*`zxwtyswklpf`*")
   catalogs <- collect(listCatalogs())
 })
 
diff --git a/common/utils/src/main/resources/error/error-classes.json 
b/common/utils/src/main/resources/error/error-classes.json
index 7a672fa5e557..62d10c0d34cb 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -407,6 +407,12 @@
 ],
 "sqlState" : "22003"
   },
+  "CATALOG_NOT_FOUND" : {
+"message" : [
+  "The catalog  not found. Consider to set the SQL config 
 to a catalog plugin."
+],
+"sqlState" : "42P08"
+  },
   "CHECKPOINT_RDD_BLOCK_ID_NOT_FOUND" : {
 "message" : [
   "Checkpoint block  not found!",
diff --git 
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala
 
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala
index cefa63ecd353..d646fad00c07 100644
--- 
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala
+++ 
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala
@@ -66,10 +66,10 @@ class CatalogSuite extends RemoteSparkSession with 
SQLHelper {
   val catalogs = spark.catalog.listCatalogs().collect()
   assert(catalogs.length == 1)
   assert(catalogs.map(_.name) sameElements Array("spark_catalog"))
-  val message = intercept[SparkException] {
+  val exception = intercept[SparkException] {
 spark.catalog.setCurrentCatalog

(spark) branch master updated: [SPARK-46324][SQL][PYTHON] Fix the output name of pyspark.sql.functions.user and session_user

2023-12-08 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 3224cddcf9da [SPARK-46324][SQL][PYTHON] Fix the output name of 
pyspark.sql.functions.user and session_user
3224cddcf9da is described below

commit 3224cddcf9da913c964b775b5912a67cd1e968b2
Author: Hyukjin Kwon 
AuthorDate: Fri Dec 8 12:48:24 2023 -0800

[SPARK-46324][SQL][PYTHON] Fix the output name of 
pyspark.sql.functions.user and session_user

### What changes were proposed in this pull request?

This PR proposes to fix `user()` and `session_user()` so that their output column names match the invoked function names.

### Why are the changes needed?

To show the correct name of the functions being used.

### Does this PR introduce _any_ user-facing change?

Yes.

```scala
spark.range(1).select(user(), session_user()).show()
```

Before:

```
+--------------+--------------+
|current_user()|current_user()|
+--------------+--------------+
|  hyukjin.kwon|  hyukjin.kwon|
+--------------+--------------+
```

After:

```
+--------------+--------------+
|        user()|session_user()|
+--------------+--------------+
|  hyukjin.kwon|  hyukjin.kwon|
+--------------+--------------+
```
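
The same rename is visible from PySpark; a small sketch (values depend on the local user):
```python
from pyspark.sql import functions as sf

df = spark.range(1).select(sf.user(), sf.session_user())

# Before this fix both columns were named "current_user()"; now the column
# names match the functions that were called.
print(df.schema.names)  # ['user()', 'session_user()']
```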

### How was this patch tested?

Manually tested, and unittests were added.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #44253 from HyukjinKwon/user-name.

Authored-by: Hyukjin Kwon 
Signed-off-by: Dongjoon Hyun 
---
 .../query-tests/explain-results/function_session_user.explain| 2 +-
 .../resources/query-tests/explain-results/function_user.explain  | 2 +-
 python/pyspark/sql/functions/builtin.py  | 4 ++--
 python/pyspark/sql/tests/test_functions.py   | 9 +
 .../scala/org/apache/spark/sql/catalyst/expressions/misc.scala   | 5 +++--
 .../src/test/resources/sql-functions/sql-expression-schema.md| 6 +++---
 6 files changed, 19 insertions(+), 9 deletions(-)

diff --git 
a/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain
 
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain
index 82f5d2adcec0..b6205d9fb56c 100644
--- 
a/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain
+++ 
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain
@@ -1,2 +1,2 @@
-Project [current_user() AS current_user()#0]
+Project [session_user() AS session_user()#0]
 +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git 
a/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain
 
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain
index 82f5d2adcec0..52746c58c000 100644
--- 
a/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain
+++ 
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain
@@ -1,2 +1,2 @@
-Project [current_user() AS current_user()#0]
+Project [user() AS user()#0]
 +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/python/pyspark/sql/functions/builtin.py 
b/python/pyspark/sql/functions/builtin.py
index 997b641080cf..e1c01018 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -8967,7 +8967,7 @@ def user() -> Column:
 >>> import pyspark.sql.functions as sf
 >>> spark.range(1).select(sf.user()).show() # doctest: +SKIP
 +--+
-|current_user()|
+|user()|
 +--+
 | ruifeng.zheng|
 +--+
@@ -8986,7 +8986,7 @@ def session_user() -> Column:
 >>> import pyspark.sql.functions as sf
 >>> spark.range(1).select(sf.session_user()).show() # doctest: +SKIP
 +--+
-|current_user()|
+|session_user()|
 +--+
 | ruifeng.zheng|
 +--+
diff --git a/python/pyspark/sql/tests/test_functions.py 
b/python/pyspark/sql/tests/test_functions.py
index b59417d8a310..5352ee04d7fe 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -1355,6 +1355,15 @@ class FunctionsTestsMixin:
 message_parameters={"arg_name": "gapDuration", "arg_type": "int"},
 )
 
+def test_current_user(self):
+df = self.spark.range(1).select(F.current_user())
+self.assertIsInstance(df.first()[0], str)
+self.assertEqual(df.schema.names[0], "current_user()")
+df = self.spark.range(1).select(F.user())
+   

svn commit: r65952 - in /dev/spark/v3.3.4-rc1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.3.1/ _site/api/R/deps/bootstrap-5.3.1/fonts/

2023-12-08 Thread dongjoon
Author: dongjoon
Date: Fri Dec  8 20:21:58 2023
New Revision: 65952

Log:
Apache Spark v3.3.4-rc1 docs


[This commit notification would consist of 2683 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]




svn commit: r65951 - /dev/spark/v3.4.2-rc1-docs/

2023-12-08 Thread dongjoon
Author: dongjoon
Date: Fri Dec  8 19:49:07 2023
New Revision: 65951

Log:
Remove Apache Spark 3.4.2 RC1 docs after 3.4.2 release

Removed:
dev/spark/v3.4.2-rc1-docs/





svn commit: r65950 - /dev/spark/v3.3.4-rc1-bin/

2023-12-08 Thread dongjoon
Author: dongjoon
Date: Fri Dec  8 19:47:54 2023
New Revision: 65950

Log:
Apache Spark v3.3.4-rc1

Added:
dev/spark/v3.3.4-rc1-bin/
dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz   (with props)
dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz.asc
dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz.sha512
dev/spark/v3.3.4-rc1-bin/pyspark-3.3.4.tar.gz   (with props)
dev/spark/v3.3.4-rc1-bin/pyspark-3.3.4.tar.gz.asc
dev/spark/v3.3.4-rc1-bin/pyspark-3.3.4.tar.gz.sha512
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-hadoop2.tgz   (with props)
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-hadoop2.tgz.asc
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-hadoop2.tgz.sha512
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-hadoop3-scala2.13.tgz   (with 
props)
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-hadoop3-scala2.13.tgz.asc
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-hadoop3-scala2.13.tgz.sha512
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-hadoop3.tgz   (with props)
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-hadoop3.tgz.asc
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-hadoop3.tgz.sha512
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-without-hadoop.tgz   (with props)
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-without-hadoop.tgz.asc
dev/spark/v3.3.4-rc1-bin/spark-3.3.4-bin-without-hadoop.tgz.sha512
dev/spark/v3.3.4-rc1-bin/spark-3.3.4.tgz   (with props)
dev/spark/v3.3.4-rc1-bin/spark-3.3.4.tgz.asc
dev/spark/v3.3.4-rc1-bin/spark-3.3.4.tgz.sha512

Added: dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz
==
Binary file - no diff available.

Propchange: dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz
--
svn:mime-type = application/octet-stream

Added: dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz.asc
==
--- dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz.asc (added)
+++ dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz.asc Fri Dec  8 19:47:54 2023
@@ -0,0 +1,17 @@
+-BEGIN PGP SIGNATURE-
+
+iQJIBAABCgAyFiEE8oycklwYjDXjRWFN7aAM6DTw/FwFAmVzcocUHGRvbmdqb29u
+QGFwYWNoZS5vcmcACgkQ7aAM6DTw/FxycA//Vd2YOoAE4QBm3xcAVuHdGesXVBhG
+Ja7TIc2WLkxi4P/wRKi6vYtacgMuayygetDqIDNOv8w/xk20omwLvA7ComVv2kbz
+abIyWOZggUOhOumYY5FyIskEZBygCiTgWqyZSSi13tj8DT2bUL931N4J/E+B8V+/
+tY77UvemmjNOxRvzmqDnZz3pp+faJ5J18UDMXG4J0atM7eLjT7vEpL4cAHUDuYGO
+YHvwTxrUJxFyGafNkAUrFdAQmHaW+ULKawHPTBfex/sdojUqkQ0oiC7Z6th2ZpZ4
+R+yrs0GxaMaMLseifiK7sy6t/cHmGNghAPo0bl+sVe1rY8NDN075eaHOXQ5I+WqL
+dNBarAEHJ7s50ClVNQtUIu4QWE7lydrW2ykRfKHAa0v5Qffe+vJGBmmv/RHRa2Wi
+u9mohwbu3i6q1SRJFz2tpSF1ZxeAPshAy+/TMH0BDznf7lNcca/dFZGmgWNL6B4S
+5YCbtK5JZ6JUjcVUSFi/1qQy2PfjDPmTYYDBqPNzx8M7kVXx2mbvWwslC0+KdnBK
+OpMiSCSR5p3jz9sekKAI96vtzX5Q/zJc/+0J+wpk8Gpe5K2ajOG5oAbH0WQn5ex4
+rZ3eh9NH+VH5szmA9ZoJe7vf9fpLk3I2PPAhAoZPRMnx048+/kyzmsisJ52h14Tj
+/b6FUTAqwOGzM8I=
+=b7JY
+-END PGP SIGNATURE-

Added: dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz.sha512
==
--- dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz.sha512 (added)
+++ dev/spark/v3.3.4-rc1-bin/SparkR_3.3.4.tar.gz.sha512 Fri Dec  8 19:47:54 2023
@@ -0,0 +1 @@
+33b8848b7b409302945e4ef91ff254507e5f3766ad19404473889ce9ce5d88c5fa9d6a806d1120c8f43c16137a76426183af43e974a2f0bb12cfefce566fb048
  SparkR_3.3.4.tar.gz

Added: dev/spark/v3.3.4-rc1-bin/pyspark-3.3.4.tar.gz
==
Binary file - no diff available.

Propchange: dev/spark/v3.3.4-rc1-bin/pyspark-3.3.4.tar.gz
--
svn:mime-type = application/octet-stream

Added: dev/spark/v3.3.4-rc1-bin/pyspark-3.3.4.tar.gz.asc
==
--- dev/spark/v3.3.4-rc1-bin/pyspark-3.3.4.tar.gz.asc (added)
+++ dev/spark/v3.3.4-rc1-bin/pyspark-3.3.4.tar.gz.asc Fri Dec  8 19:47:54 2023
@@ -0,0 +1,17 @@
+-BEGIN PGP SIGNATURE-
+
+iQJIBAABCgAyFiEE8oycklwYjDXjRWFN7aAM6DTw/FwFAmVzcokUHGRvbmdqb29u
+QGFwYWNoZS5vcmcACgkQ7aAM6DTw/FwUPQ//ST9CejnIpZvrG8L6IEDk/G2QMPZR
+LaFUy3DVdZsJ6JBdbo4wQWYl51CO5OvUzYqJC0WYtSdkOS34FagSccwjSg3mzhIa
+FObQ86fdYk9wk+2fJl7+jTV6z1SZ99lRjLlN2JyFJLICm+SPVekCDMBmTDS/Ohrg
+AVm/yMwjrtSs2QzxlmUIF+8IrriFHQtKUENx1kb0aYWSHgarGqJvzB6UcBOzAter
+47vjj0nCfTSCafqyME2TZ6XVopOtrUK+ZENH9MXv1VyKfp4F6tLLMOf7ZyOa44S1
+PfEEOO+SllP9/nW30ydRtcD3ZsMT8YL6+sOuBt+y7nc8c6cJKVUjC62kAIA0yTga
+XJU8fu+vT6HGELjME3V0LAiOKZ/IIhICgJENZRHXPVFZPa+gWSBhTGQyZD5e0ujD
+iXr5Rrenp17fwAKtt1OeuJDkAht1J4Z/2zt6VzlqbM2GR0EbtDIbNl0TMaFoRkmH
+HzwSsY2AsKCks4Ze7DBp9S0Pji93cWybqndm/SDEIRXSKegRus/i+8SFJv03IlZr
+Mzkn7IW1RiM17SFd29q5hYyVyZ7Zff40dqYN7rkGWXQBul4U8vRI3WGeOrSB2Oce
+jEqgBA1gArm+iojSkAdiAlrDgudNZViTLg0kUtCcdYSb3qFFtRAWEEBXvAAbMeBU
+wSHbZ1fFgEELKNk=
+=KLJ9
+-END PGP SIGNATURE-

Added: dev/spark/v3.3

(spark) branch master updated: [SPARK-46328][SQL] Allocate capacity of array list of TColumns by columns size in TRowSet generation

2023-12-08 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 20c9b3dc4fac [SPARK-46328][SQL] Allocate capacity of array list of 
TColumns by columns size in TRowSet generation
20c9b3dc4fac is described below

commit 20c9b3dc4fac283f895c8d860b4c6e0144697302
Author: liangbowen 
AuthorDate: Fri Dec 8 11:24:35 2023 -0800

[SPARK-46328][SQL] Allocate capacity of array list of TColumns by columns 
size in TRowSet generation

### What changes were proposed in this pull request?

Allocate enough capacity, based on the number of columns, for the array lists of TColumns assembled during TRowSet generation.

### Why are the changes needed?

ArrayLists are created for the TColumn value collections in RowSetUtils during TRowSet generation. Currently they are created with the JDK's default initial capacity rather than being sized by the number of columns, which can cause repeated array copying while assembling each TColumn collection once the column count exceeds that capacity.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

GA tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #44258 from bowenliang123/rowset-cap.

Authored-by: liangbowen 
Signed-off-by: Dongjoon Hyun 
---
 .../org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala
 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala
index 94046adca0d8..502e29619027 100644
--- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala
+++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala
@@ -57,15 +57,16 @@ object RowSetUtils {
 val tRows = new java.util.ArrayList[TRow](rowSize)
 while (i < rowSize) {
   val row = rows(i)
-  val tRow = new TRow()
   var j = 0
   val columnSize = row.length
+  val tColumnValues = new java.util.ArrayList[TColumnValue](columnSize)
   while (j < columnSize) {
 val columnValue = toTColumnValue(j, row, schema(j), timeFormatters)
-tRow.addToColVals(columnValue)
+tColumnValues.add(columnValue)
 j += 1
   }
   i += 1
+  val tRow = new TRow(tColumnValues)
   tRows.add(tRow)
 }
 new TRowSet(startRowOffSet, tRows)
@@ -80,11 +81,13 @@ object RowSetUtils {
 val tRowSet = new TRowSet(startRowOffSet, new 
java.util.ArrayList[TRow](rowSize))
 var i = 0
 val columnSize = schema.length
+val tColumns = new java.util.ArrayList[TColumn](columnSize)
 while (i < columnSize) {
   val tColumn = toTColumn(rows, i, schema(i), timeFormatters)
-  tRowSet.addToColumns(tColumn)
+  tColumns.add(tColumn)
   i += 1
 }
+tRowSet.setColumns(tColumns)
 tRowSet
   }
 





(spark) 01/01: Preparing development version 3.3.5-SNAPSHOT

2023-12-08 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git

commit 0837e78d9cee986f3f791072df2488947dcf0b9a
Author: Dongjoon Hyun 
AuthorDate: Fri Dec 8 18:28:19 2023 +

Preparing development version 3.3.5-SNAPSHOT
---
 R/pkg/DESCRIPTION  | 2 +-
 assembly/pom.xml   | 2 +-
 common/kvstore/pom.xml | 2 +-
 common/network-common/pom.xml  | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml| 2 +-
 common/sketch/pom.xml  | 2 +-
 common/tags/pom.xml| 2 +-
 common/unsafe/pom.xml  | 2 +-
 core/pom.xml   | 2 +-
 docs/_config.yml   | 6 +++---
 examples/pom.xml   | 2 +-
 external/avro/pom.xml  | 2 +-
 external/docker-integration-tests/pom.xml  | 2 +-
 external/kafka-0-10-assembly/pom.xml   | 2 +-
 external/kafka-0-10-sql/pom.xml| 2 +-
 external/kafka-0-10-token-provider/pom.xml | 2 +-
 external/kafka-0-10/pom.xml| 2 +-
 external/kinesis-asl-assembly/pom.xml  | 2 +-
 external/kinesis-asl/pom.xml   | 2 +-
 external/spark-ganglia-lgpl/pom.xml| 2 +-
 graphx/pom.xml | 2 +-
 hadoop-cloud/pom.xml   | 2 +-
 launcher/pom.xml   | 2 +-
 mllib-local/pom.xml| 2 +-
 mllib/pom.xml  | 2 +-
 pom.xml| 2 +-
 python/pyspark/version.py  | 2 +-
 repl/pom.xml   | 2 +-
 resource-managers/kubernetes/core/pom.xml  | 2 +-
 resource-managers/kubernetes/integration-tests/pom.xml | 2 +-
 resource-managers/mesos/pom.xml| 2 +-
 resource-managers/yarn/pom.xml | 2 +-
 sql/catalyst/pom.xml   | 2 +-
 sql/core/pom.xml   | 2 +-
 sql/hive-thriftserver/pom.xml  | 2 +-
 sql/hive/pom.xml   | 2 +-
 streaming/pom.xml  | 2 +-
 tools/pom.xml  | 2 +-
 39 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index ac01697363e4..6c1d59584cd0 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 3.3.4
+Version: 3.3.5
 Title: R Front End for 'Apache Spark'
 Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>.
 Authors@R:
diff --git a/assembly/pom.xml b/assembly/pom.xml
index e3e15e41efb8..11a746d6701d 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4
+3.3.5-SNAPSHOT
 ../pom.xml
   
 
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index 66fc33f5bb0b..99f36c865fba 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4
+3.3.5-SNAPSHOT
 ../../pom.xml
   
 
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index fd81fe02bb03..e67d16c8eb9a 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4
+3.3.5-SNAPSHOT
 ../../pom.xml
   
 
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 9de437c7291a..44432ad56ce2 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4
+3.3.5-SNAPSHOT
 ../../pom.xml
   
 
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 919717f81a94..6a9377402e77 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4
+3.3.5-SNAPSHOT
 ../../pom.xml
   
 
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index 429544784335..e6baac59e0f2 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4
+3.3.5-SNAPSHOT
 ../../pom.xml
   
 
diff --git a/common/tags/pom.xml b/common/ta

(spark) 01/01: Preparing Spark release v3.3.4-rc1

2023-12-08 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to tag v3.3.4-rc1
in repository https://gitbox.apache.org/repos/asf/spark.git

commit 18db204995b32e87a650f2f09f9bcf047ddafa90
Author: Dongjoon Hyun 
AuthorDate: Fri Dec 8 18:28:15 2023 +

Preparing Spark release v3.3.4-rc1
---
 assembly/pom.xml   | 2 +-
 common/kvstore/pom.xml | 2 +-
 common/network-common/pom.xml  | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml| 2 +-
 common/sketch/pom.xml  | 2 +-
 common/tags/pom.xml| 2 +-
 common/unsafe/pom.xml  | 2 +-
 core/pom.xml   | 2 +-
 docs/_config.yml   | 2 +-
 examples/pom.xml   | 2 +-
 external/avro/pom.xml  | 2 +-
 external/docker-integration-tests/pom.xml  | 2 +-
 external/kafka-0-10-assembly/pom.xml   | 2 +-
 external/kafka-0-10-sql/pom.xml| 2 +-
 external/kafka-0-10-token-provider/pom.xml | 2 +-
 external/kafka-0-10/pom.xml| 2 +-
 external/kinesis-asl-assembly/pom.xml  | 2 +-
 external/kinesis-asl/pom.xml   | 2 +-
 external/spark-ganglia-lgpl/pom.xml| 2 +-
 graphx/pom.xml | 2 +-
 hadoop-cloud/pom.xml   | 2 +-
 launcher/pom.xml   | 2 +-
 mllib-local/pom.xml| 2 +-
 mllib/pom.xml  | 2 +-
 pom.xml| 2 +-
 python/pyspark/version.py  | 2 +-
 repl/pom.xml   | 2 +-
 resource-managers/kubernetes/core/pom.xml  | 2 +-
 resource-managers/kubernetes/integration-tests/pom.xml | 2 +-
 resource-managers/mesos/pom.xml| 2 +-
 resource-managers/yarn/pom.xml | 2 +-
 sql/catalyst/pom.xml   | 2 +-
 sql/core/pom.xml   | 2 +-
 sql/hive-thriftserver/pom.xml  | 2 +-
 sql/hive/pom.xml   | 2 +-
 streaming/pom.xml  | 2 +-
 tools/pom.xml  | 2 +-
 38 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/assembly/pom.xml b/assembly/pom.xml
index b3d7ee94bb3c..e3e15e41efb8 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4-SNAPSHOT
+3.3.4
 ../pom.xml
   
 
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index 8a8d44dfd941..66fc33f5bb0b 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4-SNAPSHOT
+3.3.4
 ../../pom.xml
   
 
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index e76b78305aa7..fd81fe02bb03 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4-SNAPSHOT
+3.3.4
 ../../pom.xml
   
 
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 92324bf17469..9de437c7291a 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4-SNAPSHOT
+3.3.4
 ../../pom.xml
   
 
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 6eaf00a3dca6..919717f81a94 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4-SNAPSHOT
+3.3.4
 ../../pom.xml
   
 
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index 3fcff5046da0..429544784335 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4-SNAPSHOT
+3.3.4
 ../../pom.xml
   
 
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index c04f0eb556e9..6a3243424567 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.12
-3.3.4-SNAPSHOT
+3.3.4
 ../../pom.xml
   
 
diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml
index 84a5d404faec..0675a3554c47 100644
--- a/common/unsafe/pom.xml
+++ b/common/unsafe/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-

(spark) branch branch-3.3 updated (6a4488f2f486 -> 0837e78d9cee)

2023-12-08 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


from 6a4488f2f486 [SPARK-45580][SQL][3.3] Handle case where a nested 
subquery becomes an existence join
 add 18db204995b3 Preparing Spark release v3.3.4-rc1
 new 0837e78d9cee Preparing development version 3.3.5-SNAPSHOT

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 R/pkg/DESCRIPTION  | 2 +-
 assembly/pom.xml   | 2 +-
 common/kvstore/pom.xml | 2 +-
 common/network-common/pom.xml  | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml| 2 +-
 common/sketch/pom.xml  | 2 +-
 common/tags/pom.xml| 2 +-
 common/unsafe/pom.xml  | 2 +-
 core/pom.xml   | 2 +-
 docs/_config.yml   | 6 +++---
 examples/pom.xml   | 2 +-
 external/avro/pom.xml  | 2 +-
 external/docker-integration-tests/pom.xml  | 2 +-
 external/kafka-0-10-assembly/pom.xml   | 2 +-
 external/kafka-0-10-sql/pom.xml| 2 +-
 external/kafka-0-10-token-provider/pom.xml | 2 +-
 external/kafka-0-10/pom.xml| 2 +-
 external/kinesis-asl-assembly/pom.xml  | 2 +-
 external/kinesis-asl/pom.xml   | 2 +-
 external/spark-ganglia-lgpl/pom.xml| 2 +-
 graphx/pom.xml | 2 +-
 hadoop-cloud/pom.xml   | 2 +-
 launcher/pom.xml   | 2 +-
 mllib-local/pom.xml| 2 +-
 mllib/pom.xml  | 2 +-
 pom.xml| 2 +-
 python/pyspark/version.py  | 2 +-
 repl/pom.xml   | 2 +-
 resource-managers/kubernetes/core/pom.xml  | 2 +-
 resource-managers/kubernetes/integration-tests/pom.xml | 2 +-
 resource-managers/mesos/pom.xml| 2 +-
 resource-managers/yarn/pom.xml | 2 +-
 sql/catalyst/pom.xml   | 2 +-
 sql/core/pom.xml   | 2 +-
 sql/hive-thriftserver/pom.xml  | 2 +-
 sql/hive/pom.xml   | 2 +-
 streaming/pom.xml  | 2 +-
 tools/pom.xml  | 2 +-
 39 files changed, 41 insertions(+), 41 deletions(-)





(spark) tag v3.3.4-rc1 created (now 18db204995b3)

2023-12-08 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to tag v3.3.4-rc1
in repository https://gitbox.apache.org/repos/asf/spark.git


  at 18db204995b3 (commit)
This tag includes the following new commits:

 new 18db204995b3 Preparing Spark release v3.3.4-rc1

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.






(spark) branch master updated: [SPARK-46327][PS][CONNECT][TESTS] Reorganize `SeriesStringTests`

2023-12-08 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 20df062d85e8 [SPARK-46327][PS][CONNECT][TESTS] Reorganize 
`SeriesStringTests`
20df062d85e8 is described below

commit 20df062d85e80422a55afae80ddbf2060f26516c
Author: Ruifeng Zheng 
AuthorDate: Fri Dec 8 21:08:50 2023 +0900

[SPARK-46327][PS][CONNECT][TESTS] Reorganize `SeriesStringTests`

### What changes were proposed in this pull request?
Reorganize `SeriesStringTests`

### Why are the changes needed?
test code clean up

### Does this PR introduce _any_ user-facing change?
no, test-only

### How was this patch tested?
ci

### Was this patch authored or co-authored using generative AI tooling?
no

Closes #44257 from zhengruifeng/ps_test_ser_str.

Authored-by: Ruifeng Zheng 
Signed-off-by: Hyukjin Kwon 
---
 dev/sparktestsupport/modules.py|   6 +-
 .../test_parity_string_ops_adv.py} |   8 +-
 .../test_parity_string_ops_basic.py}   |   8 +-
 .../test_string_ops_adv.py}| 125 +-
 .../pandas/tests/series/test_string_ops_basic.py   | 184 +
 5 files changed, 199 insertions(+), 132 deletions(-)

diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 834b3bd235aa..e67cfce0f5c0 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -752,7 +752,8 @@ pyspark_pandas = Module(
 "pyspark.pandas.tests.test_scalars",
 "pyspark.pandas.tests.test_series_conversion",
 "pyspark.pandas.tests.test_series_datetime",
-"pyspark.pandas.tests.test_series_string",
+"pyspark.pandas.tests.series.test_string_ops_adv",
+"pyspark.pandas.tests.series.test_string_ops_basic",
 "pyspark.pandas.tests.test_spark_functions",
 "pyspark.pandas.tests.test_sql",
 "pyspark.pandas.tests.test_typedef",
@@ -1005,7 +1006,8 @@ pyspark_pandas_connect_part0 = Module(
 "pyspark.pandas.tests.connect.test_parity_scalars",
 "pyspark.pandas.tests.connect.test_parity_series_conversion",
 "pyspark.pandas.tests.connect.test_parity_series_datetime",
-"pyspark.pandas.tests.connect.test_parity_series_string",
+"pyspark.pandas.tests.connect.series.test_parity_string_ops_adv",
+"pyspark.pandas.tests.connect.series.test_parity_string_ops_basic",
 "pyspark.pandas.tests.connect.test_parity_spark_functions",
 "pyspark.pandas.tests.connect.test_parity_sql",
 "pyspark.pandas.tests.connect.test_parity_typedef",
diff --git a/python/pyspark/pandas/tests/connect/test_parity_series_string.py 
b/python/pyspark/pandas/tests/connect/series/test_parity_string_ops_adv.py
similarity index 80%
copy from python/pyspark/pandas/tests/connect/test_parity_series_string.py
copy to python/pyspark/pandas/tests/connect/series/test_parity_string_ops_adv.py
index d7c0335f15c7..1213ae073cf5 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_series_string.py
+++ b/python/pyspark/pandas/tests/connect/series/test_parity_string_ops_adv.py
@@ -16,19 +16,19 @@
 #
 import unittest
 
-from pyspark.pandas.tests.test_series_string import SeriesStringTestsMixin
+from pyspark.pandas.tests.series.test_string_ops_adv import 
SeriesStringOpsAdvMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
-class SeriesStringParityTests(
-SeriesStringTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase
+class SeriesParityStringOpsAdvTests(
+SeriesStringOpsAdvMixin, PandasOnSparkTestUtils, ReusedConnectTestCase
 ):
 pass
 
 
 if __name__ == "__main__":
-from pyspark.pandas.tests.connect.test_parity_series_string import *  # 
noqa: F401
+from pyspark.pandas.tests.connect.series.test_parity_string_ops_adv import 
*  # noqa: F401
 
 try:
 import xmlrunner  # type: ignore[import]
diff --git a/python/pyspark/pandas/tests/connect/test_parity_series_string.py 
b/python/pyspark/pandas/tests/connect/series/test_parity_string_ops_basic.py
similarity index 81%
rename from python/pyspark/pandas/tests/connect/test_parity_series_string.py
rename to 
python/pyspark/pandas/tests/connect/series/test_parity_string_ops_basic.py
index d7c0335f15c7..58f10fa505b0 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_series_string.py
+++ b/python/pyspark/pandas/tests/connect/series/test_parity_string_ops_basic.py
@@ -16,19 +16,19 @@
 #
 import unittest
 
-from pyspark.pandas.tests.test_series_string import SeriesStringTestsMixin
+from pyspark.pandas.tests.series.test_string_ops_basic import 
SeriesStringOpsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 

(spark) branch master updated: [SPARK-46326][PYTHON][TESTS] Test missing cases for functions (pyspark.sql.functions)

2023-12-08 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new f41bacb07d04 [SPARK-46326][PYTHON][TESTS] Test missing cases for 
functions (pyspark.sql.functions)
f41bacb07d04 is described below

commit f41bacb07d04b21d66c0826420a67da41536e445
Author: Hyukjin Kwon 
AuthorDate: Fri Dec 8 20:51:25 2023 +0900

[SPARK-46326][PYTHON][TESTS] Test missing cases for functions 
(pyspark.sql.functions)

### What changes were proposed in this pull request?

This PR proposes to improve test coverage by adding tests for the currently untested cases in `pyspark.sql.functions`. In addition, it slightly improves the examples by adding doctests.

### Why are the changes needed?

For better test coverage, to avoid regressions.

They are not being tested: 
https://app.codecov.io/gh/apache/spark/blob/master/python%2Fpyspark%2Fsql%2Fsession.py

### Does this PR introduce _any_ user-facing change?

It contains a bit of docstring improvement. Otherwise, test-only.

### How was this patch tested?

Manually tested the unittests via:

```bash
./python/run-tests --python-executables=python3  --testnames 
'pyspark.sql.tests.connect.test_parity_functions'
./python/run-tests --python-executables=python3  --testnames 
'pyspark.sql.tests.test_functions'
./python/run-tests --python-executables=python3  --testnames 
'pyspark.sql.functions.builtin'
```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #44256 from HyukjinKwon/dataframe-test.

Authored-by: Hyukjin Kwon 
Signed-off-by: Hyukjin Kwon 
---
 python/pyspark/sql/functions/builtin.py| 78 ++
 python/pyspark/sql/tests/test_functions.py | 33 -
 2 files changed, 101 insertions(+), 10 deletions(-)

diff --git a/python/pyspark/sql/functions/builtin.py 
b/python/pyspark/sql/functions/builtin.py
index 4f8e6a8e1d14..997b641080cf 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -8279,9 +8279,40 @@ def unix_timestamp(
 Examples
 
 >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+
+Example 1: Returns the current timestamp in UNIX.
+
+>>> import pyspark.sql.functions as sf
+>>> spark.range(1).select(sf.unix_timestamp().alias('unix_time')).show()
+... # doctest: +SKIP
++--+
+| unix_time|
++--+
+|1702018137|
++--+
+
+Example 2: Using default format '-MM-dd HH:mm:ss' parses the timestamp 
string.
+
+>>> import pyspark.sql.functions as sf
+>>> time_df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['dt'])
+>>> time_df.select(sf.unix_timestamp('dt').alias('unix_time')).show()
++--+
+| unix_time|
++--+
+|1428520332|
++--+
+
+Example 3: Using user-specified format '-MM-dd' parses the timestamp 
string.
+
+>>> import pyspark.sql.functions as sf
 >>> time_df = spark.createDataFrame([('2015-04-08',)], ['dt'])
->>> time_df.select(unix_timestamp('dt', 
'-MM-dd').alias('unix_time')).collect()
-[Row(unix_time=1428476400)]
+>>> time_df.select(sf.unix_timestamp('dt', 
'-MM-dd').alias('unix_time')).show()
++--+
+| unix_time|
++--+
+|1428476400|
++--+
+
 >>> spark.conf.unset("spark.sql.session.timeZone")
 """
 if timestamp is None:
@@ -8569,13 +8600,21 @@ def window(
 Examples
 
 >>> import datetime
+>>> from pyspark.sql import functions as sf
 >>> df = spark.createDataFrame(
 ... [(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)],
 ... ).toDF("date", "val")
->>> w = df.groupBy(window("date", "5 
seconds")).agg(sum("val").alias("sum"))
->>> w.select(w.window.start.cast("string").alias("start"),
-...  w.window.end.cast("string").alias("end"), "sum").collect()
-[Row(start='2016-03-11 09:00:05', end='2016-03-11 09:00:10', sum=1)]
+>>> w = df.groupBy(sf.window("date", "5 
seconds")).agg(sf.sum("val").alias("sum"))
+>>> w.select(
+... w.window.start.cast("string").alias("start"),
+... w.window.end.cast("string").alias("end"),
+... "sum"
+... ).show()
++---+---+---+
+|  start|end|sum|
++---+---+---+
+|2016-03-11 09:00:05|2016-03-11 09:00:10|  1|
++---+---+---+
 """
 
 def check_string_field(field, fieldName):  # type: ignore[no-untyped-def]
@@ -8737,9 +8776,30 @@ def to_unix_timestamp(
 Examples
 
 >>> spark.conf.set("spark.sql.session.timeZo