svn commit: r56822 - in /dev/spark/v3.3.1-rc1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.3/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: yumwang
Date: Thu Sep 15 06:07:25 2022
New Revision: 56822

Log:
Apache Spark v3.3.1-rc1 docs

[This commit notification would consist of 2684 parts, which exceeds the limit of 50, so it was shortened to this summary.]
[spark] branch master updated: [SPARK-40429][SQL] Only set KeyGroupedPartitioning when the referenced column is in the output
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 034e48fd47f [SPARK-40429][SQL] Only set KeyGroupedPartitioning when the referenced column is in the output
034e48fd47f is described below

commit 034e48fd47f49a603c1cad507608958f5beeddc8
Author: huaxingao
AuthorDate: Wed Sep 14 23:06:22 2022 -0700

    [SPARK-40429][SQL] Only set KeyGroupedPartitioning when the referenced column is in the output

    ### What changes were proposed in this pull request?
    Only set `KeyGroupedPartitioning` when the referenced column is in the output.

    ### Why are the changes needed?
    Bug fix.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    New test.

    Closes #37886 from huaxingao/keyGroupedPartitioning.

    Authored-by: huaxingao
    Signed-off-by: Dongjoon Hyun
---
 .../datasources/v2/V2ScanPartitioningAndOrdering.scala   | 14 ++++++++++++--
 .../apache/spark/sql/connector/MetadataColumnSuite.scala | 16 ++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala
index 8ab0dc70726..5c8c7cf420d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala
@@ -41,8 +41,18 @@ object V2ScanPartitioningAndOrdering extends Rule[LogicalPlan] with SQLConfHelpe
   private def partitioning(plan: LogicalPlan) = plan.transformDown {
     case d @ DataSourceV2ScanRelation(relation, scan: SupportsReportPartitioning, _, None, _) =>
       val catalystPartitioning = scan.outputPartitioning() match {
-        case kgp: KeyGroupedPartitioning => sequenceToOption(kgp.keys().map(
-          V2ExpressionUtils.toCatalystOpt(_, relation, relation.funCatalog)))
+        case kgp: KeyGroupedPartitioning =>
+          val partitioning = sequenceToOption(
+            kgp.keys().map(V2ExpressionUtils.toCatalystOpt(_, relation, relation.funCatalog)))
+          if (partitioning.isEmpty) {
+            None
+          } else {
+            if (partitioning.get.forall(p => p.references.subsetOf(d.outputSet))) {
+              partitioning
+            } else {
+              None
+            }
+          }
         case _: UnknownPartitioning => None
         case p => throw new IllegalArgumentException("Unsupported data source V2 partitioning " +
           "type: " + p.getClass.getSimpleName)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala
index 9b90ee43657..8454b9f85ec 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala
@@ -216,4 +216,20 @@ class MetadataColumnSuite extends DatasourceV2SQLBase {
       .withColumn("right_all", struct($"right.*"))
     checkAnswer(dfQuery, Row(1, "a", "b", Row(1, "a"), Row(1, "b")))
   }
+
+  test("SPARK-40429: Only set KeyGroupedPartitioning when the referenced column is in the output") {
+    withTable(tbl) {
+      sql(s"CREATE TABLE $tbl (id bigint, data string) PARTITIONED BY (id)")
+      sql(s"INSERT INTO $tbl VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      checkAnswer(
+        spark.table(tbl).select("index", "_partition"),
+        Seq(Row(0, "3"), Row(0, "2"), Row(0, "1"))
+      )
+
+      checkAnswer(
+        spark.table(tbl).select("id", "index", "_partition"),
+        Seq(Row(3, 0, "3"), Row(2, 0, "2"), Row(1, 0, "1"))
+      )
+    }
+  }
 }
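Editor's note: the essence of the fix reads more clearly outside the rule plumbing. Below is a minimal Python sketch of the new guard, using plain sets of column names to stand in for Catalyst expression references and for the scan's output attribute set; the function name and data model are illustrative, not Spark API.

```python
# Minimal sketch of the SPARK-40429 guard (illustrative, not Spark API):
# report a key-grouped partitioning only if every column it references
# actually appears in the scan's output.

def keep_key_grouped_partitioning(partition_keys, output_columns):
    """Return the partition keys only if all referenced columns are in the output."""
    if partition_keys is None:
        # Some key could not be translated to a Catalyst expression.
        return None
    if all(refs <= output_columns for refs in partition_keys):
        return partition_keys
    return None

# Selecting only metadata columns drops `id` from the output, so the
# id-based partitioning must not be reported:
assert keep_key_grouped_partitioning([{"id"}], {"index", "_partition"}) is None
assert keep_key_grouped_partitioning([{"id"}], {"id", "index", "_partition"}) == [{"id"}]
```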
svn commit: r56821 - /dev/spark/v3.3.1-rc1-bin/
Author: yumwang
Date: Thu Sep 15 03:59:44 2022
New Revision: 56821

Log:
Apache Spark v3.3.1-rc1

Added:
    dev/spark/v3.3.1-rc1-bin/
    dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz   (with props)
    dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.asc
    dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.sha512
    dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz   (with props)
    dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz.asc
    dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz.sha512
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop2.tgz   (with props)
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop2.tgz.asc
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop2.tgz.sha512
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3-scala2.13.tgz   (with props)
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3-scala2.13.tgz.asc
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3-scala2.13.tgz.sha512
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3.tgz   (with props)
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3.tgz.asc
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3.tgz.sha512
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-without-hadoop.tgz   (with props)
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-without-hadoop.tgz.asc
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-without-hadoop.tgz.sha512
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1.tgz   (with props)
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1.tgz.asc
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1.tgz.sha512

Added: dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz
==============================================================================
Binary file - no diff available.

Propchange: dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.asc
==============================================================================
--- dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.asc (added)
+++ dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.asc Thu Sep 15 03:59:44 2022
@@ -0,0 +1,17 @@
+-----BEGIN PGP SIGNATURE-----
+
+iQJHBAABCgAxFiEEhnJ9Q+c6QV9noLGhTmiz5s1HNlMFAmMimi0THHl1bXdhbmdA
+YXBhY2hlLm9yZwAKCRBOaLPmzUc2U9WvEADMuirrnIIRcDuUKIkSSzNe+jnGwSVI
+H/9WaRp9V/JG4pb5p/9RN9f7t7UQ1mb6s2VxzhJ3CqSytvzLtXIH54/zbykLua00
+HSqLqnrBmR8xFof8aS+qP+2M0gZDmHwG1xZh39nHYU25mAyP0hj4AYSfvloG8/Nk
+oyx5kqJe7jVA6FaXoIcgxFWJ8wGW8D5TrIP3Z42+sAEoVDNTJGIkDEFQrOSYt7zz
++tuW2mHIucJRYwCTs++tSIlybr1LHsqiTE4FoFn0h3Dhy6TfOxlyT0uHI2Si/JqU
+EzFc23r6sNlP+IjJsYEqrVc/q6tkOEiqGNnIrV1srNAyTAEo53+v7mypCq8qUhSx
+hyz+VACeGXmHruu7hna5G7RkxSwq+IJQZHr3Q87RIoYmHOt7IUe3C5BgYkLHT/tw
+sSqkNtQCDpW28vnPJ6nmn2rN+Cfy7rCj8Olp/QaN1EkaMWMgTfWZPNzsFC/NUFjq
+pVtaNeuqMUPOErUHRMSd0B9vVqv4cLtdMQmw0AkfnvS0L3CTeCm7Ym6ucND7fFn1
+9I3p+tTg/G0ugcuzOMEvn/w0WmKop2ryItp8+K7s/U8mNWpz6X8sfY+BCXR+Lagj
+/IXu0DoVHaEFTn9Ezz53fnRICTfyNPSDlPN95ECsj0dEZ2WBMz9ryQYUmTvD2jm4
+pim/spRfoTjCUQ==
+=QnuQ
+-----END PGP SIGNATURE-----

Added: dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.sha512
==============================================================================
--- dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.sha512 (added)
+++ dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.sha512 Thu Sep 15 03:59:44 2022
@@ -0,0 +1 @@
+d1436ba8a20af88e77999034e5cd854be3b5edbfeb00c7da619cc9e35616b1bfe4f67476d1994fc6cca2f1590ad3c5fd13dc16cc752a989dfdefd08fa38bd321 SparkR_3.3.1.tar.gz

Added: dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz
==============================================================================
Binary file - no diff available.

Propchange: dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz.asc
==============================================================================
--- dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz.asc (added)
+++ dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz.asc Thu Sep 15 03:59:44 2022
@@ -0,0 +1,17 @@
+-----BEGIN PGP SIGNATURE-----
+
+iQJHBAABCgAxFiEEhnJ9Q+c6QV9noLGhTmiz5s1HNlMFAmMimjITHHl1bXdhbmdA
+YXBhY2hlLm9yZwAKCRBOaLPmzUc2U8eUD/sF0jnPVUqnDuoRUscfaa9KhmGCU7dH
+ARkF3R4+yPTCM6FmSK6r1OjeOiHjjyVe6g3VvMbv7vt0C28SV7E3uluIc27WV14J
+rFNva0D48//O3eE7pNu+oxDYxuwJDQsM6ke/v/YKykf1P8YyVqCB3OGlEIJeSNKM
+UPSr8Z7KY93GbOmqN1YDx5g7O2ChRjlBnpp+tZ6/+8XxoLPtzDsGR1NwTxMylhgb
+ZAtDXcmre6zZYghGWbcwKxGnY7NA2qfdHsiCN+fECvd27Rhj9BIb3luyj77zA5eJ
+1cVifPZi3ulmYewRPa1Xa1bh2aCAWCugVoT1CP23AS21Nxa9TAr72ZTW/QVUce1B
+S7ZKk5rGHnVz+vyiVrgSwxB7fg7opoFXif0gtXELwshLhbVXXfQ71cP/XPjrl0k2
+OpQla4bU5nIlgOB8RI7AFvOyJehowXMvjI/mM1nE4GeFSIPdrHQOMvo45zWNd+jP
+sjHEjf4sHT309e0eY8sTmYrG3Wu7cIkuIjHUZvZfWGEw5MESQJp30fuG6THS527p
+p1QWcfr7YAjKTClSuiNpMgQrDEjXteFUH+iXzgEDSNTy0rmITYh/m6PHgobhOuGz
+fqpceuJDS+U8VGo0tGEK5knFb2QAwhGSQHctA3vVqlkjpy/7OLJF/uwFVx0u+Yuh
+MdAa1X3Q66/DWQ==
+=FrlC
+-----END PGP SIGNATURE-----

Added: dev/spark/v3.3.
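Editor's note: for anyone validating this release candidate, the published `.sha512` files use the `<digest> <filename>` layout shown above. A small, self-contained Python sketch of the check, assuming the tarball and its checksum file were downloaded into the current directory:

```python
import hashlib

def sha512_of(path: str) -> str:
    """Stream the file so multi-hundred-MB Spark tarballs are not loaded into memory."""
    h = hashlib.sha512()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# The first whitespace-separated token of the .sha512 file is the digest.
expected = open("SparkR_3.3.1.tar.gz.sha512").read().split()[0]
actual = sha512_of("SparkR_3.3.1.tar.gz")
print("OK" if actual == expected else "MISMATCH")
```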
[spark] branch master updated (1c46c87ddb1 -> 0ea17c4d3c3)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from 1c46c87ddb1 [SPARK-40421][PS] Make `spearman` correlation in `DataFrame.corr` support missing values and `min_periods`
     add 0ea17c4d3c3 [SPARK-40339][SPARK-40342][PS][DOCS][FOLLOW-UP] Add Rolling.quantile and Expanding.quantile into PySpark documentation

No new revisions were added by this update.

Summary of changes:
 python/docs/source/reference/pyspark.pandas/window.rst | 2 ++
 1 file changed, 2 insertions(+)
[spark] branch master updated (ea6857abff8 -> 1c46c87ddb1)
This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from ea6857abff8 [SPARK-40426][SQL] Return a map from SparkThrowable.getMessageParameters
     add 1c46c87ddb1 [SPARK-40421][PS] Make `spearman` correlation in `DataFrame.corr` support missing values and `min_periods`

No new revisions were added by this update.

Summary of changes:
 python/pyspark/pandas/frame.py            | 389 +-
 python/pyspark/pandas/tests/test_stats.py |  66 -
 2 files changed, 275 insertions(+), 180 deletions(-)
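Editor's note: with this change, `DataFrame.corr` in the pandas API on Spark accepts `min_periods` and tolerates missing values for `spearman`, mirroring pandas. A usage sketch with illustrative data, assuming a PySpark build that includes this commit (master, i.e. 3.4-to-be):

```python
import pyspark.pandas as ps

psdf = ps.DataFrame({
    "a": [1.0, 2.0, None, 4.0, 5.0],  # a missing value no longer breaks spearman
    "b": [5.0, 4.0, 3.0, None, 1.0],
})

# Pairwise Spearman correlation; column pairs with fewer than
# min_periods valid observations yield NaN.
print(psdf.corr(method="spearman", min_periods=3))
```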
[spark] branch master updated (c134c7597d1 -> ea6857abff8)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from c134c7597d1 [SPARK-40339][SPARK-40342][SPARK-40345][SPARK-40348][PS] Implement quantile in Rolling/RollingGroupby/Expanding/ExpandingGroupby
     add ea6857abff8 [SPARK-40426][SQL] Return a map from SparkThrowable.getMessageParameters

No new revisions were added by this update.

Summary of changes:
 .../main/java/org/apache/spark/SparkThrowable.java | 12 ++--
 .../apache/spark/memory/SparkOutOfMemoryError.java |  4 +-
 .../scala/org/apache/spark/SparkException.scala    | 68 ++
 .../org/apache/spark/SparkThrowableHelper.scala    | 22 +--
 .../scala/org/apache/spark/SparkFunSuite.scala     |  3 +-
 .../org/apache/spark/SparkThrowableSuite.scala     |  2 +-
 .../org/apache/spark/sql/AnalysisException.scala   |  8 +--
 7 files changed, 34 insertions(+), 85 deletions(-)
[spark] branch master updated (40590e6d911 -> c134c7597d1)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from 40590e6d911 [SPARK-40397][BUILD] Upgrade `org.scalatestplus:selenium` to 3.12.13
     add c134c7597d1 [SPARK-40339][SPARK-40342][SPARK-40345][SPARK-40348][PS] Implement quantile in Rolling/RollingGroupby/Expanding/ExpandingGroupby

No new revisions were added by this update.

Summary of changes:
 python/pyspark/pandas/groupby.py              |   2 +-
 python/pyspark/pandas/missing/window.py       |   4 -
 python/pyspark/pandas/tests/test_expanding.py |   8 +
 python/pyspark/pandas/tests/test_rolling.py   |   8 +
 python/pyspark/pandas/window.py               | 309 ++
 5 files changed, 326 insertions(+), 5 deletions(-)
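Editor's note: this makes `quantile` available on the four window flavors named in the title. A usage sketch against the pandas-on-Spark API (illustrative data; assumes a build with this commit, and note that the result may be an accuracy-bounded approximation of the exact pandas value):

```python
import pyspark.pandas as ps

psser = ps.Series([4.0, 1.0, 3.0, 2.0, 5.0])

# Median over a sliding window of 3 observations; the first two rows
# are NaN because the window is not yet full.
print(psser.rolling(3).quantile(0.5))

# Median over an ever-growing window, requiring at least 2 observations.
print(psser.expanding(2).quantile(0.5))
```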
[spark] branch master updated (12e48527846 -> 40590e6d911)
This is an automated email from the ASF dual-hosted git repository.

sarutak pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from 12e48527846 [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage
     add 40590e6d911 [SPARK-40397][BUILD] Upgrade `org.scalatestplus:selenium` to 3.12.13

No new revisions were added by this update.

Summary of changes:
 dev/deps/spark-deps-hadoop-2-hive-2.3 |  2 +-
 dev/deps/spark-deps-hadoop-3-hive-2.3 |  2 +-
 pom.xml                               | 18 +++---
 3 files changed, 13 insertions(+), 9 deletions(-)
[spark] branch branch-3.3 updated: [SPARK-38017][FOLLOWUP][3.3] Hide TimestampNTZ in the doc
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new d8e157d0347 [SPARK-38017][FOLLOWUP][3.3] Hide TimestampNTZ in the doc
d8e157d0347 is described below

commit d8e157d0347f51c54e334fabe76072fc95332671
Author: Kousuke Saruta
AuthorDate: Thu Sep 15 09:28:06 2022 +0900

    [SPARK-38017][FOLLOWUP][3.3] Hide TimestampNTZ in the doc

    ### What changes were proposed in this pull request?
    This PR removes `TimestampNTZ` from the doc about `TimeWindow` and `SessionWindow`.

    ### Why are the changes needed?
    As we discussed, it's better to hide `TimestampNTZ` from the doc.
    https://github.com/apache/spark/pull/35313#issuecomment-1185192162

    ### Does this PR introduce _any_ user-facing change?
    The document will be changed, but there is no compatibility problem.

    ### How was this patch tested?
    Built the doc with `SKIP_RDOC=1 SKIP_SQLDOC=1 bundle exec jekyll build` in the `doc` directory, then confirmed the generated HTML.

    Closes #37882 from sarutak/fix-window-doc-3.3.

    Authored-by: Kousuke Saruta
    Signed-off-by: Hyukjin Kwon
---
 python/pyspark/sql/functions.py                              |  4 ++--
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 10 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index ed3b0789b47..c8d7f9cdcb5 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2557,7 +2557,7 @@ def window(
     ----------
     timeColumn : :class:`~pyspark.sql.Column`
         The column or the expression to use as the timestamp for windowing by time.
-        The time column must be of TimestampType or TimestampNTZType.
+        The time column must be of TimestampType.
     windowDuration : str
         A string specifying the width of the window, e.g. `10 minutes`,
         `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
@@ -2632,7 +2632,7 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str])
     ----------
     timeColumn : :class:`~pyspark.sql.Column` or str
         The column name or column to use as the timestamp for windowing by time.
-        The time column must be of TimestampType or TimestampNTZType.
+        The time column must be of TimestampType.
     gapDuration : :class:`~pyspark.sql.Column` or str
         A Python string literal or column specifying the timeout of the session. It could be
         static value, e.g. `10 minutes`, `1 second`, or an expression/UDF that specifies gap
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index f6c3bc7e3ce..6dbbca67338 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3645,7 +3645,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
    * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers. Note that the duration is a fixed length of
@@ -3701,7 +3701,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
   * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers. Note that the duration is a fixed length of
@@ -3746,7 +3746,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
    * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers.
@@ -3774,7 +3774,7 @@ object functions {
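Editor's note: the docstrings now match what branch-3.3 actually accepts, a TimestampType time column. A short PySpark usage sketch of the two functions touched here, with illustrative event data and an existing Spark session assumed:

```python
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()

events = spark.createDataFrame(
    [("2022-09-15 09:00:10", 1), ("2022-09-15 09:07:00", 2), ("2022-09-15 09:21:30", 3)],
    "ts string, v int",
).withColumn("ts", F.col("ts").cast("timestamp"))  # TimestampType, as the doc now requires

# Fixed 10-minute tumbling windows.
events.groupBy(F.window("ts", "10 minutes")).agg(F.sum("v")).show(truncate=False)

# Session windows that close after 5 minutes of inactivity.
events.groupBy(F.session_window("ts", "5 minutes")).agg(F.sum("v")).show(truncate=False)
```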
[spark] branch branch-3.2 updated: [SPARK-38017][FOLLOWUP][3.2] Hide TimestampNTZ in the doc
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new ce55a8f787b [SPARK-38017][FOLLOWUP][3.2] Hide TimestampNTZ in the doc
ce55a8f787b is described below

commit ce55a8f787b0453132bca06089da99462f61617a
Author: Kousuke Saruta
AuthorDate: Thu Sep 15 09:27:20 2022 +0900

    [SPARK-38017][FOLLOWUP][3.2] Hide TimestampNTZ in the doc

    ### What changes were proposed in this pull request?
    This PR removes `TimestampNTZ` from the doc about `TimeWindow` and `SessionWindow`.

    ### Why are the changes needed?
    As we discussed, it's better to hide `TimestampNTZ` from the doc.
    https://github.com/apache/spark/pull/35313#issuecomment-1185192162

    ### Does this PR introduce _any_ user-facing change?
    The document will be changed, but there is no compatibility problem.

    ### How was this patch tested?
    Built the doc with `SKIP_RDOC=1 SKIP_SQLDOC=1 bundle exec jekyll build` in the `doc` directory, then confirmed the generated HTML.

    Closes #37883 from sarutak/fix-window-doc-3.2.

    Authored-by: Kousuke Saruta
    Signed-off-by: Hyukjin Kwon
---
 python/pyspark/sql/functions.py                              | 2 +-
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 1aeafd85ade..ea59c45c4f5 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2307,7 +2307,7 @@ def window(timeColumn, windowDuration, slideDuration=None, startTime=None):
     ----------
     timeColumn : :class:`~pyspark.sql.Column`
         The column or the expression to use as the timestamp for windowing by time.
-        The time column must be of TimestampType or TimestampNTZType.
+        The time column must be of TimestampType.
     windowDuration : str
         A string specifying the width of the window, e.g. `10 minutes`,
         `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index f4801eec060..a4c77b20c65 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3517,7 +3517,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
    * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers. Note that the duration is a fixed length of
@@ -3573,7 +3573,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
    * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers. Note that the duration is a fixed length of
@@ -3618,7 +3618,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
    * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers.
[spark] branch master updated (801faba5619 -> 12e48527846)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from 801faba5619 [SPARK-40420][SQL] Sort error message parameters by names in the JSON formats
     add 12e48527846 [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage

No new revisions were added by this update.

Summary of changes:
 docs/running-on-kubernetes.md                                        | 5 +++--
 .../org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala  | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)
[spark] branch branch-3.3 updated: [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new ec40006aa3b [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage
ec40006aa3b is described below

commit ec40006aa3bda9f6fd03bb9c0bda561c139ed5ce
Author: Dongjoon Hyun
AuthorDate: Wed Sep 14 09:28:04 2022 -0700

    [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage

    ### What changes were proposed in this pull request?
    This PR aims to add explicit YuniKorn queue submission test coverage instead of relying on implicit assignment by the admission controller.

    ### Why are the changes needed?
    - To provide proper test coverage.
    - To prevent the side effect of the YuniKorn admission controller, which by default (unless the rule is edited explicitly) overrides all of Spark's scheduler settings and thereby breaks Apache Spark's default-scheduler K8s IT coverage.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    Manually ran the CI and checked the YuniKorn queue UI.

    ```
    $ build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests \
        -Dspark.kubernetes.test.deployMode=docker-desktop "kubernetes-integration-tests/test" \
        -Dtest.exclude.tags=minikube,local,decom -Dtest.default.exclude.tags=
    ```

    Screenshot: https://user-images.githubusercontent.com/9700541/190112005-5863bdd3-2e43-4ec7-b34b-a286d1a7c95e.png

    Closes #37877 from dongjoon-hyun/SPARK-40423.

    Authored-by: Dongjoon Hyun
    Signed-off-by: Dongjoon Hyun
    (cherry picked from commit 12e48527846d993a78b159fbba3e900a4feb7b55)
    Signed-off-by: Dongjoon Hyun
---
 docs/running-on-kubernetes.md                                        | 5 +++--
 .../org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala  | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md
index b57175f358c..f7f7ec539b8 100644
--- a/docs/running-on-kubernetes.md
+++ b/docs/running-on-kubernetes.md
@@ -1822,8 +1822,7 @@ Install Apache YuniKorn:
 ```bash
 helm repo add yunikorn https://apache.github.io/yunikorn-release
 helm repo update
-kubectl create namespace yunikorn
-helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.1.0
+helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.1.0 --create-namespace --set embedAdmissionController=false
 ```

 The above steps will install YuniKorn v1.1.0 on an existing Kubernetes cluster.
@@ -1834,6 +1833,8 @@ Submit Spark jobs with the following extra options:
 ```bash
 --conf spark.kubernetes.scheduler.name=yunikorn
+--conf spark.kubernetes.driver.label.queue=root.default
+--conf spark.kubernetes.executor.label.queue=root.default
 --conf spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id={{APP_ID}}
 --conf spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id={{APP_ID}}
 ```
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala
index 5a3c063efa1..0dfb88b259e 100644
--- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala
@@ -21,8 +21,11 @@ class YuniKornSuite extends KubernetesSuite {

   override protected def setUpTest(): Unit = {
     super.setUpTest()
+    val namespace = sparkAppConf.get("spark.kubernetes.namespace")
     sparkAppConf
       .set("spark.kubernetes.scheduler.name", "yunikorn")
+      .set("spark.kubernetes.driver.label.queue", "root." + namespace)
+      .set("spark.kubernetes.executor.label.queue", "root." + namespace)
       .set("spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}")
       .set("spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}")
   }
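Editor's note: the queue labels from the updated docs can equally be set programmatically when building the session. A sketch only, assuming a YuniKorn-enabled Kubernetes cluster; the API server URL and container image are placeholders you must fill in:

```python
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .master("k8s://https://<api-server>:6443")              # placeholder API server
    .config("spark.kubernetes.container.image", "<image>")  # placeholder Spark image
    .config("spark.kubernetes.scheduler.name", "yunikorn")
    .config("spark.kubernetes.driver.label.queue", "root.default")
    .config("spark.kubernetes.executor.label.queue", "root.default")
    .config("spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}")
    .config("spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}")
    .getOrCreate()
)
```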