svn commit: r56822 - in /dev/spark/v3.3.1-rc1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.3/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: yumwang
Date: Thu Sep 15 06:07:25 2022
New Revision: 56822

Log:
Apache Spark v3.3.1-rc1 docs

[This commit notification would consist of 2684 parts, which exceeds the limit of 50, so it was shortened to this summary.]
[spark] branch master updated: [SPARK-40429][SQL] Only set KeyGroupedPartitioning when the referenced column is in the output
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 034e48fd47f [SPARK-40429][SQL] Only set KeyGroupedPartitioning when the referenced column is in the output
034e48fd47f is described below

commit 034e48fd47f49a603c1cad507608958f5beeddc8
Author: huaxingao
AuthorDate: Wed Sep 14 23:06:22 2022 -0700

    [SPARK-40429][SQL] Only set KeyGroupedPartitioning when the referenced column is in the output

    ### What changes were proposed in this pull request?
    Only set `KeyGroupedPartitioning` when the referenced column is in the output.

    ### Why are the changes needed?
    Bug fix.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    New test.

    Closes #37886 from huaxingao/keyGroupedPartitioning.

    Authored-by: huaxingao
    Signed-off-by: Dongjoon Hyun
---
 .../datasources/v2/V2ScanPartitioningAndOrdering.scala   | 14 ++++++++++++--
 .../apache/spark/sql/connector/MetadataColumnSuite.scala | 16 ++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala
index 8ab0dc70726..5c8c7cf420d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala
@@ -41,8 +41,18 @@ object V2ScanPartitioningAndOrdering extends Rule[LogicalPlan] with SQLConfHelpe
   private def partitioning(plan: LogicalPlan) = plan.transformDown {
     case d @ DataSourceV2ScanRelation(relation, scan: SupportsReportPartitioning, _, None, _) =>
       val catalystPartitioning = scan.outputPartitioning() match {
-        case kgp: KeyGroupedPartitioning => sequenceToOption(kgp.keys().map(
-          V2ExpressionUtils.toCatalystOpt(_, relation, relation.funCatalog)))
+        case kgp: KeyGroupedPartitioning =>
+          val partitioning = sequenceToOption(
+            kgp.keys().map(V2ExpressionUtils.toCatalystOpt(_, relation, relation.funCatalog)))
+          if (partitioning.isEmpty) {
+            None
+          } else {
+            if (partitioning.get.forall(p => p.references.subsetOf(d.outputSet))) {
+              partitioning
+            } else {
+              None
+            }
+          }
         case _: UnknownPartitioning => None
         case p => throw new IllegalArgumentException("Unsupported data source V2 partitioning " +
           "type: " + p.getClass.getSimpleName)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala
index 9b90ee43657..8454b9f85ec 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala
@@ -216,4 +216,20 @@ class MetadataColumnSuite extends DatasourceV2SQLBase {
       .withColumn("right_all", struct($"right.*"))
     checkAnswer(dfQuery, Row(1, "a", "b", Row(1, "a"), Row(1, "b")))
   }
+
+  test("SPARK-40429: Only set KeyGroupedPartitioning when the referenced column is in the output") {
+    withTable(tbl) {
+      sql(s"CREATE TABLE $tbl (id bigint, data string) PARTITIONED BY (id)")
+      sql(s"INSERT INTO $tbl VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      checkAnswer(
+        spark.table(tbl).select("index", "_partition"),
+        Seq(Row(0, "3"), Row(0, "2"), Row(0, "1"))
+      )
+
+      checkAnswer(
+        spark.table(tbl).select("id", "index", "_partition"),
+        Seq(Row(3, 0, "3"), Row(2, 0, "2"), Row(1, 0, "1"))
+      )
+    }
+  }
 }
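Editor's note: the essence of the fix reads more clearly outside the rule plumbing. Below is a minimal Python sketch of the new guard, using plain sets of column names to stand in for Catalyst expression references and for the scan's output attribute set; the function name and data model are illustrative, not Spark API.

```python
# Minimal sketch of the SPARK-40429 guard (illustrative, not Spark API):
# report a key-grouped partitioning only if every column it references
# actually appears in the scan's output.

def keep_key_grouped_partitioning(partition_keys, output_columns):
    """Return the partition keys only if all referenced columns are in the output."""
    if partition_keys is None:
        # Some key could not be translated to a Catalyst expression.
        return None
    if all(refs <= output_columns for refs in partition_keys):
        return partition_keys
    return None

# Selecting only metadata columns drops `id` from the output, so the
# id-based partitioning must not be reported:
assert keep_key_grouped_partitioning([{"id"}], {"index", "_partition"}) is None
assert keep_key_grouped_partitioning([{"id"}], {"id", "index", "_partition"}) == [{"id"}]
```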
svn commit: r56821 - /dev/spark/v3.3.1-rc1-bin/
Author: yumwang
Date: Thu Sep 15 03:59:44 2022
New Revision: 56821

Log:
Apache Spark v3.3.1-rc1

Added:
    dev/spark/v3.3.1-rc1-bin/
    dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz   (with props)
    dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.asc
    dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.sha512
    dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz   (with props)
    dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz.asc
    dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz.sha512
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop2.tgz   (with props)
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop2.tgz.asc
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop2.tgz.sha512
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3-scala2.13.tgz   (with props)
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3-scala2.13.tgz.asc
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3-scala2.13.tgz.sha512
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3.tgz   (with props)
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3.tgz.asc
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-hadoop3.tgz.sha512
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-without-hadoop.tgz   (with props)
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-without-hadoop.tgz.asc
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1-bin-without-hadoop.tgz.sha512
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1.tgz   (with props)
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1.tgz.asc
    dev/spark/v3.3.1-rc1-bin/spark-3.3.1.tgz.sha512

Added: dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz
==============================================================================
Binary file - no diff available.

Propchange: dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.asc
==============================================================================
--- dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.asc (added)
+++ dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.asc Thu Sep 15 03:59:44 2022
@@ -0,0 +1,17 @@
+-----BEGIN PGP SIGNATURE-----
+
+iQJHBAABCgAxFiEEhnJ9Q+c6QV9noLGhTmiz5s1HNlMFAmMimi0THHl1bXdhbmdA
+YXBhY2hlLm9yZwAKCRBOaLPmzUc2U9WvEADMuirrnIIRcDuUKIkSSzNe+jnGwSVI
+H/9WaRp9V/JG4pb5p/9RN9f7t7UQ1mb6s2VxzhJ3CqSytvzLtXIH54/zbykLua00
+HSqLqnrBmR8xFof8aS+qP+2M0gZDmHwG1xZh39nHYU25mAyP0hj4AYSfvloG8/Nk
+oyx5kqJe7jVA6FaXoIcgxFWJ8wGW8D5TrIP3Z42+sAEoVDNTJGIkDEFQrOSYt7zz
++tuW2mHIucJRYwCTs++tSIlybr1LHsqiTE4FoFn0h3Dhy6TfOxlyT0uHI2Si/JqU
+EzFc23r6sNlP+IjJsYEqrVc/q6tkOEiqGNnIrV1srNAyTAEo53+v7mypCq8qUhSx
+hyz+VACeGXmHruu7hna5G7RkxSwq+IJQZHr3Q87RIoYmHOt7IUe3C5BgYkLHT/tw
+sSqkNtQCDpW28vnPJ6nmn2rN+Cfy7rCj8Olp/QaN1EkaMWMgTfWZPNzsFC/NUFjq
+pVtaNeuqMUPOErUHRMSd0B9vVqv4cLtdMQmw0AkfnvS0L3CTeCm7Ym6ucND7fFn1
+9I3p+tTg/G0ugcuzOMEvn/w0WmKop2ryItp8+K7s/U8mNWpz6X8sfY+BCXR+Lagj
+/IXu0DoVHaEFTn9Ezz53fnRICTfyNPSDlPN95ECsj0dEZ2WBMz9ryQYUmTvD2jm4
+pim/spRfoTjCUQ==
+=QnuQ
+-----END PGP SIGNATURE-----

Added: dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.sha512
==============================================================================
--- dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.sha512 (added)
+++ dev/spark/v3.3.1-rc1-bin/SparkR_3.3.1.tar.gz.sha512 Thu Sep 15 03:59:44 2022
@@ -0,0 +1 @@
+d1436ba8a20af88e77999034e5cd854be3b5edbfeb00c7da619cc9e35616b1bfe4f67476d1994fc6cca2f1590ad3c5fd13dc16cc752a989dfdefd08fa38bd321 SparkR_3.3.1.tar.gz

Added: dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz
==============================================================================
Binary file - no diff available.

Propchange: dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz.asc
==============================================================================
--- dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz.asc (added)
+++ dev/spark/v3.3.1-rc1-bin/pyspark-3.3.1.tar.gz.asc Thu Sep 15 03:59:44 2022
@@ -0,0 +1,17 @@
+-----BEGIN PGP SIGNATURE-----
+
+iQJHBAABCgAxFiEEhnJ9Q+c6QV9noLGhTmiz5s1HNlMFAmMimjITHHl1bXdhbmdA
+YXBhY2hlLm9yZwAKCRBOaLPmzUc2U8eUD/sF0jnPVUqnDuoRUscfaa9KhmGCU7dH
+ARkF3R4+yPTCM6FmSK6r1OjeOiHjjyVe6g3VvMbv7vt0C28SV7E3uluIc27WV14J
+rFNva0D48//O3eE7pNu+oxDYxuwJDQsM6ke/v/YKykf1P8YyVqCB3OGlEIJeSNKM
+UPSr8Z7KY93GbOmqN1YDx5g7O2ChRjlBnpp+tZ6/+8XxoLPtzDsGR1NwTxMylhgb
+ZAtDXcmre6zZYghGWbcwKxGnY7NA2qfdHsiCN+fECvd27Rhj9BIb3luyj77zA5eJ
+1cVifPZi3ulmYewRPa1Xa1bh2aCAWCugVoT1CP23AS21Nxa9TAr72ZTW/QVUce1B
+S7ZKk5rGHnVz+vyiVrgSwxB7fg7opoFXif0gtXELwshLhbVXXfQ71cP/XPjrl0k2
+OpQla4bU5nIlgOB8RI7AFvOyJehowXMvjI/mM1nE4GeFSIPdrHQOMvo45zWNd+jP
+sjHEjf4sHT309e0eY8sTmYrG3Wu7cIkuIjHUZvZfWGEw5MESQJp30fuG6THS527p
+p1QWcfr7YAjKTClSuiNpMgQrDEjXteFUH+iXzgEDSNTy0rmITYh/m6PHgobhOuGz
+fqpceuJDS+U8VGo0tGEK5knFb2QAwhGSQHctA3vVqlkjpy/7OLJF/uwFVx0u+Yuh
+MdAa1X3Q66/DWQ==
+=FrlC
+-----END PGP SIGNATURE-----

Added: dev/spark/v3.3.
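Editor's note: for anyone validating this release candidate, the published `.sha512` files use the `<digest> <filename>` layout shown above. A small, self-contained Python sketch of the check, assuming the tarball and its checksum file were downloaded into the current directory:

```python
import hashlib

def sha512_of(path: str) -> str:
    """Stream the file so multi-hundred-MB Spark tarballs are not loaded into memory."""
    h = hashlib.sha512()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# The first whitespace-separated token of the .sha512 file is the digest.
expected = open("SparkR_3.3.1.tar.gz.sha512").read().split()[0]
actual = sha512_of("SparkR_3.3.1.tar.gz")
print("OK" if actual == expected else "MISMATCH")
```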
[spark] branch master updated (1c46c87ddb1 -> 0ea17c4d3c3)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from 1c46c87ddb1 [SPARK-40421][PS] Make `spearman` correlation in `DataFrame.corr` support missing values and `min_periods`
     add 0ea17c4d3c3 [SPARK-40339][SPARK-40342][PS][DOCS][FOLLOW-UP] Add Rolling.quantile and Expanding.quantile into PySpark documentation

No new revisions were added by this update.

Summary of changes:
 python/docs/source/reference/pyspark.pandas/window.rst | 2 ++
 1 file changed, 2 insertions(+)
[spark] branch master updated (ea6857abff8 -> 1c46c87ddb1)
This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from ea6857abff8 [SPARK-40426][SQL] Return a map from SparkThrowable.getMessageParameters
     add 1c46c87ddb1 [SPARK-40421][PS] Make `spearman` correlation in `DataFrame.corr` support missing values and `min_periods`

No new revisions were added by this update.

Summary of changes:
 python/pyspark/pandas/frame.py            | 389 +-
 python/pyspark/pandas/tests/test_stats.py |  66 -
 2 files changed, 275 insertions(+), 180 deletions(-)
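Editor's note: with this change, `DataFrame.corr` in the pandas API on Spark accepts `min_periods` and tolerates missing values for `spearman`, mirroring pandas. A usage sketch with illustrative data, assuming a PySpark build that includes this commit (master, i.e. 3.4-to-be):

```python
import pyspark.pandas as ps

psdf = ps.DataFrame({
    "a": [1.0, 2.0, None, 4.0, 5.0],  # a missing value no longer breaks spearman
    "b": [5.0, 4.0, 3.0, None, 1.0],
})

# Pairwise Spearman correlation; column pairs with fewer than
# min_periods valid observations yield NaN.
print(psdf.corr(method="spearman", min_periods=3))
```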
[spark] branch master updated (c134c7597d1 -> ea6857abff8)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from c134c7597d1 [SPARK-40339][SPARK-40342][SPARK-40345][SPARK-40348][PS] Implement quantile in Rolling/RollingGroupby/Expanding/ExpandingGroupby
     add ea6857abff8 [SPARK-40426][SQL] Return a map from SparkThrowable.getMessageParameters

No new revisions were added by this update.

Summary of changes:
 .../main/java/org/apache/spark/SparkThrowable.java | 12 ++--
 .../apache/spark/memory/SparkOutOfMemoryError.java |  4 +-
 .../scala/org/apache/spark/SparkException.scala    | 68 ++
 .../org/apache/spark/SparkThrowableHelper.scala    | 22 +--
 .../scala/org/apache/spark/SparkFunSuite.scala     |  3 +-
 .../org/apache/spark/SparkThrowableSuite.scala     |  2 +-
 .../org/apache/spark/sql/AnalysisException.scala   |  8 +--
 7 files changed, 34 insertions(+), 85 deletions(-)
[spark] branch master updated (40590e6d911 -> c134c7597d1)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from 40590e6d911 [SPARK-40397][BUILD] Upgrade `org.scalatestplus:selenium` to 3.12.13
     add c134c7597d1 [SPARK-40339][SPARK-40342][SPARK-40345][SPARK-40348][PS] Implement quantile in Rolling/RollingGroupby/Expanding/ExpandingGroupby

No new revisions were added by this update.

Summary of changes:
 python/pyspark/pandas/groupby.py              |   2 +-
 python/pyspark/pandas/missing/window.py       |   4 -
 python/pyspark/pandas/tests/test_expanding.py |   8 +
 python/pyspark/pandas/tests/test_rolling.py   |   8 +
 python/pyspark/pandas/window.py               | 309 ++
 5 files changed, 326 insertions(+), 5 deletions(-)
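Editor's note: this makes `quantile` available on the four window flavors named in the title. A usage sketch against the pandas-on-Spark API (illustrative data; assumes a build with this commit, and note that the result may be an accuracy-bounded approximation of the exact pandas value):

```python
import pyspark.pandas as ps

psser = ps.Series([4.0, 1.0, 3.0, 2.0, 5.0])

# Median over a sliding window of 3 observations; the first two rows
# are NaN because the window is not yet full.
print(psser.rolling(3).quantile(0.5))

# Median over an ever-growing window, requiring at least 2 observations.
print(psser.expanding(2).quantile(0.5))
```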
[spark] branch master updated (12e48527846 -> 40590e6d911)
This is an automated email from the ASF dual-hosted git repository.

sarutak pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from 12e48527846 [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage
     add 40590e6d911 [SPARK-40397][BUILD] Upgrade `org.scalatestplus:selenium` to 3.12.13

No new revisions were added by this update.

Summary of changes:
 dev/deps/spark-deps-hadoop-2-hive-2.3 |  2 +-
 dev/deps/spark-deps-hadoop-3-hive-2.3 |  2 +-
 pom.xml                               | 18 +++---
 3 files changed, 13 insertions(+), 9 deletions(-)
[spark] branch branch-3.3 updated: [SPARK-38017][FOLLOWUP][3.3] Hide TimestampNTZ in the doc
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new d8e157d0347 [SPARK-38017][FOLLOWUP][3.3] Hide TimestampNTZ in the doc
d8e157d0347 is described below

commit d8e157d0347f51c54e334fabe76072fc95332671
Author: Kousuke Saruta
AuthorDate: Thu Sep 15 09:28:06 2022 +0900

    [SPARK-38017][FOLLOWUP][3.3] Hide TimestampNTZ in the doc

    ### What changes were proposed in this pull request?
    This PR removes `TimestampNTZ` from the doc about `TimeWindow` and `SessionWindow`.

    ### Why are the changes needed?
    As we discussed, it's better to hide `TimestampNTZ` from the doc.
    https://github.com/apache/spark/pull/35313#issuecomment-1185192162

    ### Does this PR introduce _any_ user-facing change?
    The document will be changed, but there is no compatibility problem.

    ### How was this patch tested?
    Built the doc with `SKIP_RDOC=1 SKIP_SQLDOC=1 bundle exec jekyll build` in the `doc` directory, then confirmed the generated HTML.

    Closes #37882 from sarutak/fix-window-doc-3.3.

    Authored-by: Kousuke Saruta
    Signed-off-by: Hyukjin Kwon
---
 python/pyspark/sql/functions.py                              |  4 ++--
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 10 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index ed3b0789b47..c8d7f9cdcb5 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2557,7 +2557,7 @@ def window(
     ----------
     timeColumn : :class:`~pyspark.sql.Column`
         The column or the expression to use as the timestamp for windowing by time.
-        The time column must be of TimestampType or TimestampNTZType.
+        The time column must be of TimestampType.
     windowDuration : str
         A string specifying the width of the window, e.g. `10 minutes`,
         `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
@@ -2632,7 +2632,7 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str])
     ----------
     timeColumn : :class:`~pyspark.sql.Column` or str
         The column name or column to use as the timestamp for windowing by time.
-        The time column must be of TimestampType or TimestampNTZType.
+        The time column must be of TimestampType.
     gapDuration : :class:`~pyspark.sql.Column` or str
         A Python string literal or column specifying the timeout of the session. It could be
         static value, e.g. `10 minutes`, `1 second`, or an expression/UDF that specifies gap
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index f6c3bc7e3ce..6dbbca67338 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3645,7 +3645,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
    * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers. Note that the duration is a fixed length of
@@ -3701,7 +3701,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
   * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers. Note that the duration is a fixed length of
@@ -3746,7 +3746,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
    * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers.
@@ -3774,7 +3774,7 @@ object functions {
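Editor's note: the docstrings now match what branch-3.3 actually accepts, a TimestampType time column. A short PySpark usage sketch of the two functions touched here, with illustrative event data and an existing Spark session assumed:

```python
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()

events = spark.createDataFrame(
    [("2022-09-15 09:00:10", 1), ("2022-09-15 09:07:00", 2), ("2022-09-15 09:21:30", 3)],
    "ts string, v int",
).withColumn("ts", F.col("ts").cast("timestamp"))  # TimestampType, as the doc now requires

# Fixed 10-minute tumbling windows.
events.groupBy(F.window("ts", "10 minutes")).agg(F.sum("v")).show(truncate=False)

# Session windows that close after 5 minutes of inactivity.
events.groupBy(F.session_window("ts", "5 minutes")).agg(F.sum("v")).show(truncate=False)
```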
[spark] branch branch-3.2 updated: [SPARK-38017][FOLLOWUP][3.2] Hide TimestampNTZ in the doc
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new ce55a8f787b [SPARK-38017][FOLLOWUP][3.2] Hide TimestampNTZ in the doc
ce55a8f787b is described below

commit ce55a8f787b0453132bca06089da99462f61617a
Author: Kousuke Saruta
AuthorDate: Thu Sep 15 09:27:20 2022 +0900

    [SPARK-38017][FOLLOWUP][3.2] Hide TimestampNTZ in the doc

    ### What changes were proposed in this pull request?
    This PR removes `TimestampNTZ` from the doc about `TimeWindow` and `SessionWindow`.

    ### Why are the changes needed?
    As we discussed, it's better to hide `TimestampNTZ` from the doc.
    https://github.com/apache/spark/pull/35313#issuecomment-1185192162

    ### Does this PR introduce _any_ user-facing change?
    The document will be changed, but there is no compatibility problem.

    ### How was this patch tested?
    Built the doc with `SKIP_RDOC=1 SKIP_SQLDOC=1 bundle exec jekyll build` in the `doc` directory, then confirmed the generated HTML.

    Closes #37883 from sarutak/fix-window-doc-3.2.

    Authored-by: Kousuke Saruta
    Signed-off-by: Hyukjin Kwon
---
 python/pyspark/sql/functions.py                              | 2 +-
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 1aeafd85ade..ea59c45c4f5 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2307,7 +2307,7 @@ def window(timeColumn, windowDuration, slideDuration=None, startTime=None):
     ----------
     timeColumn : :class:`~pyspark.sql.Column`
         The column or the expression to use as the timestamp for windowing by time.
-        The time column must be of TimestampType or TimestampNTZType.
+        The time column must be of TimestampType.
     windowDuration : str
         A string specifying the width of the window, e.g. `10 minutes`,
         `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index f4801eec060..a4c77b20c65 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3517,7 +3517,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
    * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers. Note that the duration is a fixed length of
@@ -3573,7 +3573,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
    * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers. Note that the duration is a fixed length of
@@ -3618,7 +3618,7 @@ object functions {
    * processing time.
    *
    * @param timeColumn The column or the expression to use as the timestamp for windowing by time.
-   *                   The time column must be of TimestampType or TimestampNTZType.
+   *                   The time column must be of TimestampType.
    * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
    *                       `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
    *                       valid duration identifiers.
[spark] branch master updated (801faba5619 -> 12e48527846)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


    from 801faba5619 [SPARK-40420][SQL] Sort error message parameters by names in the JSON formats
     add 12e48527846 [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage

No new revisions were added by this update.

Summary of changes:
 docs/running-on-kubernetes.md                                        | 5 +++--
 .../org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala  | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)
[spark] branch branch-3.3 updated: [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new ec40006aa3b [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage
ec40006aa3b is described below

commit ec40006aa3bda9f6fd03bb9c0bda561c139ed5ce
Author: Dongjoon Hyun
AuthorDate: Wed Sep 14 09:28:04 2022 -0700

    [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage

    ### What changes were proposed in this pull request?
    This PR aims to add explicit YuniKorn queue submission test coverage instead of relying on implicit assignment by the admission controller.

    ### Why are the changes needed?
    - To provide proper test coverage.
    - To prevent the side effect of the YuniKorn admission controller, which by default (unless the rule is edited explicitly) overrides all of Spark's scheduler settings and thereby breaks Apache Spark's default-scheduler K8s IT coverage.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    Manually ran the CI and checked the YuniKorn queue UI.

    ```
    $ build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests \
        -Dspark.kubernetes.test.deployMode=docker-desktop "kubernetes-integration-tests/test" \
        -Dtest.exclude.tags=minikube,local,decom -Dtest.default.exclude.tags=
    ```

    Screenshot: https://user-images.githubusercontent.com/9700541/190112005-5863bdd3-2e43-4ec7-b34b-a286d1a7c95e.png

    Closes #37877 from dongjoon-hyun/SPARK-40423.

    Authored-by: Dongjoon Hyun
    Signed-off-by: Dongjoon Hyun
    (cherry picked from commit 12e48527846d993a78b159fbba3e900a4feb7b55)
    Signed-off-by: Dongjoon Hyun
---
 docs/running-on-kubernetes.md                                        | 5 +++--
 .../org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala  | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md
index b57175f358c..f7f7ec539b8 100644
--- a/docs/running-on-kubernetes.md
+++ b/docs/running-on-kubernetes.md
@@ -1822,8 +1822,7 @@ Install Apache YuniKorn:
 ```bash
 helm repo add yunikorn https://apache.github.io/yunikorn-release
 helm repo update
-kubectl create namespace yunikorn
-helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.1.0
+helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.1.0 --create-namespace --set embedAdmissionController=false
 ```

 The above steps will install YuniKorn v1.1.0 on an existing Kubernetes cluster.
@@ -1834,6 +1833,8 @@ Submit Spark jobs with the following extra options:
 ```bash
 --conf spark.kubernetes.scheduler.name=yunikorn
+--conf spark.kubernetes.driver.label.queue=root.default
+--conf spark.kubernetes.executor.label.queue=root.default
 --conf spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id={{APP_ID}}
 --conf spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id={{APP_ID}}
 ```
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala
index 5a3c063efa1..0dfb88b259e 100644
--- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala
@@ -21,8 +21,11 @@ class YuniKornSuite extends KubernetesSuite {

   override protected def setUpTest(): Unit = {
     super.setUpTest()
+    val namespace = sparkAppConf.get("spark.kubernetes.namespace")
     sparkAppConf
       .set("spark.kubernetes.scheduler.name", "yunikorn")
+      .set("spark.kubernetes.driver.label.queue", "root." + namespace)
+      .set("spark.kubernetes.executor.label.queue", "root." + namespace)
       .set("spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}")
       .set("spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}")
   }
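Editor's note: the queue labels from the updated docs can equally be set programmatically when building the session. A sketch only, assuming a YuniKorn-enabled Kubernetes cluster; the API server URL and container image are placeholders you must fill in:

```python
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .master("k8s://https://<api-server>:6443")              # placeholder API server
    .config("spark.kubernetes.container.image", "<image>")  # placeholder Spark image
    .config("spark.kubernetes.scheduler.name", "yunikorn")
    .config("spark.kubernetes.driver.label.queue", "root.default")
    .config("spark.kubernetes.executor.label.queue", "root.default")
    .config("spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}")
    .config("spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}")
    .getOrCreate()
)
```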