svn commit: r24677 - in /dev/spark/2.3.1-SNAPSHOT-2018_02_03_14_01-45f0f4f-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Sat Feb 3 22:15:08 2018 New Revision: 24677 Log: Apache Spark 2.3.1-SNAPSHOT-2018_02_03_14_01-45f0f4f docs [This commit notification would consist of 1442 parts, which exceeds the limit of 50, so it was shortened to the summary.]
svn commit: r24675 - in /dev/spark/2.4.0-SNAPSHOT-2018_02_03_12_01-551dff2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Sat Feb 3 20:15:14 2018 New Revision: 24675 Log: Apache Spark 2.4.0-SNAPSHOT-2018_02_03_12_01-551dff2 docs [This commit notification would consist of 1444 parts, which exceeds the limit of 50, so it was shortened to the summary.]
spark git commit: [SPARK-21658][SQL][PYSPARK] Revert "[] Add default None for value in na.replace in PySpark"
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 be3de8791 -> 45f0f4ff7

[SPARK-21658][SQL][PYSPARK] Revert "[] Add default None for value in na.replace in PySpark"

This reverts commit 0fcde87aadc9a92e138f11583119465ca4b5c518.

See the discussion in [SPARK-21658](https://issues.apache.org/jira/browse/SPARK-21658), [SPARK-19454](https://issues.apache.org/jira/browse/SPARK-19454) and https://github.com/apache/spark/pull/16793

Author: hyukjinkwon

Closes #20496 from HyukjinKwon/revert-SPARK-21658.

(cherry picked from commit 551dff2bccb65e9b3f77b986f167aec90d9a6016)
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/45f0f4ff
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/45f0f4ff
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/45f0f4ff

Branch: refs/heads/branch-2.3
Commit: 45f0f4ff76accab3387b08b3773a0b127333ea3a
Parents: be3de87
Author: hyukjinkwon
Authored: Sat Feb 3 10:40:21 2018 -0800
Committer: gatorsmile
Committed: Sat Feb 3 10:40:29 2018 -0800

--
 python/pyspark/sql/dataframe.py | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/45f0f4ff/python/pyspark/sql/dataframe.py
--
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 1496cba..2e55407 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1577,16 +1577,6 @@ class DataFrame(object):
        |null|  null|null|
        +----+------+----+

-        >>> df4.na.replace('Alice').show()
-        +----+------+----+
-        | age|height|name|
-        +----+------+----+
-        |  10|    80|null|
-        |   5|  null| Bob|
-        |null|  null| Tom|
-        |null|  null|null|
-        +----+------+----+
-
        >>> df4.na.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show()
        +----+------+----+
        | age|height|name|
@@ -2055,7 +2045,7 @@ class DataFrameNaFunctions(object):

     fill.__doc__ = DataFrame.fillna.__doc__

-    def replace(self, to_replace, value=None, subset=None):
+    def replace(self, to_replace, value, subset=None):
         return self.df.replace(to_replace, value, subset)

     replace.__doc__ = DataFrame.replace.__doc__
spark git commit: [SPARK-21658][SQL][PYSPARK] Revert "[] Add default None for value in na.replace in PySpark"
Repository: spark
Updated Branches:
  refs/heads/master 4aaa7d40b -> 551dff2bc

[SPARK-21658][SQL][PYSPARK] Revert "[] Add default None for value in na.replace in PySpark"

This reverts commit 0fcde87aadc9a92e138f11583119465ca4b5c518.

See the discussion in [SPARK-21658](https://issues.apache.org/jira/browse/SPARK-21658), [SPARK-19454](https://issues.apache.org/jira/browse/SPARK-19454) and https://github.com/apache/spark/pull/16793

Author: hyukjinkwon

Closes #20496 from HyukjinKwon/revert-SPARK-21658.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/551dff2b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/551dff2b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/551dff2b

Branch: refs/heads/master
Commit: 551dff2bccb65e9b3f77b986f167aec90d9a6016
Parents: 4aaa7d4
Author: hyukjinkwon
Authored: Sat Feb 3 10:40:21 2018 -0800
Committer: gatorsmile
Committed: Sat Feb 3 10:40:21 2018 -0800

--
 python/pyspark/sql/dataframe.py | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/551dff2b/python/pyspark/sql/dataframe.py
--
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 1496cba..2e55407 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1577,16 +1577,6 @@ class DataFrame(object):
        |null|  null|null|
        +----+------+----+

-        >>> df4.na.replace('Alice').show()
-        +----+------+----+
-        | age|height|name|
-        +----+------+----+
-        |  10|    80|null|
-        |   5|  null| Bob|
-        |null|  null| Tom|
-        |null|  null|null|
-        +----+------+----+
-
        >>> df4.na.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show()
        +----+------+----+
        | age|height|name|
@@ -2055,7 +2045,7 @@ class DataFrameNaFunctions(object):

     fill.__doc__ = DataFrame.fillna.__doc__

-    def replace(self, to_replace, value=None, subset=None):
+    def replace(self, to_replace, value, subset=None):
         return self.df.replace(to_replace, value, subset)

     replace.__doc__ = DataFrame.replace.__doc__
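For readers following the API change: after this revert, `DataFrameNaFunctions.replace` once again requires an explicit `value` argument, so `df.na.replace('Alice')` with no replacement is rejected. Below is a minimal PySpark sketch of call forms that remain supported; the local session and toy data are hypothetical and not part of the patch.

```python
# Minimal sketch (hypothetical session and data) of na.replace after the revert:
# an explicit `value` argument is required again.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("na-replace-sketch").getOrCreate()
df = spark.createDataFrame(
    [(10, 80, "Alice"), (5, None, "Bob"), (None, None, "Tom")],
    ["age", "height", "name"])

# Scalar-to-scalar replacement across columns of a matching type.
df.na.replace(10, 20).show()

# List-to-list replacement restricted to the 'name' column.
df.na.replace(["Alice", "Bob"], ["A", "B"], "name").show()

# df.na.replace("Alice")  # would now fail: `value` is a required positional argument

spark.stop()
```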
spark git commit: [MINOR][DOC] Use raw triple double quotes around docstrings where there are occurrences of backslashes.
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 4de206182 -> be3de8791

[MINOR][DOC] Use raw triple double quotes around docstrings where there are occurrences of backslashes.

From [PEP 257](https://www.python.org/dev/peps/pep-0257/):

> For consistency, always use """triple double quotes""" around docstrings. Use
> r"""raw triple double quotes""" if you use any backslashes in your
> docstrings. For Unicode docstrings, use u"""Unicode triple-quoted strings""".

For example, this is what help(kafka_wordcount) shows:

```
DESCRIPTION
    Counts words in UTF8 encoded, '
    ' delimited text received from the network every second.

    Usage: kafka_wordcount.py

    To run this on your local machine, you need to setup Kafka and create a producer first, see
    http://kafka.apache.org/documentation.html#quickstart
    and then run the example
    `$ bin/spark-submit --jars external/kafka-assembly/target/scala-*/spark-streaming-kafka-assembly-*.jar examples/src/main/python/streaming/kafka_wordcount.py localhost:2181 test`
```

This is what it shows, after the fix:

```
DESCRIPTION
    Counts words in UTF8 encoded, '\n' delimited text received from the network every second.

    Usage: kafka_wordcount.py

    To run this on your local machine, you need to setup Kafka and create a producer first, see
    http://kafka.apache.org/documentation.html#quickstart
    and then run the example
    `$ bin/spark-submit --jars \
      external/kafka-assembly/target/scala-*/spark-streaming-kafka-assembly-*.jar \
      examples/src/main/python/streaming/kafka_wordcount.py \
      localhost:2181 test`
```

The thing worth noticing is no linebreak here in the help.

## What changes were proposed in this pull request?

Change triple double quotes to raw triple double quotes when there are occurrences of backslashes in docstrings.

## How was this patch tested?

Manually as this is a doc fix.

Author: Shashwat Anand

Closes #20497 from ashashwat/docstring-fixes.
(cherry picked from commit 4aaa7d40bf495317e740b6d6f9c2a55dfd03521b) Signed-off-by: gatorsmile Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be3de879 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be3de879 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be3de879 Branch: refs/heads/branch-2.3 Commit: be3de87914f29a56137e391d0cf494c0d1a7ba12 Parents: 4de2061 Author: Shashwat Anand Authored: Sat Feb 3 10:31:04 2018 -0800 Committer: gatorsmile Committed: Sat Feb 3 10:31:17 2018 -0800 -- .../src/main/python/sql/streaming/structured_network_wordcount.py | 2 +- .../python/sql/streaming/structured_network_wordcount_windowed.py | 2 +- examples/src/main/python/streaming/direct_kafka_wordcount.py | 2 +- examples/src/main/python/streaming/flume_wordcount.py | 2 +- examples/src/main/python/streaming/kafka_wordcount.py | 2 +- examples/src/main/python/streaming/network_wordcount.py| 2 +- examples/src/main/python/streaming/network_wordjoinsentiments.py | 2 +- examples/src/main/python/streaming/sql_network_wordcount.py| 2 +- examples/src/main/python/streaming/stateful_network_wordcount.py | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/be3de879/examples/src/main/python/sql/streaming/structured_network_wordcount.py -- diff --git a/examples/src/main/python/sql/streaming/structured_network_wordcount.py b/examples/src/main/python/sql/streaming/structured_network_wordcount.py index afde255..c3284c1 100644 --- a/examples/src/main/python/sql/streaming/structured_network_wordcount.py +++ b/examples/src/main/python/sql/streaming/structured_network_wordcount.py @@ -15,7 +15,7 @@ # limitations under the License. # -""" +r""" Counts words in UTF8 encoded, '\n' delimited text received from the network. Usage: structured_network_wordcount.py and describe the TCP server that Structured Streaming http://git-wip-us.apache.org/repos/asf/spark/blob/be3de879/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py -- diff --git a/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py b/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py index 02a7d33..db67255 100644 --- a/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py +++ b/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py @@ -15,7 +15,7 @@ # limitations under the License. # -""" +r""" Counts words in UTF8 encoded, '\n' delimited tex
spark git commit: [MINOR][DOC] Use raw triple double quotes around docstrings where there are occurrences of backslashes.
Repository: spark
Updated Branches:
  refs/heads/master 522e0b186 -> 4aaa7d40b

[MINOR][DOC] Use raw triple double quotes around docstrings where there are occurrences of backslashes.

From [PEP 257](https://www.python.org/dev/peps/pep-0257/):

> For consistency, always use """triple double quotes""" around docstrings. Use
> r"""raw triple double quotes""" if you use any backslashes in your
> docstrings. For Unicode docstrings, use u"""Unicode triple-quoted strings""".

For example, this is what help(kafka_wordcount) shows:

```
DESCRIPTION
    Counts words in UTF8 encoded, '
    ' delimited text received from the network every second.

    Usage: kafka_wordcount.py

    To run this on your local machine, you need to setup Kafka and create a producer first, see
    http://kafka.apache.org/documentation.html#quickstart
    and then run the example
    `$ bin/spark-submit --jars external/kafka-assembly/target/scala-*/spark-streaming-kafka-assembly-*.jar examples/src/main/python/streaming/kafka_wordcount.py localhost:2181 test`
```

This is what it shows, after the fix:

```
DESCRIPTION
    Counts words in UTF8 encoded, '\n' delimited text received from the network every second.

    Usage: kafka_wordcount.py

    To run this on your local machine, you need to setup Kafka and create a producer first, see
    http://kafka.apache.org/documentation.html#quickstart
    and then run the example
    `$ bin/spark-submit --jars \
      external/kafka-assembly/target/scala-*/spark-streaming-kafka-assembly-*.jar \
      examples/src/main/python/streaming/kafka_wordcount.py \
      localhost:2181 test`
```

The thing worth noticing is no linebreak here in the help.

## What changes were proposed in this pull request?

Change triple double quotes to raw triple double quotes when there are occurrences of backslashes in docstrings.

## How was this patch tested?

Manually as this is a doc fix.

Author: Shashwat Anand

Closes #20497 from ashashwat/docstring-fixes.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4aaa7d40 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4aaa7d40 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4aaa7d40 Branch: refs/heads/master Commit: 4aaa7d40bf495317e740b6d6f9c2a55dfd03521b Parents: 522e0b1 Author: Shashwat Anand Authored: Sat Feb 3 10:31:04 2018 -0800 Committer: gatorsmile Committed: Sat Feb 3 10:31:04 2018 -0800 -- .../src/main/python/sql/streaming/structured_network_wordcount.py | 2 +- .../python/sql/streaming/structured_network_wordcount_windowed.py | 2 +- examples/src/main/python/streaming/direct_kafka_wordcount.py | 2 +- examples/src/main/python/streaming/flume_wordcount.py | 2 +- examples/src/main/python/streaming/kafka_wordcount.py | 2 +- examples/src/main/python/streaming/network_wordcount.py| 2 +- examples/src/main/python/streaming/network_wordjoinsentiments.py | 2 +- examples/src/main/python/streaming/sql_network_wordcount.py| 2 +- examples/src/main/python/streaming/stateful_network_wordcount.py | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4aaa7d40/examples/src/main/python/sql/streaming/structured_network_wordcount.py -- diff --git a/examples/src/main/python/sql/streaming/structured_network_wordcount.py b/examples/src/main/python/sql/streaming/structured_network_wordcount.py index afde255..c3284c1 100644 --- a/examples/src/main/python/sql/streaming/structured_network_wordcount.py +++ b/examples/src/main/python/sql/streaming/structured_network_wordcount.py @@ -15,7 +15,7 @@ # limitations under the License. # -""" +r""" Counts words in UTF8 encoded, '\n' delimited text received from the network. Usage: structured_network_wordcount.py and describe the TCP server that Structured Streaming http://git-wip-us.apache.org/repos/asf/spark/blob/4aaa7d40/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py -- diff --git a/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py b/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py index 02a7d33..db67255 100644 --- a/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py +++ b/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py @@ -15,7 +15,7 @@ # limitations under the License. # -""" +r""" Counts words in UTF8 encoded, '\n' delimited text received from the network over a sliding window of configurable duration. Each line from the network
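The mechanics behind this fix are plain Python string semantics: in a regular docstring a backslash escape such as `\n` is interpreted at compile time, so `help()` renders a real line break, while a raw `r"""..."""` docstring keeps the backslash literally. A small self-contained illustration, not taken from the patch:

```python
# Small illustration of why the examples switch to r""" docstrings (PEP 257).

def regular():
    """Counts words in UTF8 encoded, '\n' delimited text."""

def raw():
    r"""Counts words in UTF8 encoded, '\n' delimited text."""

print("\n" in regular.__doc__)   # True  - the escape became a real newline, which breaks help() layout
print("\\n" in raw.__doc__)      # True  - the raw docstring keeps the backslash and 'n' as two characters
print("\n" in raw.__doc__)       # False
```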
svn commit: r24657 - in /dev/spark/2.4.0-SNAPSHOT-2018_02_03_04_01-522e0b1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Sat Feb 3 12:17:36 2018 New Revision: 24657 Log: Apache Spark 2.4.0-SNAPSHOT-2018_02_03_04_01-522e0b1 docs [This commit notification would consist of 1444 parts, which exceeds the limit of 50, so it was shortened to the summary.]
svn commit: r24656 - in /dev/spark/2.3.1-SNAPSHOT-2018_02_03_02_01-4de2061-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Sat Feb 3 10:15:59 2018 New Revision: 24656 Log: Apache Spark 2.3.1-SNAPSHOT-2018_02_03_02_01-4de2061 docs [This commit notification would consist of 1442 parts, which exceeds the limit of 50, so it was shortened to the summary.]
svn commit: r24655 - in /dev/spark/2.4.0-SNAPSHOT-2018_02_03_00_01-fe73cb4-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Sat Feb 3 08:16:26 2018 New Revision: 24655 Log: Apache Spark 2.4.0-SNAPSHOT-2018_02_03_00_01-fe73cb4 docs [This commit notification would consist of 1444 parts, which exceeds the limit of 50, so it was shortened to the summary.]
spark git commit: [SPARK-23305][SQL][TEST] Test `spark.sql.files.ignoreMissingFiles` for all file-based data sources
Repository: spark Updated Branches: refs/heads/branch-2.3 1bcb3728d -> 4de206182 [SPARK-23305][SQL][TEST] Test `spark.sql.files.ignoreMissingFiles` for all file-based data sources ## What changes were proposed in this pull request? Like Parquet, all file-based data source handles `spark.sql.files.ignoreMissingFiles` correctly. We had better have a test coverage for feature parity and in order to prevent future accidental regression for all data sources. ## How was this patch tested? Pass Jenkins with a newly added test case. Author: Dongjoon Hyun Closes #20479 from dongjoon-hyun/SPARK-23305. (cherry picked from commit 522e0b1866a0298669c83de5a47ba380dc0b7c84) Signed-off-by: gatorsmile Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4de20618 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4de20618 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4de20618 Branch: refs/heads/branch-2.3 Commit: 4de206182c8a1f76e1e5e6b597c4b3890e2ca255 Parents: 1bcb372 Author: Dongjoon Hyun Authored: Sat Feb 3 00:04:00 2018 -0800 Committer: gatorsmile Committed: Sat Feb 3 00:04:08 2018 -0800 -- .../spark/sql/FileBasedDataSourceSuite.scala| 37 .../datasources/parquet/ParquetQuerySuite.scala | 33 - 2 files changed, 37 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4de20618/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index c272c99..640d6b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -17,6 +17,10 @@ package org.apache.spark.sql +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext { @@ -92,4 +96,37 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext { } } } + + allFileBasedDataSources.foreach { format => +testQuietly(s"Enabling/disabling ignoreMissingFiles using $format") { + def testIgnoreMissingFiles(): Unit = { +withTempDir { dir => + val basePath = dir.getCanonicalPath + Seq("0").toDF("a").write.format(format).save(new Path(basePath, "first").toString) + Seq("1").toDF("a").write.format(format).save(new Path(basePath, "second").toString) + val thirdPath = new Path(basePath, "third") + Seq("2").toDF("a").write.format(format).save(thirdPath.toString) + val df = spark.read.format(format).load( +new Path(basePath, "first").toString, +new Path(basePath, "second").toString, +new Path(basePath, "third").toString) + + val fs = thirdPath.getFileSystem(spark.sparkContext.hadoopConfiguration) + assert(fs.delete(thirdPath, true)) + checkAnswer(df, Seq(Row("0"), Row("1"))) +} + } + + withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "true") { +testIgnoreMissingFiles() + } + + withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "false") { +val exception = intercept[SparkException] { + testIgnoreMissingFiles() +} +assert(exception.getMessage().contains("does not exist")) + } +} + } } http://git-wip-us.apache.org/repos/asf/spark/blob/4de20618/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala -- diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 6ad88ed..55b0f72 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -355,39 +355,6 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext } } - testQuietly("Enabling/disabling ignoreMissingFiles") { -def testIgnoreMissingFiles(): Unit = { - withTempDir { dir => -val basePath = dir.getCanonicalPath -spark.range(1).toDF("a").write.parquet(new Path(basePath, "first").toString) -spark.range(1, 2).toDF("a").write.parquet(new Pa
spark git commit: [SPARK-23305][SQL][TEST] Test `spark.sql.files.ignoreMissingFiles` for all file-based data sources
Repository: spark Updated Branches: refs/heads/master 63b49fa2e -> 522e0b186 [SPARK-23305][SQL][TEST] Test `spark.sql.files.ignoreMissingFiles` for all file-based data sources ## What changes were proposed in this pull request? Like Parquet, all file-based data source handles `spark.sql.files.ignoreMissingFiles` correctly. We had better have a test coverage for feature parity and in order to prevent future accidental regression for all data sources. ## How was this patch tested? Pass Jenkins with a newly added test case. Author: Dongjoon Hyun Closes #20479 from dongjoon-hyun/SPARK-23305. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/522e0b18 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/522e0b18 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/522e0b18 Branch: refs/heads/master Commit: 522e0b1866a0298669c83de5a47ba380dc0b7c84 Parents: 63b49fa Author: Dongjoon Hyun Authored: Sat Feb 3 00:04:00 2018 -0800 Committer: gatorsmile Committed: Sat Feb 3 00:04:00 2018 -0800 -- .../spark/sql/FileBasedDataSourceSuite.scala| 37 .../datasources/parquet/ParquetQuerySuite.scala | 33 - 2 files changed, 37 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/522e0b18/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index c272c99..640d6b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -17,6 +17,10 @@ package org.apache.spark.sql +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext { @@ -92,4 +96,37 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext { } } } + + allFileBasedDataSources.foreach { format => +testQuietly(s"Enabling/disabling ignoreMissingFiles using $format") { + def testIgnoreMissingFiles(): Unit = { +withTempDir { dir => + val basePath = dir.getCanonicalPath + Seq("0").toDF("a").write.format(format).save(new Path(basePath, "first").toString) + Seq("1").toDF("a").write.format(format).save(new Path(basePath, "second").toString) + val thirdPath = new Path(basePath, "third") + Seq("2").toDF("a").write.format(format).save(thirdPath.toString) + val df = spark.read.format(format).load( +new Path(basePath, "first").toString, +new Path(basePath, "second").toString, +new Path(basePath, "third").toString) + + val fs = thirdPath.getFileSystem(spark.sparkContext.hadoopConfiguration) + assert(fs.delete(thirdPath, true)) + checkAnswer(df, Seq(Row("0"), Row("1"))) +} + } + + withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "true") { +testIgnoreMissingFiles() + } + + withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "false") { +val exception = intercept[SparkException] { + testIgnoreMissingFiles() +} +assert(exception.getMessage().contains("does not exist")) + } +} + } } http://git-wip-us.apache.org/repos/asf/spark/blob/522e0b18/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 6ad88ed..55b0f72 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -355,39 +355,6 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext } } - testQuietly("Enabling/disabling ignoreMissingFiles") { -def testIgnoreMissingFiles(): Unit = { - withTempDir { dir => -val basePath = dir.getCanonicalPath -spark.range(1).toDF("a").write.parquet(new Path(basePath, "first").toString) -spark.range(1, 2).toDF("a").write.parquet(new Path(basePath, "second").toString) -val thirdPath = new Path(basePath, "third") -spark.ran
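The behaviour the new suite locks down can also be reproduced from PySpark. Below is a hedged sketch, with hypothetical paths and Parquet chosen as one of the covered formats, of what `spark.sql.files.ignoreMissingFiles` changes when an input directory disappears after a DataFrame has been planned:

```python
# Hedged sketch: with spark.sql.files.ignoreMissingFiles=true, files that vanish after
# the read is planned are skipped at execution time; with it set to false, the job fails.
import os
import shutil
import tempfile

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("ignore-missing-files").getOrCreate()

base = tempfile.mkdtemp()
first = os.path.join(base, "first")
second = os.path.join(base, "second")
spark.createDataFrame([("0",)], ["a"]).write.format("parquet").save(first)
spark.createDataFrame([("1",)], ["a"]).write.format("parquet").save(second)

spark.conf.set("spark.sql.files.ignoreMissingFiles", "true")
df = spark.read.format("parquet").load([first, second])  # paths are listed here
shutil.rmtree(second)                                     # ...then one of them disappears
df.show()                                                 # only the rows from "first"; no failure

spark.stop()
```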
spark git commit: [SPARK-23311][SQL][TEST] add FilterFunction test case for test CombineTypedFilters
Repository: spark Updated Branches: refs/heads/branch-2.3 b614c083a -> 1bcb3728d [SPARK-23311][SQL][TEST] add FilterFunction test case for test CombineTypedFilters ## What changes were proposed in this pull request? In the current test case for CombineTypedFilters, we lack the test of FilterFunction, so let's add it. In addition, in TypedFilterOptimizationSuite's existing test cases, Let's extract a common LocalRelation. ## How was this patch tested? add new test cases. Author: caoxuewen Closes #20482 from heary-cao/TypedFilterOptimizationSuite. (cherry picked from commit 63b49fa2e599080c2ba7d5189f9dde20a2e01fb4) Signed-off-by: gatorsmile Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1bcb3728 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1bcb3728 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1bcb3728 Branch: refs/heads/branch-2.3 Commit: 1bcb3728db11be6e34060eff670fc8245ad571c6 Parents: b614c08 Author: caoxuewen Authored: Sat Feb 3 00:02:03 2018 -0800 Committer: gatorsmile Committed: Sat Feb 3 00:02:11 2018 -0800 -- .../apache/spark/sql/catalyst/dsl/package.scala | 3 + .../TypedFilterOptimizationSuite.scala | 95 +--- 2 files changed, 84 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1bcb3728/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 59cb26d..efb2eba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp} import scala.language.implicitConversions +import org.apache.spark.api.java.function.FilterFunction import org.apache.spark.sql.Encoder import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ @@ -301,6 +302,8 @@ package object dsl { def filter[T : Encoder](func: T => Boolean): LogicalPlan = TypedFilter(func, logicalPlan) + def filter[T : Encoder](func: FilterFunction[T]): LogicalPlan = TypedFilter(func, logicalPlan) + def serialize[T : Encoder]: LogicalPlan = CatalystSerde.serialize[T](logicalPlan) def deserialize[T : Encoder]: LogicalPlan = CatalystSerde.deserialize[T](logicalPlan) http://git-wip-us.apache.org/repos/asf/spark/blob/1bcb3728/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala index 56f096f..5fc99a3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import scala.reflect.runtime.universe.TypeTag +import org.apache.spark.api.java.function.FilterFunction import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -38,18 +39,19 @@ class TypedFilterOptimizationSuite extends PlanTest { implicit private def productEncoder[T <: 
Product : TypeTag] = ExpressionEncoder[T]() + val testRelation = LocalRelation('_1.int, '_2.int) + test("filter after serialize with the same object type") { -val input = LocalRelation('_1.int, '_2.int) val f = (i: (Int, Int)) => i._1 > 0 -val query = input +val query = testRelation .deserialize[(Int, Int)] .serialize[(Int, Int)] .filter(f).analyze val optimized = Optimize.execute(query) -val expected = input +val expected = testRelation .deserialize[(Int, Int)] .where(callFunction(f, BooleanType, 'obj)) .serialize[(Int, Int)].analyze @@ -58,10 +60,9 @@ class TypedFilterOptimizationSuite extends PlanTest { } test("filter after serialize with different object types") { -val input = LocalRelation('_1.int, '_2.int) val f = (i: OtherTuple) => i._1 > 0 -val query = input +val query = testRelation .deserialize[(Int, Int)] .serialize[(Int, Int)] .filter(f).analyze @@ -70,17 +71,16 @@ class TypedFilterOptimizationSu
spark git commit: [SPARK-23311][SQL][TEST] add FilterFunction test case for test CombineTypedFilters
Repository: spark Updated Branches: refs/heads/master fe73cb4b4 -> 63b49fa2e [SPARK-23311][SQL][TEST] add FilterFunction test case for test CombineTypedFilters ## What changes were proposed in this pull request? In the current test case for CombineTypedFilters, we lack the test of FilterFunction, so let's add it. In addition, in TypedFilterOptimizationSuite's existing test cases, Let's extract a common LocalRelation. ## How was this patch tested? add new test cases. Author: caoxuewen Closes #20482 from heary-cao/TypedFilterOptimizationSuite. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/63b49fa2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/63b49fa2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/63b49fa2 Branch: refs/heads/master Commit: 63b49fa2e599080c2ba7d5189f9dde20a2e01fb4 Parents: fe73cb4 Author: caoxuewen Authored: Sat Feb 3 00:02:03 2018 -0800 Committer: gatorsmile Committed: Sat Feb 3 00:02:03 2018 -0800 -- .../apache/spark/sql/catalyst/dsl/package.scala | 3 + .../TypedFilterOptimizationSuite.scala | 95 +--- 2 files changed, 84 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/63b49fa2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 59cb26d..efb2eba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp} import scala.language.implicitConversions +import org.apache.spark.api.java.function.FilterFunction import org.apache.spark.sql.Encoder import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ @@ -301,6 +302,8 @@ package object dsl { def filter[T : Encoder](func: T => Boolean): LogicalPlan = TypedFilter(func, logicalPlan) + def filter[T : Encoder](func: FilterFunction[T]): LogicalPlan = TypedFilter(func, logicalPlan) + def serialize[T : Encoder]: LogicalPlan = CatalystSerde.serialize[T](logicalPlan) def deserialize[T : Encoder]: LogicalPlan = CatalystSerde.deserialize[T](logicalPlan) http://git-wip-us.apache.org/repos/asf/spark/blob/63b49fa2/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala index 56f096f..5fc99a3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import scala.reflect.runtime.universe.TypeTag +import org.apache.spark.api.java.function.FilterFunction import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -38,18 +39,19 @@ class TypedFilterOptimizationSuite extends PlanTest { implicit private def productEncoder[T <: Product : TypeTag] = ExpressionEncoder[T]() + val testRelation = LocalRelation('_1.int, '_2.int) + 
test("filter after serialize with the same object type") { -val input = LocalRelation('_1.int, '_2.int) val f = (i: (Int, Int)) => i._1 > 0 -val query = input +val query = testRelation .deserialize[(Int, Int)] .serialize[(Int, Int)] .filter(f).analyze val optimized = Optimize.execute(query) -val expected = input +val expected = testRelation .deserialize[(Int, Int)] .where(callFunction(f, BooleanType, 'obj)) .serialize[(Int, Int)].analyze @@ -58,10 +60,9 @@ class TypedFilterOptimizationSuite extends PlanTest { } test("filter after serialize with different object types") { -val input = LocalRelation('_1.int, '_2.int) val f = (i: OtherTuple) => i._1 > 0 -val query = input +val query = testRelation .deserialize[(Int, Int)] .serialize[(Int, Int)] .filter(f).analyze @@ -70,17 +71,16 @@ class TypedFilterOptimizationSuite extends PlanTest { } test("filter before deserialize with the same object type") { -val i