This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 782ab8e [SPARK-33091][SQL] Avoid using map instead of foreach to avoid potential side effect at callers of OrcUtils.readCatalystSchema 782ab8e is described below commit 782ab8e244252696c50b4b432d07a56c374b8680 Author: HyukjinKwon <gurwls...@apache.org> AuthorDate: Thu Oct 8 16:29:15 2020 +0900 [SPARK-33091][SQL] Avoid using map instead of foreach to avoid potential side effect at callers of OrcUtils.readCatalystSchema ### What changes were proposed in this pull request? This is a follow-up to SPARK-32646. A new JIRA was filed to track the fix versions properly. When you use `map`, it might be lazily evaluated and not executed. To avoid this, we should use `foreach` instead. See also SPARK-16694. The current code does not appear to cause any bugs for now, but it is best to fix it to avoid potential issues. ### Why are the changes needed? To avoid potential issues from `map` being lazy and not executed. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Ran related tests. CI in this PR should verify. Closes #29974 from HyukjinKwon/SPARK-32646. 
Authored-by: HyukjinKwon <gurwls...@apache.org> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> (cherry picked from commit 5effa8ea261ba59214afedc2853d1b248b330ca6) Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- .../org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala | 2 +- .../sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index 69badb4..c540007 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -185,7 +185,7 @@ class OrcFileFormat } else { // ORC predicate pushdown if (orcFilterPushDown) { - OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).map { fileSchema => + OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).foreach { fileSchema => OrcFilters.createFilter(fileSchema, filters).foreach { f => OrcInputFormat.setSearchArgument(conf, f, fileSchema.fieldNames) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala index 1f38128..b0ddee0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala @@ -69,7 +69,7 @@ case class OrcPartitionReaderFactory( private def pushDownPredicates(filePath: Path, conf: Configuration): Unit = { if (orcFilterPushDown) { - OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).map { fileSchema => + 
OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).foreach { fileSchema => OrcFilters.createFilter(fileSchema, filters).foreach { f => OrcInputFormat.setSearchArgument(conf, f, fileSchema.fieldNames) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org