spark git commit: [SPARK-22122][SQL] Use analyzed logical plans to count input rows in TPCDSQueryBenchmark
Repository: spark
Updated Branches:
  refs/heads/master 530fe6832 -> c6610a997

[SPARK-22122][SQL] Use analyzed logical plans to count input rows in TPCDSQueryBenchmark

## What changes were proposed in this pull request?

The current code ignores WITH clauses when collecting the input relations of TPC-DS queries, which leads to inaccurate per-row processing times in the benchmark results. For example, in `q2` this fix catches all the input relations (`web_sales`, `date_dim`, and `catalog_sales`), whereas the current code catches `date_dim` only. About one-third of the TPC-DS queries use WITH clauses, so this is worth fixing.

## How was this patch tested?

Manually checked.

Author: Takeshi Yamamuro

Closes #19344 from maropu/RespectWithInTPCDSBench.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c6610a99
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c6610a99
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c6610a99

Branch: refs/heads/master
Commit: c6610a997f69148a1f1bbf69360e8f39e24cb70a
Parents: 530fe68
Author: Takeshi Yamamuro
Authored: Fri Sep 29 21:36:52 2017 -0700
Committer: gatorsmile
Committed: Fri Sep 29 21:36:52 2017 -0700

----------------------------------------------------------------------
 .../benchmark/TPCDSQueryBenchmark.scala | 32 +++++++-----------
 1 file changed, 11 insertions(+), 21 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/c6610a99/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
index 99c6df7..69247d7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
@@ -20,11 +20,10 @@ package org.apache.spark.sql.execution.benchmark
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
+import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
 import org.apache.spark.sql.catalyst.util._
+import org.apache.spark.sql.execution.datasources.LogicalRelation
 import org.apache.spark.util.Benchmark

 /**
@@ -66,24 +65,15 @@ object TPCDSQueryBenchmark extends Logging {
       classLoader = Thread.currentThread().getContextClassLoader)

     // This is an indirect hack to estimate the size of each query's input by traversing the
-    // logical plan and adding up the sizes of all tables that appear in the plan. Note that this
-    // currently doesn't take WITH subqueries into account which might lead to fairly inaccurate
-    // per-row processing time for those cases.
+    // logical plan and adding up the sizes of all tables that appear in the plan.
     val queryRelations = scala.collection.mutable.HashSet[String]()
-    spark.sql(queryString).queryExecution.logical.map {
-      case UnresolvedRelation(t: TableIdentifier) =>
-        queryRelations.add(t.table)
-      case lp: LogicalPlan =>
-        lp.expressions.foreach { _ foreach {
-          case subquery: SubqueryExpression =>
-            subquery.plan.foreach {
-              case UnresolvedRelation(t: TableIdentifier) =>
-                queryRelations.add(t.table)
-              case _ =>
-            }
-          case _ =>
-        }
-      }
+    spark.sql(queryString).queryExecution.analyzed.foreach {
+      case SubqueryAlias(alias, _: LogicalRelation) =>
+        queryRelations.add(alias)
+      case LogicalRelation(_, _, Some(catalogTable), _) =>
+        queryRelations.add(catalogTable.identifier.table)
+      case HiveTableRelation(tableMeta, _, _) =>
+        queryRelations.add(tableMeta.identifier.table)
       case _ =>
     }
     val numRows = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum
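For context, a minimal sketch of what traversing the *analyzed* plan buys: by the time analysis finishes, WITH-clause subqueries have been inlined, so relations referenced only inside a CTE surface as ordinary leaves. The table name `t1`, the temp path, and the local session below are illustrative only, and the two-argument `SubqueryAlias` match assumes the Spark version of this commit (later versions wrap the alias in an `AliasIdentifier`):

  import org.apache.spark.sql.SparkSession
  import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
  import org.apache.spark.sql.execution.datasources.LogicalRelation

  val spark = SparkSession.builder().master("local[*]").appName("plan-relations").getOrCreate()
  spark.range(10).selectExpr("id", "id * 2 AS v").write.mode("overwrite").parquet("/tmp/plan_demo")
  spark.read.parquet("/tmp/plan_demo").createOrReplaceTempView("t1")

  // `t1` appears only inside the WITH clause, yet the analyzed plan exposes it.
  val analyzed = spark.sql("WITH w AS (SELECT v FROM t1) SELECT sum(v) FROM w")
    .queryExecution.analyzed

  val relations = scala.collection.mutable.HashSet[String]()
  analyzed.foreach {
    case SubqueryAlias(alias, _: LogicalRelation) => relations.add(alias)
    case _ =>
  }
  println(relations)  // expected: Set(t1)

The pre-patch traversal over `queryExecution.logical` only saw `UnresolvedRelation` nodes at the top level and inside scalar subqueries, which is why CTE inputs were missed.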
spark git commit: [SPARK-21904][SQL] Rename tempTables to tempViews in SessionCatalog
Repository: spark
Updated Branches:
  refs/heads/master 472864014 -> 530fe6832

[SPARK-21904][SQL] Rename tempTables to tempViews in SessionCatalog

### What changes were proposed in this pull request?

`tempTables` is not right. To be consistent, we need to rename the internal variable names/comments to tempViews in SessionCatalog too.

### How was this patch tested?
N/A

Author: gatorsmile

Closes #19117 from gatorsmile/renameTempTablesToTempViews.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/530fe683
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/530fe683
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/530fe683

Branch: refs/heads/master
Commit: 530fe683297cb11b920a4df6630eff5d7e7ddce2
Parents: 4728640
Author: gatorsmile
Authored: Fri Sep 29 19:35:32 2017 -0700
Committer: gatorsmile
Committed: Fri Sep 29 19:35:32 2017 -0700

----------------------------------------------------------------------
 .../sql/catalyst/catalog/SessionCatalog.scala  | 79 ++++++++++----------
 .../spark/sql/execution/command/DDLSuite.scala | 10 +--
 2 files changed, 43 insertions(+), 46 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/530fe683/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 9407b72..6ba9ee5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.catalyst.catalog
 
-import java.lang.reflect.InvocationTargetException
 import java.net.URI
 import java.util.Locale
 import java.util.concurrent.Callable
@@ -25,7 +24,6 @@ import javax.annotation.concurrent.GuardedBy
 
 import scala.collection.mutable
 import scala.util.{Failure, Success, Try}
-import scala.util.control.NonFatal
 
 import com.google.common.cache.{Cache, CacheBuilder}
 import org.apache.hadoop.conf.Configuration
@@ -41,7 +39,6 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias, View}
 import org.apache.spark.sql.catalyst.util.StringUtils
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.Utils
 
@@ -52,7 +49,7 @@ object SessionCatalog {
 /**
  * An internal catalog that is used by a Spark Session. This internal catalog serves as a
  * proxy to the underlying metastore (e.g. Hive Metastore) and it also manages temporary
- * tables and functions of the Spark Session that it belongs to.
+ * views and functions of the Spark Session that it belongs to.
  *
  * This class must be thread-safe.
  */
@@ -90,13 +87,13 @@ class SessionCatalog(
       new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true))
   }
 
-  /** List of temporary tables, mapping from table name to their logical plan. */
+  /** List of temporary views, mapping from table name to their logical plan. */
   @GuardedBy("this")
-  protected val tempTables = new mutable.HashMap[String, LogicalPlan]
+  protected val tempViews = new mutable.HashMap[String, LogicalPlan]
 
   // Note: we track current database here because certain operations do not explicitly
   // specify the database (e.g. DROP TABLE my_table). In these cases we must first
-  // check whether the temporary table or function exists, then, if not, operate on
+  // check whether the temporary view or function exists, then, if not, operate on
   // the corresponding item in the current database.
   @GuardedBy("this")
   protected var currentDb: String = formatDatabaseName(DEFAULT_DATABASE)
@@ -272,8 +269,8 @@ class SessionCatalog(
   // ----------------------------------------------------------------
   // Tables
   // ----------------------------------------------------------------
-  // There are two kinds of tables, temporary tables and metastore tables.
-  // Temporary tables are isolated across sessions and do not belong to any
+  // There are two kinds of tables, temporary views and metastore tables.
+  // Temporary views are isolated across sessions and do not belong to any
   // particular database. Metastore tables can be used across multiple
   // sessions as their metadata is persisted in the underlying catalog.
   //
@@ -462,10
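The rename tracks real semantics: entries in this map are session-scoped logical plans, not tables in any database. A minimal sketch of that isolation (the view name `v` and the local session setup are illustrative, not from the patch, and assume `Catalog.tableExists` covers temporary views as it does in 2.x):

  import org.apache.spark.sql.SparkSession

  val spark = SparkSession.builder().master("local[*]").appName("temp-view-demo").getOrCreate()
  val sibling = spark.newSession()  // shares the metastore, but gets its own SessionCatalog state

  spark.range(5).createOrReplaceTempView("v")  // registered in this session's tempViews map

  assert(spark.catalog.tableExists("v"))     // resolvable here
  assert(!sibling.catalog.tableExists("v"))  // invisible to the sibling session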
spark git commit: Revert "[SPARK-22142][BUILD][STREAMING] Move Flume support behind a profile"
Repository: spark
Updated Branches:
  refs/heads/master 9ed7394a6 -> 472864014

Revert "[SPARK-22142][BUILD][STREAMING] Move Flume support behind a profile"

This reverts commit a2516f41aef68e39df7f6380fd2618cc148a609e.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47286401
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47286401
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47286401

Branch: refs/heads/master
Commit: 472864014c42da08b9d3f3fffbe657c6fcf1e2ef
Parents: 9ed7394
Author: gatorsmile
Authored: Fri Sep 29 11:45:58 2017 -0700
Committer: gatorsmile
Committed: Fri Sep 29 11:45:58 2017 -0700

----------------------------------------------------------------------
 dev/create-release/release-build.sh |  4 ++--
 dev/mima                            |  2 +-
 dev/scalastyle                      |  1 -
 dev/sparktestsupport/modules.py     | 20 +-------------------
 dev/test-dependencies.sh            |  2 +-
 docs/building-spark.md              |  6 ------
 pom.xml                             | 13 +++----------
 project/SparkBuild.scala            | 17 -----------------
 python/pyspark/streaming/tests.py   | 16 +++-------------
 9 files changed, 19 insertions(+), 62 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/47286401/dev/create-release/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index 7e8d5c7..5390f59 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -84,9 +84,9 @@ MVN="build/mvn --force"
 # Hive-specific profiles for some builds
 HIVE_PROFILES="-Phive -Phive-thriftserver"
 # Profiles for publishing snapshots and release to Maven Central
-PUBLISH_PROFILES="-Pmesos -Pyarn -Pflume $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl"
+PUBLISH_PROFILES="-Pmesos -Pyarn $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl"
 # Profiles for building binary releases
-BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Pflume -Psparkr"
+BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Psparkr"
 # Scala 2.11 only profiles for some builds
 SCALA_2_11_PROFILES="-Pkafka-0-8"
 # Scala 2.12 only profiles for some builds

http://git-wip-us.apache.org/repos/asf/spark/blob/47286401/dev/mima
----------------------------------------------------------------------
diff --git a/dev/mima b/dev/mima
index 1e3ca97..fdb21f5 100755
--- a/dev/mima
+++ b/dev/mima
@@ -24,7 +24,7 @@ set -e
 FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"
 
-SPARK_PROFILES="-Pmesos -Pkafka-0-8 -Pyarn -Pflume -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
+SPARK_PROFILES="-Pmesos -Pkafka-0-8 -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
 
 TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | tail -n1)"
 OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"

http://git-wip-us.apache.org/repos/asf/spark/blob/47286401/dev/scalastyle
----------------------------------------------------------------------
diff --git a/dev/scalastyle b/dev/scalastyle
index 89ecc8a..e5aa589 100755
--- a/dev/scalastyle
+++ b/dev/scalastyle
@@ -25,7 +25,6 @@ ERRORS=$(echo -e "q\n" \
     -Pmesos \
     -Pkafka-0-8 \
     -Pyarn \
-    -Pflume \
     -Phive \
     -Phive-thriftserver \
     scalastyle test:scalastyle \

http://git-wip-us.apache.org/repos/asf/spark/blob/47286401/dev/sparktestsupport/modules.py
----------------------------------------------------------------------
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 91d5667..50e14b6 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -279,12 +279,6 @@ streaming_flume_sink = Module(
     source_file_regexes=[
         "external/flume-sink",
     ],
-    build_profile_flags=[
-        "-Pflume",
-    ],
-    environ={
-        "ENABLE_FLUME_TESTS": "1"
-    },
     sbt_test_goals=[
         "streaming-flume-sink/test",
     ]
@@ -297,12 +291,6 @@ streaming_flume = Module(
     source_file_regexes=[
         "external/flume",
     ],
-    build_profile_flags=[
-        "-Pflume",
-    ],
-    environ={
-        "ENABLE_FLUME_TESTS": "1"
-    },
     sbt_test_goals=[
         "streaming-flume/test",
     ]
@@ -314,13 +302,7 @@ streaming_flume_assembly = Module(
     dependencies=[streaming_flume, streaming_flume_sink],
     source_file_regexes=[
         "external/flume-assembly",
-    ],
-    build_profile_flags=[
-        "-Pflume",
-    ],
-    environ={
-        "ENABLE_FLUME_TESTS": "1"
-    }
+    ]
 )

http://git-wip-us.apache.org/repos/asf/spark/blob/47286401/dev/test-dependencies.sh
spark git commit: [SPARK-22146] FileNotFoundException while reading ORC files containing special characters
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 ac9a0f692 -> 7bf25e086

[SPARK-22146] FileNotFoundException while reading ORC files containing special characters

## What changes were proposed in this pull request?

Reading ORC files containing special characters like '%' fails with a FileNotFoundException. This PR aims to fix the problem.

## How was this patch tested?

Added UT.

Author: Marco Gaido
Author: Marco Gaido

Closes #19368 from mgaido91/SPARK-22146.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7bf25e08
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7bf25e08
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7bf25e08

Branch: refs/heads/branch-2.2
Commit: 7bf25e086729782c62b8189e7417b86fa720553d
Parents: ac9a0f6
Author: Marco Gaido
Authored: Thu Sep 28 23:14:53 2017 -0700
Committer: gatorsmile
Committed: Fri Sep 29 09:05:15 2017 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/orc/OrcFileFormat.scala  |  2 +-
 .../spark/sql/hive/MetastoreDataSourcesSuite.scala     | 12 +++-
 2 files changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/7bf25e08/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 3a34ec5..6b76cfa 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -58,7 +58,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
       options: Map[String, String],
       files: Seq[FileStatus]): Option[StructType] = {
     OrcFileOperator.readSchema(
-      files.map(_.getPath.toUri.toString),
+      files.map(_.getPath.toString),
       Some(sparkSession.sessionState.newHadoopConf())
     )
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/7bf25e08/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 07d641d..32e97eb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -998,7 +998,6 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
     spark.sql("""drop database if exists testdb8156 CASCADE""")
   }
 
-
   test("skip hive metadata on table creation") {
     withTempDir { tempPath =>
       val schema = StructType((1 to 5).map(i => StructField(s"c_$i", StringType)))
@@ -1350,6 +1349,17 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
     }
   }
 
+  Seq("orc", "parquet", "csv", "json", "text").foreach { format =>
+    test(s"SPARK-22146: read files containing special characters using $format") {
+      val nameWithSpecialChars = s"sp%chars"
+      withTempDir { dir =>
+        val tmpFile = s"$dir/$nameWithSpecialChars"
+        spark.createDataset(Seq("a", "b")).write.format(format).save(tmpFile)
+        spark.read.format(format).load(tmpFile)
+      }
+    }
+  }
+
   private def withDebugMode(f: => Unit): Unit = {
     val previousValue = sparkSession.sparkContext.conf.get(DEBUG_MODE)
     try {
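The one-line fix is subtle: Hadoop's `Path` percent-encodes special characters when it is converted to a `java.net.URI`, so round-tripping `getPath.toUri.toString` back into a new `Path` encodes the name a second time and the reader looks for a file that does not exist. A rough sketch of the effect (assuming Hadoop's Path/URI quoting behavior; this snippet is illustrative, not from the patch):

  import org.apache.hadoop.fs.Path

  val p = new Path("/tmp/sp%chars")
  // Path builds its URI with the multi-argument java.net.URI constructor,
  // which quotes '%' as "%25".
  println(p.toUri.toString)  // expected: /tmp/sp%25chars
  println(p.toString)        // expected: /tmp/sp%chars

  // Feeding the URI string into a new Path quotes it once more, so a reader
  // built from it would look for "sp%25chars" and hit FileNotFoundException.
  println(new Path(p.toUri.toString).toUri.toString)  // expected: /tmp/sp%2525chars

Using `getPath.toString`, which keeps the unencoded form, avoids the double encoding.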
[1/2] spark git commit: Preparing Spark release v2.1.2-rc3
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 60f78c20c -> 78661f95e

Preparing Spark release v2.1.2-rc3

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/efdbef41
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/efdbef41
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/efdbef41

Branch: refs/heads/branch-2.1
Commit: efdbef412cb34d6018d5c2dfce2f85c5eb1587f1
Parents: 60f78c2
Author: Holden Karau
Authored: Fri Sep 29 09:04:26 2017 -0700
Committer: Holden Karau
Committed: Fri Sep 29 09:04:26 2017 -0700

----------------------------------------------------------------------
 R/pkg/DESCRIPTION                          | 2 +-
 assembly/pom.xml                           | 2 +-
 common/network-common/pom.xml              | 2 +-
 common/network-shuffle/pom.xml             | 2 +-
 common/network-yarn/pom.xml                | 2 +-
 common/sketch/pom.xml                      | 2 +-
 common/tags/pom.xml                        | 2 +-
 common/unsafe/pom.xml                      | 2 +-
 core/pom.xml                               | 2 +-
 docs/_config.yml                           | 4 ++--
 examples/pom.xml                           | 2 +-
 external/docker-integration-tests/pom.xml  | 2 +-
 external/flume-assembly/pom.xml            | 2 +-
 external/flume-sink/pom.xml                | 2 +-
 external/flume/pom.xml                     | 2 +-
 external/java8-tests/pom.xml               | 2 +-
 external/kafka-0-10-assembly/pom.xml       | 2 +-
 external/kafka-0-10-sql/pom.xml            | 2 +-
 external/kafka-0-10/pom.xml                | 2 +-
 external/kafka-0-8-assembly/pom.xml        | 2 +-
 external/kafka-0-8/pom.xml                 | 2 +-
 external/kinesis-asl-assembly/pom.xml      | 2 +-
 external/kinesis-asl/pom.xml               | 2 +-
 external/spark-ganglia-lgpl/pom.xml        | 2 +-
 graphx/pom.xml                             | 2 +-
 launcher/pom.xml                           | 2 +-
 mesos/pom.xml                              | 2 +-
 mllib-local/pom.xml                        | 2 +-
 mllib/pom.xml                              | 2 +-
 pom.xml                                    | 2 +-
 python/pyspark/version.py                  | 2 +-
 repl/pom.xml                               | 2 +-
 sql/catalyst/pom.xml                       | 2 +-
 sql/core/pom.xml                           | 2 +-
 sql/hive-thriftserver/pom.xml              | 2 +-
 sql/hive/pom.xml                           | 2 +-
 streaming/pom.xml                          | 2 +-
 tools/pom.xml                              | 2 +-
 yarn/pom.xml                               | 2 +-
 39 files changed, 40 insertions(+), 40 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/efdbef41/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 6c380b6..899d410 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 2.1.3
+Version: 2.1.2
 Title: R Frontend for Apache Spark
 Description: Provides an R Frontend for Apache Spark.
 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),

http://git-wip-us.apache.org/repos/asf/spark/blob/efdbef41/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index e9f915a..133f8e6 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.3-SNAPSHOT</version>
+    <version>2.1.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/efdbef41/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 7e203e7..d2631e4 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.3-SNAPSHOT</version>
+    <version>2.1.2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/efdbef41/common/network-shuffle/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 92dd275..c12d480 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.3-SNAPSHOT</version>
+    <version>2.1.2</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/efdbef41/common/network-yarn/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index abca418..d22db36 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
[spark] Git Push Summary
Repository: spark
Updated Tags:  refs/tags/v2.1.2-rc3 [created] efdbef412
[2/2] spark git commit: Preparing development version 2.1.3-SNAPSHOT
Preparing development version 2.1.3-SNAPSHOT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78661f95
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78661f95
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78661f95

Branch: refs/heads/branch-2.1
Commit: 78661f95e8db059d64aee377846a6c8e892e31ec
Parents: efdbef4
Author: Holden Karau
Authored: Fri Sep 29 09:04:35 2017 -0700
Committer: Holden Karau
Committed: Fri Sep 29 09:04:35 2017 -0700

----------------------------------------------------------------------
 R/pkg/DESCRIPTION                          | 2 +-
 assembly/pom.xml                           | 2 +-
 common/network-common/pom.xml              | 2 +-
 common/network-shuffle/pom.xml             | 2 +-
 common/network-yarn/pom.xml                | 2 +-
 common/sketch/pom.xml                      | 2 +-
 common/tags/pom.xml                        | 2 +-
 common/unsafe/pom.xml                      | 2 +-
 core/pom.xml                               | 2 +-
 docs/_config.yml                           | 4 ++--
 examples/pom.xml                           | 2 +-
 external/docker-integration-tests/pom.xml  | 2 +-
 external/flume-assembly/pom.xml            | 2 +-
 external/flume-sink/pom.xml                | 2 +-
 external/flume/pom.xml                     | 2 +-
 external/java8-tests/pom.xml               | 2 +-
 external/kafka-0-10-assembly/pom.xml       | 2 +-
 external/kafka-0-10-sql/pom.xml            | 2 +-
 external/kafka-0-10/pom.xml                | 2 +-
 external/kafka-0-8-assembly/pom.xml        | 2 +-
 external/kafka-0-8/pom.xml                 | 2 +-
 external/kinesis-asl-assembly/pom.xml      | 2 +-
 external/kinesis-asl/pom.xml               | 2 +-
 external/spark-ganglia-lgpl/pom.xml        | 2 +-
 graphx/pom.xml                             | 2 +-
 launcher/pom.xml                           | 2 +-
 mesos/pom.xml                              | 2 +-
 mllib-local/pom.xml                        | 2 +-
 mllib/pom.xml                              | 2 +-
 pom.xml                                    | 2 +-
 python/pyspark/version.py                  | 2 +-
 repl/pom.xml                               | 2 +-
 sql/catalyst/pom.xml                       | 2 +-
 sql/core/pom.xml                           | 2 +-
 sql/hive-thriftserver/pom.xml              | 2 +-
 sql/hive/pom.xml                           | 2 +-
 streaming/pom.xml                          | 2 +-
 tools/pom.xml                              | 2 +-
 yarn/pom.xml                               | 2 +-
 39 files changed, 40 insertions(+), 40 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/78661f95/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 899d410..6c380b6 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 2.1.2
+Version: 2.1.3
 Title: R Frontend for Apache Spark
 Description: Provides an R Frontend for Apache Spark.
 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),

http://git-wip-us.apache.org/repos/asf/spark/blob/78661f95/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 133f8e6..e9f915a 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.2</version>
+    <version>2.1.3-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/78661f95/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index d2631e4..7e203e7 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.2</version>
+    <version>2.1.3-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/78661f95/common/network-shuffle/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index c12d480..92dd275 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.2</version>
+    <version>2.1.3-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/78661f95/common/network-yarn/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index d22db36..abca418 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
spark git commit: [SPARK-22161][SQL] Add Impala-modified TPC-DS queries
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 8b2d8385c -> ac9a0f692

[SPARK-22161][SQL] Add Impala-modified TPC-DS queries

## What changes were proposed in this pull request?

Added Impala-modified TPC-DS queries to the TPC-DS query suites.
- Ref: https://github.com/cloudera/impala-tpcds-kit/tree/master/queries

## How was this patch tested?
N/A

Author: gatorsmile

Closes #19386 from gatorsmile/addImpalaQueries.

(cherry picked from commit 9ed7394a68315126b2dd00e53a444cc65b5a62ea)
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ac9a0f69
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ac9a0f69
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ac9a0f69

Branch: refs/heads/branch-2.2
Commit: ac9a0f6923a72ec8f92fe88760cf50a67497b666
Parents: 8b2d838
Author: gatorsmile
Authored: Fri Sep 29 08:59:42 2017 -0700
Committer: gatorsmile
Committed: Fri Sep 29 09:00:15 2017 -0700

----------------------------------------------------------------------
 .../resources/tpcds-modifiedQueries/q10.sql     |  70 ++++++
 .../resources/tpcds-modifiedQueries/q19.sql     |  38 ++++
 .../resources/tpcds-modifiedQueries/q27.sql     |  43 ++++
 .../test/resources/tpcds-modifiedQueries/q3.sql | 228 +++++++++++++++++++
 .../resources/tpcds-modifiedQueries/q34.sql     |  45 ++++
 .../resources/tpcds-modifiedQueries/q42.sql     |  28 +++
 .../resources/tpcds-modifiedQueries/q43.sql     |  36 +++
 .../resources/tpcds-modifiedQueries/q46.sql     |  80 +++++++
 .../resources/tpcds-modifiedQueries/q52.sql     |  27 +++
 .../resources/tpcds-modifiedQueries/q53.sql     |  37 +++
 .../resources/tpcds-modifiedQueries/q55.sql     |  24 ++
 .../resources/tpcds-modifiedQueries/q59.sql     |  83 +++++++
 .../resources/tpcds-modifiedQueries/q63.sql     |  29 +++
 .../resources/tpcds-modifiedQueries/q65.sql     |  58 +++++
 .../resources/tpcds-modifiedQueries/q68.sql     |  62 +++++
 .../test/resources/tpcds-modifiedQueries/q7.sql |  31 +++
 .../resources/tpcds-modifiedQueries/q73.sql     |  49 ++++
 .../resources/tpcds-modifiedQueries/q79.sql     |  59 +++++
 .../resources/tpcds-modifiedQueries/q89.sql     |  43 ++++
 .../resources/tpcds-modifiedQueries/q98.sql     |  32 +++
 .../resources/tpcds-modifiedQueries/ss_max.sql  |  14 ++
 .../org/apache/spark/sql/TPCDSQuerySuite.scala  |  26 ++-
 22 files changed, 1141 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
new file mode 100755
index 000..79dd3d5
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
@@ -0,0 +1,70 @@
+-- start query 10 in stream 0 using template query10.tpl
+with
+v1 as (
+  select
+    ws_bill_customer_sk as customer_sk
+  from web_sales,
+    date_dim
+  where ws_sold_date_sk = d_date_sk
+    and d_year = 2002
+    and d_moy between 4 and 4+3
+  union all
+  select
+    cs_ship_customer_sk as customer_sk
+  from catalog_sales,
+    date_dim
+  where cs_sold_date_sk = d_date_sk
+    and d_year = 2002
+    and d_moy between 4 and 4+3
+),
+v2 as (
+  select
+    ss_customer_sk as customer_sk
+  from store_sales,
+    date_dim
+  where ss_sold_date_sk = d_date_sk
+    and d_year = 2002
+    and d_moy between 4 and 4+3
+)
+select
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  count(*) cnt1,
+  cd_purchase_estimate,
+  count(*) cnt2,
+  cd_credit_rating,
+  count(*) cnt3,
+  cd_dep_count,
+  count(*) cnt4,
+  cd_dep_employed_count,
+  count(*) cnt5,
+  cd_dep_college_count,
+  count(*) cnt6
+from customer c
+join customer_address ca on (c.c_current_addr_sk = ca.ca_address_sk)
+join customer_demographics on (cd_demo_sk = c.c_current_cdemo_sk)
+left semi join v1 on (v1.customer_sk = c.c_customer_sk)
+left semi join v2 on (v2.customer_sk = c.c_customer_sk)
+where
+  ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County')
+group by
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  cd_purchase_estimate,
+  cd_credit_rating,
+  cd_dep_count,
+  cd_dep_employed_count,
+  cd_dep_college_count
+order by
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  cd_purchase_estimate,
+  cd_credit_rating,
+  cd_dep_count,
+  cd_dep_employed_count,
+  cd_dep_college_count
+limit 100
+-- end query 10 in stream 0 using template query10.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql
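As a rough illustration of how a suite can pick these files up (the helper `resourceToString` from `org.apache.spark.sql.catalyst.util` is the same one the stock TPC-DS queries go through; the exact wiring in `TPCDSQuerySuite` may differ, and a SparkSession named `spark` with the TPC-DS tables registered is assumed):

  import org.apache.spark.sql.catalyst.util.resourceToString

  // Load one of the added Impala-modified queries from the test classpath
  // and force analysis and optimization without executing it.
  val queryString = resourceToString("tpcds-modifiedQueries/q10.sql",
    classLoader = Thread.currentThread().getContextClassLoader)
  spark.sql(queryString).queryExecution.optimizedPlan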
spark git commit: [SPARK-22129][SPARK-22138] Release script improvements
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 8c5ab4e10 -> 8b2d8385c

[SPARK-22129][SPARK-22138] Release script improvements

## What changes were proposed in this pull request?

Use the GPG_KEY param, fix lsof to a non-hardcoded path, and remove the version swap since it wasn't really needed. Also export JAVA_HOME for downstream scripts.

## How was this patch tested?

Rolled 2.1.2 RC2

Author: Holden Karau

Closes #19359 from holdenk/SPARK-22129-fix-signing.

(cherry picked from commit ecbe416ab5001b32737966c5a2407597a1dafc32)
Signed-off-by: Holden Karau

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8b2d8385
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8b2d8385
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8b2d8385

Branch: refs/heads/branch-2.2
Commit: 8b2d8385ca8d065c07938ebde434d189416530e2
Parents: 8c5ab4e
Author: Holden Karau
Authored: Fri Sep 29 08:04:14 2017 -0700
Committer: Holden Karau
Committed: Fri Sep 29 08:04:26 2017 -0700

----------------------------------------------------------------------
 dev/create-release/release-build.sh | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/8b2d8385/dev/create-release/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index f93a96b..819f325 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -74,7 +74,7 @@ GIT_REF=${GIT_REF:-master}
 # Destination directory parent on remote server
 REMOTE_PARENT_DIR=${REMOTE_PARENT_DIR:-/home/$ASF_USERNAME/public_html}
 
-GPG="gpg --no-tty --batch"
+GPG="gpg -u $GPG_KEY --no-tty --batch"
 NEXUS_ROOT=https://repository.apache.org/service/local/staging
 NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
 BASE_DIR=$(pwd)
@@ -116,7 +116,7 @@ else
     echo "Please set JAVA_HOME correctly."
     exit 1
   else
-    JAVA_HOME="$JAVA_7_HOME"
+    export JAVA_HOME="$JAVA_7_HOME"
   fi
 fi
 fi
@@ -131,7 +131,7 @@ DEST_DIR_NAME="spark-$SPARK_PACKAGE_VERSION"
 
 function LFTP {
   SSH="ssh -o ConnectTimeout=300 -o StrictHostKeyChecking=no -i $ASF_RSA_KEY"
   COMMANDS=$(cat
spark git commit: [SPARK-22129][SPARK-22138] Release script improvements
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 361aa0efc -> 60f78c20c

[SPARK-22129][SPARK-22138] Release script improvements

## What changes were proposed in this pull request?

Use the GPG_KEY param, fix lsof to a non-hardcoded path, and remove the version swap since it wasn't really needed. Also export JAVA_HOME for downstream scripts.

## How was this patch tested?

Rolled 2.1.2 RC2

Author: Holden Karau

Closes #19359 from holdenk/SPARK-22129-fix-signing.

(cherry picked from commit ecbe416ab5001b32737966c5a2407597a1dafc32)
Signed-off-by: Holden Karau

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/60f78c20
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/60f78c20
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/60f78c20

Branch: refs/heads/branch-2.1
Commit: 60f78c20c6b0300c08c5f5329a559b3d3225fa68
Parents: 361aa0e
Author: Holden Karau
Authored: Fri Sep 29 08:04:14 2017 -0700
Committer: Holden Karau
Committed: Fri Sep 29 08:04:38 2017 -0700

----------------------------------------------------------------------
 dev/create-release/release-build.sh | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/60f78c20/dev/create-release/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index fa889d9..ad32c31 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -74,7 +74,7 @@ GIT_REF=${GIT_REF:-master}
 # Destination directory parent on remote server
 REMOTE_PARENT_DIR=${REMOTE_PARENT_DIR:-/home/$ASF_USERNAME/public_html}
 
-GPG="gpg --no-tty --batch"
+GPG="gpg -u $GPG_KEY --no-tty --batch"
 NEXUS_ROOT=https://repository.apache.org/service/local/staging
 NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
 BASE_DIR=$(pwd)
@@ -116,7 +116,7 @@ else
     echo "Please set JAVA_HOME correctly."
     exit 1
   else
-    JAVA_HOME="$JAVA_7_HOME"
+    export JAVA_HOME="$JAVA_7_HOME"
   fi
 fi
 fi
@@ -131,7 +131,7 @@ DEST_DIR_NAME="spark-$SPARK_PACKAGE_VERSION"
 
 function LFTP {
   SSH="ssh -o ConnectTimeout=300 -o StrictHostKeyChecking=no -i $ASF_RSA_KEY"
   COMMANDS=$(cat
spark git commit: [SPARK-22129][SPARK-22138] Release script improvements
Repository: spark
Updated Branches:
  refs/heads/master a2516f41a -> ecbe416ab

[SPARK-22129][SPARK-22138] Release script improvements

## What changes were proposed in this pull request?

Use the GPG_KEY param, fix lsof to a non-hardcoded path, and remove the version swap since it wasn't really needed. Also export JAVA_HOME for downstream scripts.

## How was this patch tested?

Rolled 2.1.2 RC2

Author: Holden Karau

Closes #19359 from holdenk/SPARK-22129-fix-signing.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ecbe416a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ecbe416a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ecbe416a

Branch: refs/heads/master
Commit: ecbe416ab5001b32737966c5a2407597a1dafc32
Parents: a2516f4
Author: Holden Karau
Authored: Fri Sep 29 08:04:14 2017 -0700
Committer: Holden Karau
Committed: Fri Sep 29 08:04:14 2017 -0700

----------------------------------------------------------------------
 dev/create-release/release-build.sh | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/ecbe416a/dev/create-release/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index c548a0a..7e8d5c7 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -74,7 +74,7 @@ GIT_REF=${GIT_REF:-master}
 # Destination directory parent on remote server
 REMOTE_PARENT_DIR=${REMOTE_PARENT_DIR:-/home/$ASF_USERNAME/public_html}
 
-GPG="gpg --no-tty --batch"
+GPG="gpg -u $GPG_KEY --no-tty --batch"
 NEXUS_ROOT=https://repository.apache.org/service/local/staging
 NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
 BASE_DIR=$(pwd)
@@ -125,7 +125,7 @@ else
     echo "Please set JAVA_HOME correctly."
     exit 1
   else
-    JAVA_HOME="$JAVA_7_HOME"
+    export JAVA_HOME="$JAVA_7_HOME"
   fi
 fi
 fi
@@ -140,7 +140,7 @@ DEST_DIR_NAME="spark-$SPARK_PACKAGE_VERSION"
 
 function LFTP {
   SSH="ssh -o ConnectTimeout=300 -o StrictHostKeyChecking=no -i $ASF_RSA_KEY"
   COMMANDS=$(cat
spark git commit: [SPARK-22142][BUILD][STREAMING] Move Flume support behind a profile
Repository: spark
Updated Branches:
  refs/heads/master 0fa4dbe4f -> a2516f41a

[SPARK-22142][BUILD][STREAMING] Move Flume support behind a profile

## What changes were proposed in this pull request?

Add 'flume' profile to enable Flume-related integration modules

## How was this patch tested?

Existing tests; no functional change

Author: Sean Owen

Closes #19365 from srowen/SPARK-22142.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2516f41
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2516f41
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2516f41

Branch: refs/heads/master
Commit: a2516f41aef68e39df7f6380fd2618cc148a609e
Parents: 0fa4dbe
Author: Sean Owen
Authored: Fri Sep 29 08:26:53 2017 +0100
Committer: Sean Owen
Committed: Fri Sep 29 08:26:53 2017 +0100

----------------------------------------------------------------------
 dev/create-release/release-build.sh |  4 ++--
 dev/mima                            |  2 +-
 dev/scalastyle                      |  1 +
 dev/sparktestsupport/modules.py     | 20 +++++++++++++++++++-
 dev/test-dependencies.sh            |  2 +-
 docs/building-spark.md              |  6 ++++++
 pom.xml                             | 13 ++++++++++---
 project/SparkBuild.scala            | 17 +++++++++++++++++
 python/pyspark/streaming/tests.py   | 16 +++++++++++++---
 9 files changed, 62 insertions(+), 19 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/a2516f41/dev/create-release/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index 8de1d6a..c548a0a 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -84,9 +84,9 @@ MVN="build/mvn --force"
 # Hive-specific profiles for some builds
 HIVE_PROFILES="-Phive -Phive-thriftserver"
 # Profiles for publishing snapshots and release to Maven Central
-PUBLISH_PROFILES="-Pmesos -Pyarn $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl"
+PUBLISH_PROFILES="-Pmesos -Pyarn -Pflume $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl"
 # Profiles for building binary releases
-BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Psparkr"
+BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Pflume -Psparkr"
 # Scala 2.11 only profiles for some builds
 SCALA_2_11_PROFILES="-Pkafka-0-8"
 # Scala 2.12 only profiles for some builds

http://git-wip-us.apache.org/repos/asf/spark/blob/a2516f41/dev/mima
----------------------------------------------------------------------
diff --git a/dev/mima b/dev/mima
index fdb21f5..1e3ca97 100755
--- a/dev/mima
+++ b/dev/mima
@@ -24,7 +24,7 @@ set -e
 FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"
 
-SPARK_PROFILES="-Pmesos -Pkafka-0-8 -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
+SPARK_PROFILES="-Pmesos -Pkafka-0-8 -Pyarn -Pflume -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
 
 TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | tail -n1)"
 OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"

http://git-wip-us.apache.org/repos/asf/spark/blob/a2516f41/dev/scalastyle
----------------------------------------------------------------------
diff --git a/dev/scalastyle b/dev/scalastyle
index e5aa589..89ecc8a 100755
--- a/dev/scalastyle
+++ b/dev/scalastyle
@@ -25,6 +25,7 @@ ERRORS=$(echo -e "q\n" \
     -Pmesos \
     -Pkafka-0-8 \
     -Pyarn \
+    -Pflume \
     -Phive \
     -Phive-thriftserver \
     scalastyle test:scalastyle \

http://git-wip-us.apache.org/repos/asf/spark/blob/a2516f41/dev/sparktestsupport/modules.py
----------------------------------------------------------------------
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 50e14b6..91d5667 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -279,6 +279,12 @@ streaming_flume_sink = Module(
     source_file_regexes=[
         "external/flume-sink",
     ],
+    build_profile_flags=[
+        "-Pflume",
+    ],
+    environ={
+        "ENABLE_FLUME_TESTS": "1"
+    },
     sbt_test_goals=[
         "streaming-flume-sink/test",
     ]
@@ -291,6 +297,12 @@ streaming_flume = Module(
     source_file_regexes=[
         "external/flume",
     ],
+    build_profile_flags=[
+        "-Pflume",
+    ],
+    environ={
+        "ENABLE_FLUME_TESTS": "1"
+    },
     sbt_test_goals=[
         "streaming-flume/test",
     ]
@@ -302,7 +314,13 @@ streaming_flume_assembly = Module(
     dependencies=[streaming_flume, streaming_flume_sink],
     source_file_regexes=[
         "external/flume-assembly",
-    ]
+    ],
+    build_profile_flags=[
+
spark git commit: [SPARK-22141][FOLLOWUP][SQL] Add comments for the order of batches
Repository: spark
Updated Branches:
  refs/heads/master 161ba7eaa -> 0fa4dbe4f

[SPARK-22141][FOLLOWUP][SQL] Add comments for the order of batches

## What changes were proposed in this pull request?

Add comments for specifying the position of batch "Check Cartesian Products", as rxin suggested in https://github.com/apache/spark/pull/19362.

## How was this patch tested?

Unit test

Author: Wang Gengliang

Closes #19379 from gengliangwang/SPARK-22141-followup.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0fa4dbe4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0fa4dbe4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0fa4dbe4

Branch: refs/heads/master
Commit: 0fa4dbe4f4d7b988be2105b46590b5207f7c8121
Parents: 161ba7e
Author: Wang Gengliang
Authored: Thu Sep 28 23:23:30 2017 -0700
Committer: gatorsmile
Committed: Thu Sep 28 23:23:30 2017 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/0fa4dbe4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index a391c51..b9fa39d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -134,6 +134,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog)
     Batch("LocalRelation", fixedPoint,
       ConvertToLocalRelation,
       PropagateEmptyRelation) ::
+    // The following batch should be executed after batch "Join Reorder" and "LocalRelation".
     Batch("Check Cartesian Products", Once,
       CheckCartesianProducts) ::
     Batch("OptimizeCodegen", Once,
@@ -1089,6 +1090,9 @@ object CombineLimits extends Rule[LogicalPlan] {
  * SELECT * from R, S where R.r = S.s,
  * the join between R and S is not a cartesian product and therefore should be allowed.
  * The predicate R.r = S.s is not recognized as a join condition until the ReorderJoin rule.
+ *
+ * This rule must be run AFTER the batch "LocalRelation", since a join with empty relation should
+ * not be a cartesian product.
  */
 object CheckCartesianProducts extends Rule[LogicalPlan] with PredicateHelper {
   /**
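The ordering matters for exactly the reason the new comment spells out: once PropagateEmptyRelation has run, a join against an empty local relation collapses away before the cartesian-product check can reject it. A hedged sketch of the observable effect (assuming the default spark.sql.crossJoin.enabled=false; the session setup is illustrative):

  import org.apache.spark.sql.SparkSession

  val spark = SparkSession.builder().master("local[*]").appName("empty-join-demo").getOrCreate()
  import spark.implicits._

  val empty = Seq.empty[(Int, Int)].toDF("r", "s")  // an empty LocalRelation
  val tiny = Seq((1, 2)).toDF("a", "b")

  // An unconditioned join is a cartesian product, but PropagateEmptyRelation (in the
  // earlier "LocalRelation" batch) rewrites the join to an empty relation first, so
  // CheckCartesianProducts never sees a cross join here and no exception is raised.
  println(tiny.join(empty).count())  // 0

  // Joining two non-empty relations without a condition would instead trip the check
  // ("Detected cartesian product ...") unless spark.sql.crossJoin.enabled is set.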
spark git commit: [SPARK-22146] FileNotFoundException while reading ORC files containing special characters
Repository: spark
Updated Branches:
  refs/heads/master 323806e68 -> 161ba7eaa

[SPARK-22146] FileNotFoundException while reading ORC files containing special characters

## What changes were proposed in this pull request?

Reading ORC files containing special characters like '%' fails with a FileNotFoundException. This PR aims to fix the problem.

## How was this patch tested?

Added UT.

Author: Marco Gaido
Author: Marco Gaido

Closes #19368 from mgaido91/SPARK-22146.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/161ba7ea
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/161ba7ea
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/161ba7ea

Branch: refs/heads/master
Commit: 161ba7eaa4539f0a7f20d9e2a493e0e323ca5249
Parents: 323806e
Author: Marco Gaido
Authored: Thu Sep 28 23:14:53 2017 -0700
Committer: gatorsmile
Committed: Thu Sep 28 23:14:53 2017 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/orc/OrcFileFormat.scala  |  2 +-
 .../spark/sql/hive/MetastoreDataSourcesSuite.scala     | 12 +++-
 2 files changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/161ba7ea/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 4d92a67..c76f0eb 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -58,7 +58,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
       options: Map[String, String],
       files: Seq[FileStatus]): Option[StructType] = {
     OrcFileOperator.readSchema(
-      files.map(_.getPath.toUri.toString),
+      files.map(_.getPath.toString),
       Some(sparkSession.sessionState.newHadoopConf())
     )
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/161ba7ea/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 29b0e6c..f5d41c9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -993,7 +993,6 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
     spark.sql("""drop database if exists testdb8156 CASCADE""")
   }
 
-
   test("skip hive metadata on table creation") {
     withTempDir { tempPath =>
       val schema = StructType((1 to 5).map(i => StructField(s"c_$i", StringType)))
@@ -1345,6 +1344,17 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
     }
   }
 
+  Seq("orc", "parquet", "csv", "json", "text").foreach { format =>
+    test(s"SPARK-22146: read files containing special characters using $format") {
+      val nameWithSpecialChars = s"sp%chars"
+      withTempDir { dir =>
+        val tmpFile = s"$dir/$nameWithSpecialChars"
+        spark.createDataset(Seq("a", "b")).write.format(format).save(tmpFile)
+        spark.read.format(format).load(tmpFile)
+      }
+    }
+  }
+
   private def withDebugMode(f: => Unit): Unit = {
     val previousValue = sparkSession.sparkContext.conf.get(DEBUG_MODE)
     try {