spark git commit: [SPARK-22122][SQL] Use analyzed logical plans to count input rows in TPCDSQueryBenchmark

2017-09-29 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 530fe6832 -> c6610a997


[SPARK-22122][SQL] Use analyzed logical plans to count input rows in TPCDSQueryBenchmark

## What changes were proposed in this pull request?
Since the current code ignores WITH clauses when checking input relations in
TPC-DS queries, the per-row processing times reported by the benchmark can be
inaccurate. For example, with this fix `q2` catches all of its input relations:
`web_sales`, `date_dim`, and `catalog_sales` (the current code catches
`date_dim` only). About one-third of the TPC-DS queries use WITH clauses, so I
think it is worth fixing this.
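
The actual change is in the diff below; as a rough standalone sketch of the
idea (the `SparkSession` and the `tableSizes` map here are assumptions standing
in for the benchmark's own setup, not the exact patch), the analyzed plan
already has WITH subqueries inlined and relations resolved, so collecting the
resolved leaf relations is enough:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
import org.apache.spark.sql.execution.datasources.LogicalRelation

// Sum the pre-computed row counts of every table a query reads, including
// tables that only appear inside WITH subqueries.
def estimateInputRows(
    spark: SparkSession,
    queryString: String,
    tableSizes: Map[String, Long]): Long = {
  val relations = spark.sql(queryString).queryExecution.analyzed.collect {
    case r: LogicalRelation if r.catalogTable.isDefined =>
      r.catalogTable.get.identifier.table
    case r: HiveTableRelation =>
      r.tableMeta.identifier.table
  }.toSet
  relations.map(tableSizes.getOrElse(_, 0L)).sum
}
```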

## How was this patch tested?
Manually checked.

Author: Takeshi Yamamuro 

Closes #19344 from maropu/RespectWithInTPCDSBench.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c6610a99
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c6610a99
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c6610a99

Branch: refs/heads/master
Commit: c6610a997f69148a1f1bbf69360e8f39e24cb70a
Parents: 530fe68
Author: Takeshi Yamamuro 
Authored: Fri Sep 29 21:36:52 2017 -0700
Committer: gatorsmile 
Committed: Fri Sep 29 21:36:52 2017 -0700

--
 .../benchmark/TPCDSQueryBenchmark.scala | 32 +++-
 1 file changed, 11 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c6610a99/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
index 99c6df7..69247d7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
@@ -20,11 +20,10 @@ package org.apache.spark.sql.execution.benchmark
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
+import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
 import org.apache.spark.sql.catalyst.util._
+import org.apache.spark.sql.execution.datasources.LogicalRelation
 import org.apache.spark.util.Benchmark
 
 /**
@@ -66,24 +65,15 @@ object TPCDSQueryBenchmark extends Logging {
 classLoader = Thread.currentThread().getContextClassLoader)
 
  // This is an indirect hack to estimate the size of each query's input by traversing the
-  // logical plan and adding up the sizes of all tables that appear in the plan. Note that this
-  // currently doesn't take WITH subqueries into account which might lead to fairly inaccurate
-  // per-row processing time for those cases.
+  // logical plan and adding up the sizes of all tables that appear in the plan.
   val queryRelations = scala.collection.mutable.HashSet[String]()
-  spark.sql(queryString).queryExecution.logical.map {
-case UnresolvedRelation(t: TableIdentifier) =>
-  queryRelations.add(t.table)
-case lp: LogicalPlan =>
-  lp.expressions.foreach { _ foreach {
-case subquery: SubqueryExpression =>
-  subquery.plan.foreach {
-case UnresolvedRelation(t: TableIdentifier) =>
-  queryRelations.add(t.table)
-case _ =>
-  }
-case _ =>
-  }
-}
+  spark.sql(queryString).queryExecution.analyzed.foreach {
+case SubqueryAlias(alias, _: LogicalRelation) =>
+  queryRelations.add(alias)
+case LogicalRelation(_, _, Some(catalogTable), _) =>
+  queryRelations.add(catalogTable.identifier.table)
+case HiveTableRelation(tableMeta, _, _) =>
+  queryRelations.add(tableMeta.identifier.table)
 case _ =>
   }
   val numRows = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum





spark git commit: [SPARK-21904][SQL] Rename tempTables to tempViews in SessionCatalog

2017-09-29 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 472864014 -> 530fe6832


[SPARK-21904][SQL] Rename tempTables to tempViews in SessionCatalog

### What changes were proposed in this pull request?
`tempTables` is not the right name. To be consistent, we need to rename the
internal variable names and comments to `tempViews` in SessionCatalog too.
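
For background, here is a minimal sketch of the behavior these names describe
(my own illustration, not part of this patch): a temporary view is
session-scoped, lives only in the SessionCatalog, and is checked before the
current database when a bare name is resolved.

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("temp-view-demo").getOrCreate()

// Registers a temporary view in the SessionCatalog; nothing is written to the
// metastore, and the view is visible only to this session.
spark.range(5).createOrReplaceTempView("numbers")

// A bare name is resolved against temporary views first, and only then against
// tables in the current database.
spark.table("numbers").show()
```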

### How was this patch tested?
N/A

Author: gatorsmile 

Closes #19117 from gatorsmile/renameTempTablesToTempViews.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/530fe683
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/530fe683
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/530fe683

Branch: refs/heads/master
Commit: 530fe683297cb11b920a4df6630eff5d7e7ddce2
Parents: 4728640
Author: gatorsmile 
Authored: Fri Sep 29 19:35:32 2017 -0700
Committer: gatorsmile 
Committed: Fri Sep 29 19:35:32 2017 -0700

--
 .../sql/catalyst/catalog/SessionCatalog.scala   | 79 ++--
 .../spark/sql/execution/command/DDLSuite.scala  | 10 +--
 2 files changed, 43 insertions(+), 46 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/530fe683/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 9407b72..6ba9ee5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.catalyst.catalog
 
-import java.lang.reflect.InvocationTargetException
 import java.net.URI
 import java.util.Locale
 import java.util.concurrent.Callable
@@ -25,7 +24,6 @@ import javax.annotation.concurrent.GuardedBy
 
 import scala.collection.mutable
 import scala.util.{Failure, Success, Try}
-import scala.util.control.NonFatal
 
 import com.google.common.cache.{Cache, CacheBuilder}
 import org.apache.hadoop.conf.Configuration
@@ -41,7 +39,6 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias, View}
 import org.apache.spark.sql.catalyst.util.StringUtils
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.Utils
 
@@ -52,7 +49,7 @@ object SessionCatalog {
 /**
 * An internal catalog that is used by a Spark Session. This internal catalog serves as a
 * proxy to the underlying metastore (e.g. Hive Metastore) and it also manages temporary
- * tables and functions of the Spark Session that it belongs to.
+ * views and functions of the Spark Session that it belongs to.
  *
  * This class must be thread-safe.
  */
@@ -90,13 +87,13 @@ class SessionCatalog(
   new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true))
   }
 
-  /** List of temporary tables, mapping from table name to their logical plan. 
*/
+  /** List of temporary views, mapping from table name to their logical plan. 
*/
   @GuardedBy("this")
-  protected val tempTables = new mutable.HashMap[String, LogicalPlan]
+  protected val tempViews = new mutable.HashMap[String, LogicalPlan]
 
   // Note: we track current database here because certain operations do not 
explicitly
   // specify the database (e.g. DROP TABLE my_table). In these cases we must 
first
-  // check whether the temporary table or function exists, then, if not, 
operate on
+  // check whether the temporary view or function exists, then, if not, 
operate on
   // the corresponding item in the current database.
   @GuardedBy("this")
   protected var currentDb: String = formatDatabaseName(DEFAULT_DATABASE)
@@ -272,8 +269,8 @@ class SessionCatalog(
   // 

   // Tables
   // 

-  // There are two kinds of tables, temporary tables and metastore tables.
-  // Temporary tables are isolated across sessions and do not belong to any
+  // There are two kinds of tables, temporary views and metastore tables.
+  // Temporary views are isolated across sessions and do not belong to any
   // particular database. Metastore tables can be used across multiple
   // sessions as their metadata is persisted in the underlying catalog.
   // 

@@ -462,10 

spark git commit: Revert "[SPARK-22142][BUILD][STREAMING] Move Flume support behind a profile"

2017-09-29 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 9ed7394a6 -> 472864014


Revert "[SPARK-22142][BUILD][STREAMING] Move Flume support behind a profile"

This reverts commit a2516f41aef68e39df7f6380fd2618cc148a609e.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47286401
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47286401
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47286401

Branch: refs/heads/master
Commit: 472864014c42da08b9d3f3fffbe657c6fcf1e2ef
Parents: 9ed7394
Author: gatorsmile 
Authored: Fri Sep 29 11:45:58 2017 -0700
Committer: gatorsmile 
Committed: Fri Sep 29 11:45:58 2017 -0700

--
 dev/create-release/release-build.sh |  4 ++--
 dev/mima|  2 +-
 dev/scalastyle  |  1 -
 dev/sparktestsupport/modules.py | 20 +---
 dev/test-dependencies.sh|  2 +-
 docs/building-spark.md  |  6 --
 pom.xml | 13 +++--
 project/SparkBuild.scala| 17 -
 python/pyspark/streaming/tests.py   | 16 +++-
 9 files changed, 19 insertions(+), 62 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/47286401/dev/create-release/release-build.sh
--
diff --git a/dev/create-release/release-build.sh 
b/dev/create-release/release-build.sh
index 7e8d5c7..5390f59 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -84,9 +84,9 @@ MVN="build/mvn --force"
 # Hive-specific profiles for some builds
 HIVE_PROFILES="-Phive -Phive-thriftserver"
 # Profiles for publishing snapshots and release to Maven Central
-PUBLISH_PROFILES="-Pmesos -Pyarn -Pflume $HIVE_PROFILES -Pspark-ganglia-lgpl 
-Pkinesis-asl"
+PUBLISH_PROFILES="-Pmesos -Pyarn $HIVE_PROFILES -Pspark-ganglia-lgpl 
-Pkinesis-asl"
 # Profiles for building binary releases
-BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Pflume -Psparkr"
+BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Psparkr"
 # Scala 2.11 only profiles for some builds
 SCALA_2_11_PROFILES="-Pkafka-0-8"
 # Scala 2.12 only profiles for some builds

http://git-wip-us.apache.org/repos/asf/spark/blob/47286401/dev/mima
--
diff --git a/dev/mima b/dev/mima
index 1e3ca97..fdb21f5 100755
--- a/dev/mima
+++ b/dev/mima
@@ -24,7 +24,7 @@ set -e
 FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"
 
-SPARK_PROFILES="-Pmesos -Pkafka-0-8 -Pyarn -Pflume -Pspark-ganglia-lgpl 
-Pkinesis-asl -Phive-thriftserver -Phive"
+SPARK_PROFILES="-Pmesos -Pkafka-0-8 -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl 
-Phive-thriftserver -Phive"
 TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export 
tools/fullClasspath" | tail -n1)"
 OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES 
"export oldDeps/fullClasspath" | tail -n1)"
 

http://git-wip-us.apache.org/repos/asf/spark/blob/47286401/dev/scalastyle
--
diff --git a/dev/scalastyle b/dev/scalastyle
index 89ecc8a..e5aa589 100755
--- a/dev/scalastyle
+++ b/dev/scalastyle
@@ -25,7 +25,6 @@ ERRORS=$(echo -e "q\n" \
 -Pmesos \
 -Pkafka-0-8 \
 -Pyarn \
--Pflume \
 -Phive \
 -Phive-thriftserver \
 scalastyle test:scalastyle \

http://git-wip-us.apache.org/repos/asf/spark/blob/47286401/dev/sparktestsupport/modules.py
--
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 91d5667..50e14b6 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -279,12 +279,6 @@ streaming_flume_sink = Module(
 source_file_regexes=[
 "external/flume-sink",
 ],
-build_profile_flags=[
-"-Pflume",
-],
-environ={
-"ENABLE_FLUME_TESTS": "1"
-},
 sbt_test_goals=[
 "streaming-flume-sink/test",
 ]
@@ -297,12 +291,6 @@ streaming_flume = Module(
 source_file_regexes=[
 "external/flume",
 ],
-build_profile_flags=[
-"-Pflume",
-],
-environ={
-"ENABLE_FLUME_TESTS": "1"
-},
 sbt_test_goals=[
 "streaming-flume/test",
 ]
@@ -314,13 +302,7 @@ streaming_flume_assembly = Module(
 dependencies=[streaming_flume, streaming_flume_sink],
 source_file_regexes=[
 "external/flume-assembly",
-],
-build_profile_flags=[
-"-Pflume",
-],
-environ={
-"ENABLE_FLUME_TESTS": "1"
-}
+]
 )
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/47286401/dev/test-dependencies.sh

spark git commit: [SPARK-22146] FileNotFoundException while reading ORC files containing special characters

2017-09-29 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 ac9a0f692 -> 7bf25e086


[SPARK-22146] FileNotFoundException while reading ORC files containing special 
characters

## What changes were proposed in this pull request?

Reading ORC files containing special characters like '%' fails with a 
FileNotFoundException.
This PR aims to fix the problem.
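
As I understand the root cause (a standalone sketch with a made-up path, not
part of the patch): `Path.toUri.toString` percent-encodes characters such as
'%', so the resulting string no longer names the file on disk, whereas
`Path.toString` keeps the raw path that can be used to re-open the file.

```scala
import org.apache.hadoop.fs.Path

// Hypothetical ORC file whose directory name contains '%'.
val p = new Path("/tmp/sp%chars/part-00000.orc")

// Keeps the path as written, so it can be used to locate the file again.
println(p.toString)       // /tmp/sp%chars/part-00000.orc

// Percent-encodes '%', producing a name that does not exist on disk.
println(p.toUri.toString) // /tmp/sp%25chars/part-00000.orc
```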

## How was this patch tested?

Added UT.

Author: Marco Gaido 
Author: Marco Gaido 

Closes #19368 from mgaido91/SPARK-22146.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7bf25e08
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7bf25e08
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7bf25e08

Branch: refs/heads/branch-2.2
Commit: 7bf25e086729782c62b8189e7417b86fa720553d
Parents: ac9a0f6
Author: Marco Gaido 
Authored: Thu Sep 28 23:14:53 2017 -0700
Committer: gatorsmile 
Committed: Fri Sep 29 09:05:15 2017 -0700

--
 .../org/apache/spark/sql/hive/orc/OrcFileFormat.scala   |  2 +-
 .../spark/sql/hive/MetastoreDataSourcesSuite.scala  | 12 +++-
 2 files changed, 12 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7bf25e08/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
--
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 3a34ec5..6b76cfa 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -58,7 +58,7 @@ class OrcFileFormat extends FileFormat with 
DataSourceRegister with Serializable
   options: Map[String, String],
   files: Seq[FileStatus]): Option[StructType] = {
 OrcFileOperator.readSchema(
-  files.map(_.getPath.toUri.toString),
+  files.map(_.getPath.toString),
   Some(sparkSession.sessionState.newHadoopConf())
 )
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/7bf25e08/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
--
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 07d641d..32e97eb 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -998,7 +998,6 @@ class MetastoreDataSourcesSuite extends QueryTest with 
SQLTestUtils with TestHiv
 spark.sql("""drop database if exists testdb8156 CASCADE""")
   }
 
-
   test("skip hive metadata on table creation") {
 withTempDir { tempPath =>
   val schema = StructType((1 to 5).map(i => StructField(s"c_$i", 
StringType)))
@@ -1350,6 +1349,17 @@ class MetastoreDataSourcesSuite extends QueryTest with 
SQLTestUtils with TestHiv
 }
   }
 
+  Seq("orc", "parquet", "csv", "json", "text").foreach { format =>
+test(s"SPARK-22146: read files containing special characters using 
$format") {
+  val nameWithSpecialChars = s"sp%chars"
+  withTempDir { dir =>
+val tmpFile = s"$dir/$nameWithSpecialChars"
+spark.createDataset(Seq("a", "b")).write.format(format).save(tmpFile)
+spark.read.format(format).load(tmpFile)
+  }
+}
+  }
+
   private def withDebugMode(f: => Unit): Unit = {
 val previousValue = sparkSession.sparkContext.conf.get(DEBUG_MODE)
 try {





[1/2] spark git commit: Preparing Spark release v2.1.2-rc3

2017-09-29 Thread holden
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 60f78c20c -> 78661f95e


Preparing Spark release v2.1.2-rc3


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/efdbef41
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/efdbef41
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/efdbef41

Branch: refs/heads/branch-2.1
Commit: efdbef412cb34d6018d5c2dfce2f85c5eb1587f1
Parents: 60f78c2
Author: Holden Karau 
Authored: Fri Sep 29 09:04:26 2017 -0700
Committer: Holden Karau 
Committed: Fri Sep 29 09:04:26 2017 -0700

--
 R/pkg/DESCRIPTION | 2 +-
 assembly/pom.xml  | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml| 2 +-
 common/network-yarn/pom.xml   | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml   | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml  | 2 +-
 docs/_config.yml  | 4 ++--
 examples/pom.xml  | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml   | 2 +-
 external/flume-sink/pom.xml   | 2 +-
 external/flume/pom.xml| 2 +-
 external/java8-tests/pom.xml  | 2 +-
 external/kafka-0-10-assembly/pom.xml  | 2 +-
 external/kafka-0-10-sql/pom.xml   | 2 +-
 external/kafka-0-10/pom.xml   | 2 +-
 external/kafka-0-8-assembly/pom.xml   | 2 +-
 external/kafka-0-8/pom.xml| 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml  | 2 +-
 external/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml| 2 +-
 launcher/pom.xml  | 2 +-
 mesos/pom.xml | 2 +-
 mllib-local/pom.xml   | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml   | 2 +-
 python/pyspark/version.py | 2 +-
 repl/pom.xml  | 2 +-
 sql/catalyst/pom.xml  | 2 +-
 sql/core/pom.xml  | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml  | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml  | 2 +-
 39 files changed, 40 insertions(+), 40 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/efdbef41/R/pkg/DESCRIPTION
--
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 6c380b6..899d410 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 2.1.3
+Version: 2.1.2
 Title: R Frontend for Apache Spark
 Description: Provides an R Frontend for Apache Spark.
 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),

http://git-wip-us.apache.org/repos/asf/spark/blob/efdbef41/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index e9f915a..133f8e6 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.11
-2.1.3-SNAPSHOT
+2.1.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/efdbef41/common/network-common/pom.xml
--
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 7e203e7..d2631e4 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.11
-2.1.3-SNAPSHOT
+2.1.2
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/efdbef41/common/network-shuffle/pom.xml
--
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 92dd275..c12d480 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.11
-2.1.3-SNAPSHOT
+2.1.2
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/efdbef41/common/network-yarn/pom.xml
--
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index abca418..d22db36 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
   
 

[spark] Git Push Summary

2017-09-29 Thread holden
Repository: spark
Updated Tags:  refs/tags/v2.1.2-rc3 [created] efdbef412




[2/2] spark git commit: Preparing development version 2.1.3-SNAPSHOT

2017-09-29 Thread holden
Preparing development version 2.1.3-SNAPSHOT


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78661f95
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78661f95
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78661f95

Branch: refs/heads/branch-2.1
Commit: 78661f95e8db059d64aee377846a6c8e892e31ec
Parents: efdbef4
Author: Holden Karau 
Authored: Fri Sep 29 09:04:35 2017 -0700
Committer: Holden Karau 
Committed: Fri Sep 29 09:04:35 2017 -0700

--
 R/pkg/DESCRIPTION | 2 +-
 assembly/pom.xml  | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml| 2 +-
 common/network-yarn/pom.xml   | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml   | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml  | 2 +-
 docs/_config.yml  | 4 ++--
 examples/pom.xml  | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml   | 2 +-
 external/flume-sink/pom.xml   | 2 +-
 external/flume/pom.xml| 2 +-
 external/java8-tests/pom.xml  | 2 +-
 external/kafka-0-10-assembly/pom.xml  | 2 +-
 external/kafka-0-10-sql/pom.xml   | 2 +-
 external/kafka-0-10/pom.xml   | 2 +-
 external/kafka-0-8-assembly/pom.xml   | 2 +-
 external/kafka-0-8/pom.xml| 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml  | 2 +-
 external/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml| 2 +-
 launcher/pom.xml  | 2 +-
 mesos/pom.xml | 2 +-
 mllib-local/pom.xml   | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml   | 2 +-
 python/pyspark/version.py | 2 +-
 repl/pom.xml  | 2 +-
 sql/catalyst/pom.xml  | 2 +-
 sql/core/pom.xml  | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml  | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml  | 2 +-
 39 files changed, 40 insertions(+), 40 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/78661f95/R/pkg/DESCRIPTION
--
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 899d410..6c380b6 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 2.1.2
+Version: 2.1.3
 Title: R Frontend for Apache Spark
 Description: Provides an R Frontend for Apache Spark.
 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),

http://git-wip-us.apache.org/repos/asf/spark/blob/78661f95/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 133f8e6..e9f915a 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.11
-2.1.2
+2.1.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/78661f95/common/network-common/pom.xml
--
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index d2631e4..7e203e7 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.11
-2.1.2
+2.1.3-SNAPSHOT
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/78661f95/common/network-shuffle/pom.xml
--
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index c12d480..92dd275 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.11
-2.1.2
+2.1.3-SNAPSHOT
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/78661f95/common/network-yarn/pom.xml
--
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index d22db36..abca418 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.11
-2.1.2
+2.1.3-SNAPSHOT
 

spark git commit: [SPARK-22161][SQL] Add Impala-modified TPC-DS queries

2017-09-29 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 8b2d8385c -> ac9a0f692


[SPARK-22161][SQL] Add Impala-modified TPC-DS queries

## What changes were proposed in this pull request?

Added Impala-modified TPC-DS queries to the TPC-DS query suites.

- Ref: https://github.com/cloudera/impala-tpcds-kit/tree/master/queries

## How was this patch tested?
N/A

Author: gatorsmile 

Closes #19386 from gatorsmile/addImpalaQueries.

(cherry picked from commit 9ed7394a68315126b2dd00e53a444cc65b5a62ea)
Signed-off-by: gatorsmile 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ac9a0f69
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ac9a0f69
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ac9a0f69

Branch: refs/heads/branch-2.2
Commit: ac9a0f6923a72ec8f92fe88760cf50a67497b666
Parents: 8b2d838
Author: gatorsmile 
Authored: Fri Sep 29 08:59:42 2017 -0700
Committer: gatorsmile 
Committed: Fri Sep 29 09:00:15 2017 -0700

--
 .../resources/tpcds-modifiedQueries/q10.sql |  70 ++
 .../resources/tpcds-modifiedQueries/q19.sql |  38 
 .../resources/tpcds-modifiedQueries/q27.sql |  43 
 .../test/resources/tpcds-modifiedQueries/q3.sql | 228 +++
 .../resources/tpcds-modifiedQueries/q34.sql |  45 
 .../resources/tpcds-modifiedQueries/q42.sql |  28 +++
 .../resources/tpcds-modifiedQueries/q43.sql |  36 +++
 .../resources/tpcds-modifiedQueries/q46.sql |  80 +++
 .../resources/tpcds-modifiedQueries/q52.sql |  27 +++
 .../resources/tpcds-modifiedQueries/q53.sql |  37 +++
 .../resources/tpcds-modifiedQueries/q55.sql |  24 ++
 .../resources/tpcds-modifiedQueries/q59.sql |  83 +++
 .../resources/tpcds-modifiedQueries/q63.sql |  29 +++
 .../resources/tpcds-modifiedQueries/q65.sql |  58 +
 .../resources/tpcds-modifiedQueries/q68.sql |  62 +
 .../test/resources/tpcds-modifiedQueries/q7.sql |  31 +++
 .../resources/tpcds-modifiedQueries/q73.sql |  49 
 .../resources/tpcds-modifiedQueries/q79.sql |  59 +
 .../resources/tpcds-modifiedQueries/q89.sql |  43 
 .../resources/tpcds-modifiedQueries/q98.sql |  32 +++
 .../resources/tpcds-modifiedQueries/ss_max.sql  |  14 ++
 .../org/apache/spark/sql/TPCDSQuerySuite.scala  |  26 ++-
 22 files changed, 1141 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
--
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
new file mode 100755
index 000..79dd3d5
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
@@ -0,0 +1,70 @@
+-- start query 10 in stream 0 using template query10.tpl
+with 
+v1 as (
+  select 
+ ws_bill_customer_sk as customer_sk
+  from web_sales,
+   date_dim
+  where ws_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3
+  union all
+  select 
+cs_ship_customer_sk as customer_sk
+  from catalog_sales,
+   date_dim 
+  where cs_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3
+),
+v2 as (
+  select 
+ss_customer_sk as customer_sk
+  from store_sales,
+   date_dim
+  where ss_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3 
+)
+select
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  count(*) cnt1,
+  cd_purchase_estimate,
+  count(*) cnt2,
+  cd_credit_rating,
+  count(*) cnt3,
+  cd_dep_count,
+  count(*) cnt4,
+  cd_dep_employed_count,
+  count(*) cnt5,
+  cd_dep_college_count,
+  count(*) cnt6
+from customer c
+join customer_address ca on (c.c_current_addr_sk = ca.ca_address_sk)
+join customer_demographics on (cd_demo_sk = c.c_current_cdemo_sk) 
+left semi join v1 on (v1.customer_sk = c.c_customer_sk) 
+left semi join v2 on (v2.customer_sk = c.c_customer_sk)
+where 
+  ca_county in ('Walker County','Richland County','Gaines County','Douglas 
County','Dona Ana County')
+group by 
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  cd_purchase_estimate,
+  cd_credit_rating,
+  cd_dep_count,
+  cd_dep_employed_count,
+  cd_dep_college_count
+order by 
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  cd_purchase_estimate,
+  cd_credit_rating,
+  cd_dep_count,
+  cd_dep_employed_count,
+  cd_dep_college_count
+limit 100
+-- end query 10 in stream 0 using template query10.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql

spark git commit: [SPARK-22129][SPARK-22138] Release script improvements

2017-09-29 Thread holden
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 8c5ab4e10 -> 8b2d8385c


[SPARK-22129][SPARK-22138] Release script improvements

## What changes were proposed in this pull request?

Use the GPG_KEY parameter, change lsof to a non-hardcoded path, and remove the
version swap since it wasn't really needed. Also export JAVA_HOME for
downstream scripts.

## How was this patch tested?

Rolled 2.1.2 RC2

Author: Holden Karau 

Closes #19359 from holdenk/SPARK-22129-fix-signing.

(cherry picked from commit ecbe416ab5001b32737966c5a2407597a1dafc32)
Signed-off-by: Holden Karau 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8b2d8385
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8b2d8385
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8b2d8385

Branch: refs/heads/branch-2.2
Commit: 8b2d8385ca8d065c07938ebde434d189416530e2
Parents: 8c5ab4e
Author: Holden Karau 
Authored: Fri Sep 29 08:04:14 2017 -0700
Committer: Holden Karau 
Committed: Fri Sep 29 08:04:26 2017 -0700

--
 dev/create-release/release-build.sh | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8b2d8385/dev/create-release/release-build.sh
--
diff --git a/dev/create-release/release-build.sh 
b/dev/create-release/release-build.sh
index f93a96b..819f325 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -74,7 +74,7 @@ GIT_REF=${GIT_REF:-master}
 # Destination directory parent on remote server
 REMOTE_PARENT_DIR=${REMOTE_PARENT_DIR:-/home/$ASF_USERNAME/public_html}
 
-GPG="gpg --no-tty --batch"
+GPG="gpg -u $GPG_KEY --no-tty --batch"
 NEXUS_ROOT=https://repository.apache.org/service/local/staging
 NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
 BASE_DIR=$(pwd)
@@ -116,7 +116,7 @@ else
   echo "Please set JAVA_HOME correctly."
   exit 1
 else
-  JAVA_HOME="$JAVA_7_HOME"
+  export JAVA_HOME="$JAVA_7_HOME"
 fi
   fi
 fi
@@ -131,7 +131,7 @@ DEST_DIR_NAME="spark-$SPARK_PACKAGE_VERSION"
 function LFTP {
   SSH="ssh -o ConnectTimeout=300 -o StrictHostKeyChecking=no -i $ASF_RSA_KEY"
   COMMANDS=$(cat 

spark git commit: [SPARK-22129][SPARK-22138] Release script improvements

2017-09-29 Thread holden
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 361aa0efc -> 60f78c20c


[SPARK-22129][SPARK-22138] Release script improvements

## What changes were proposed in this pull request?

Use the GPG_KEY parameter, change lsof to a non-hardcoded path, and remove the
version swap since it wasn't really needed. Also export JAVA_HOME for
downstream scripts.

## How was this patch tested?

Rolled 2.1.2 RC2

Author: Holden Karau 

Closes #19359 from holdenk/SPARK-22129-fix-signing.

(cherry picked from commit ecbe416ab5001b32737966c5a2407597a1dafc32)
Signed-off-by: Holden Karau 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/60f78c20
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/60f78c20
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/60f78c20

Branch: refs/heads/branch-2.1
Commit: 60f78c20c6b0300c08c5f5329a559b3d3225fa68
Parents: 361aa0e
Author: Holden Karau 
Authored: Fri Sep 29 08:04:14 2017 -0700
Committer: Holden Karau 
Committed: Fri Sep 29 08:04:38 2017 -0700

--
 dev/create-release/release-build.sh | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/60f78c20/dev/create-release/release-build.sh
--
diff --git a/dev/create-release/release-build.sh 
b/dev/create-release/release-build.sh
index fa889d9..ad32c31 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -74,7 +74,7 @@ GIT_REF=${GIT_REF:-master}
 # Destination directory parent on remote server
 REMOTE_PARENT_DIR=${REMOTE_PARENT_DIR:-/home/$ASF_USERNAME/public_html}
 
-GPG="gpg --no-tty --batch"
+GPG="gpg -u $GPG_KEY --no-tty --batch"
 NEXUS_ROOT=https://repository.apache.org/service/local/staging
 NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
 BASE_DIR=$(pwd)
@@ -116,7 +116,7 @@ else
   echo "Please set JAVA_HOME correctly."
   exit 1
 else
-  JAVA_HOME="$JAVA_7_HOME"
+  export JAVA_HOME="$JAVA_7_HOME"
 fi
   fi
 fi
@@ -131,7 +131,7 @@ DEST_DIR_NAME="spark-$SPARK_PACKAGE_VERSION"
 function LFTP {
   SSH="ssh -o ConnectTimeout=300 -o StrictHostKeyChecking=no -i $ASF_RSA_KEY"
   COMMANDS=$(cat 

spark git commit: [SPARK-22129][SPARK-22138] Release script improvements

2017-09-29 Thread holden
Repository: spark
Updated Branches:
  refs/heads/master a2516f41a -> ecbe416ab


[SPARK-22129][SPARK-22138] Release script improvements

## What changes were proposed in this pull request?

Use the GPG_KEY parameter, change lsof to a non-hardcoded path, and remove the
version swap since it wasn't really needed. Also export JAVA_HOME for
downstream scripts.

## How was this patch tested?

Rolled 2.1.2 RC2

Author: Holden Karau 

Closes #19359 from holdenk/SPARK-22129-fix-signing.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ecbe416a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ecbe416a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ecbe416a

Branch: refs/heads/master
Commit: ecbe416ab5001b32737966c5a2407597a1dafc32
Parents: a2516f4
Author: Holden Karau 
Authored: Fri Sep 29 08:04:14 2017 -0700
Committer: Holden Karau 
Committed: Fri Sep 29 08:04:14 2017 -0700

--
 dev/create-release/release-build.sh | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ecbe416a/dev/create-release/release-build.sh
--
diff --git a/dev/create-release/release-build.sh 
b/dev/create-release/release-build.sh
index c548a0a..7e8d5c7 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -74,7 +74,7 @@ GIT_REF=${GIT_REF:-master}
 # Destination directory parent on remote server
 REMOTE_PARENT_DIR=${REMOTE_PARENT_DIR:-/home/$ASF_USERNAME/public_html}
 
-GPG="gpg --no-tty --batch"
+GPG="gpg -u $GPG_KEY --no-tty --batch"
 NEXUS_ROOT=https://repository.apache.org/service/local/staging
 NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
 BASE_DIR=$(pwd)
@@ -125,7 +125,7 @@ else
   echo "Please set JAVA_HOME correctly."
   exit 1
 else
-  JAVA_HOME="$JAVA_7_HOME"
+  export JAVA_HOME="$JAVA_7_HOME"
 fi
   fi
 fi
@@ -140,7 +140,7 @@ DEST_DIR_NAME="spark-$SPARK_PACKAGE_VERSION"
 function LFTP {
   SSH="ssh -o ConnectTimeout=300 -o StrictHostKeyChecking=no -i $ASF_RSA_KEY"
   COMMANDS=$(cat 

spark git commit: [SPARK-22142][BUILD][STREAMING] Move Flume support behind a profile

2017-09-29 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 0fa4dbe4f -> a2516f41a


[SPARK-22142][BUILD][STREAMING] Move Flume support behind a profile

## What changes were proposed in this pull request?

Add 'flume' profile to enable Flume-related integration modules

## How was this patch tested?

Existing tests; no functional change

Author: Sean Owen 

Closes #19365 from srowen/SPARK-22142.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2516f41
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2516f41
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2516f41

Branch: refs/heads/master
Commit: a2516f41aef68e39df7f6380fd2618cc148a609e
Parents: 0fa4dbe
Author: Sean Owen 
Authored: Fri Sep 29 08:26:53 2017 +0100
Committer: Sean Owen 
Committed: Fri Sep 29 08:26:53 2017 +0100

--
 dev/create-release/release-build.sh |  4 ++--
 dev/mima|  2 +-
 dev/scalastyle  |  1 +
 dev/sparktestsupport/modules.py | 20 +++-
 dev/test-dependencies.sh|  2 +-
 docs/building-spark.md  |  6 ++
 pom.xml | 13 ++---
 project/SparkBuild.scala| 17 +
 python/pyspark/streaming/tests.py   | 16 +---
 9 files changed, 62 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a2516f41/dev/create-release/release-build.sh
--
diff --git a/dev/create-release/release-build.sh 
b/dev/create-release/release-build.sh
index 8de1d6a..c548a0a 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -84,9 +84,9 @@ MVN="build/mvn --force"
 # Hive-specific profiles for some builds
 HIVE_PROFILES="-Phive -Phive-thriftserver"
 # Profiles for publishing snapshots and release to Maven Central
-PUBLISH_PROFILES="-Pmesos -Pyarn $HIVE_PROFILES -Pspark-ganglia-lgpl 
-Pkinesis-asl"
+PUBLISH_PROFILES="-Pmesos -Pyarn -Pflume $HIVE_PROFILES -Pspark-ganglia-lgpl 
-Pkinesis-asl"
 # Profiles for building binary releases
-BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Psparkr"
+BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Pflume -Psparkr"
 # Scala 2.11 only profiles for some builds
 SCALA_2_11_PROFILES="-Pkafka-0-8"
 # Scala 2.12 only profiles for some builds

http://git-wip-us.apache.org/repos/asf/spark/blob/a2516f41/dev/mima
--
diff --git a/dev/mima b/dev/mima
index fdb21f5..1e3ca97 100755
--- a/dev/mima
+++ b/dev/mima
@@ -24,7 +24,7 @@ set -e
 FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"
 
-SPARK_PROFILES="-Pmesos -Pkafka-0-8 -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl 
-Phive-thriftserver -Phive"
+SPARK_PROFILES="-Pmesos -Pkafka-0-8 -Pyarn -Pflume -Pspark-ganglia-lgpl 
-Pkinesis-asl -Phive-thriftserver -Phive"
 TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export 
tools/fullClasspath" | tail -n1)"
 OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES 
"export oldDeps/fullClasspath" | tail -n1)"
 

http://git-wip-us.apache.org/repos/asf/spark/blob/a2516f41/dev/scalastyle
--
diff --git a/dev/scalastyle b/dev/scalastyle
index e5aa589..89ecc8a 100755
--- a/dev/scalastyle
+++ b/dev/scalastyle
@@ -25,6 +25,7 @@ ERRORS=$(echo -e "q\n" \
 -Pmesos \
 -Pkafka-0-8 \
 -Pyarn \
+-Pflume \
 -Phive \
 -Phive-thriftserver \
 scalastyle test:scalastyle \

http://git-wip-us.apache.org/repos/asf/spark/blob/a2516f41/dev/sparktestsupport/modules.py
--
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 50e14b6..91d5667 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -279,6 +279,12 @@ streaming_flume_sink = Module(
 source_file_regexes=[
 "external/flume-sink",
 ],
+build_profile_flags=[
+"-Pflume",
+],
+environ={
+"ENABLE_FLUME_TESTS": "1"
+},
 sbt_test_goals=[
 "streaming-flume-sink/test",
 ]
@@ -291,6 +297,12 @@ streaming_flume = Module(
 source_file_regexes=[
 "external/flume",
 ],
+build_profile_flags=[
+"-Pflume",
+],
+environ={
+"ENABLE_FLUME_TESTS": "1"
+},
 sbt_test_goals=[
 "streaming-flume/test",
 ]
@@ -302,7 +314,13 @@ streaming_flume_assembly = Module(
 dependencies=[streaming_flume, streaming_flume_sink],
 source_file_regexes=[
 "external/flume-assembly",
-]
+],
+build_profile_flags=[
+

spark git commit: [SPARK-22141][FOLLOWUP][SQL] Add comments for the order of batches

2017-09-29 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 161ba7eaa -> 0fa4dbe4f


[SPARK-22141][FOLLOWUP][SQL] Add comments for the order of batches

## What changes were proposed in this pull request?
Add comments specifying the position of the batch "Check Cartesian Products",
as rxin suggested in https://github.com/apache/spark/pull/19362.
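
A rough sketch of why the ordering matters (my own illustration, assuming the
default `spark.sql.crossJoin.enabled=false`): because the "LocalRelation"
batch, which includes `PropagateEmptyRelation`, runs first, a join against an
empty local relation is collapsed away before `CheckCartesianProducts` runs,
so it is not rejected as an implicit cartesian product.

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("cartesian-order").getOrCreate()
import spark.implicits._

val left  = Seq(1, 2, 3).toDF("a")
val empty = Seq.empty[Int].toDF("b")  // an empty LocalRelation

// No join condition, so this is an implicit cartesian product. Because
// PropagateEmptyRelation rewrites the join to an empty relation before
// CheckCartesianProducts runs, no exception is thrown and the count is 0.
println(left.join(empty).count())
```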

## How was this patch tested?
Unit test

Author: Wang Gengliang 

Closes #19379 from gengliangwang/SPARK-22141-followup.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0fa4dbe4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0fa4dbe4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0fa4dbe4

Branch: refs/heads/master
Commit: 0fa4dbe4f4d7b988be2105b46590b5207f7c8121
Parents: 161ba7e
Author: Wang Gengliang 
Authored: Thu Sep 28 23:23:30 2017 -0700
Committer: gatorsmile 
Committed: Thu Sep 28 23:23:30 2017 -0700

--
 .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala  | 4 
 1 file changed, 4 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0fa4dbe4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index a391c51..b9fa39d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -134,6 +134,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog)
 Batch("LocalRelation", fixedPoint,
   ConvertToLocalRelation,
   PropagateEmptyRelation) ::
+// The following batch should be executed after batch "Join Reorder" and 
"LocalRelation".
 Batch("Check Cartesian Products", Once,
   CheckCartesianProducts) ::
 Batch("OptimizeCodegen", Once,
@@ -1089,6 +1090,9 @@ object CombineLimits extends Rule[LogicalPlan] {
  * SELECT * from R, S where R.r = S.s,
  * the join between R and S is not a cartesian product and therefore should be 
allowed.
  * The predicate R.r = S.s is not recognized as a join condition until the 
ReorderJoin rule.
+ *
+ * This rule must be run AFTER the batch "LocalRelation", since a join with 
empty relation should
+ * not be a cartesian product.
  */
 object CheckCartesianProducts extends Rule[LogicalPlan] with PredicateHelper {
   /**





spark git commit: [SPARK-22146] FileNotFoundException while reading ORC files containing special characters

2017-09-29 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 323806e68 -> 161ba7eaa


[SPARK-22146] FileNotFoundException while reading ORC files containing special 
characters

## What changes were proposed in this pull request?

Reading ORC files containing special characters like '%' fails with a 
FileNotFoundException.
This PR aims to fix the problem.

## How was this patch tested?

Added UT.

Author: Marco Gaido 
Author: Marco Gaido 

Closes #19368 from mgaido91/SPARK-22146.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/161ba7ea
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/161ba7ea
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/161ba7ea

Branch: refs/heads/master
Commit: 161ba7eaa4539f0a7f20d9e2a493e0e323ca5249
Parents: 323806e
Author: Marco Gaido 
Authored: Thu Sep 28 23:14:53 2017 -0700
Committer: gatorsmile 
Committed: Thu Sep 28 23:14:53 2017 -0700

--
 .../org/apache/spark/sql/hive/orc/OrcFileFormat.scala   |  2 +-
 .../spark/sql/hive/MetastoreDataSourcesSuite.scala  | 12 +++-
 2 files changed, 12 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/161ba7ea/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
--
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 4d92a67..c76f0eb 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -58,7 +58,7 @@ class OrcFileFormat extends FileFormat with 
DataSourceRegister with Serializable
   options: Map[String, String],
   files: Seq[FileStatus]): Option[StructType] = {
 OrcFileOperator.readSchema(
-  files.map(_.getPath.toUri.toString),
+  files.map(_.getPath.toString),
   Some(sparkSession.sessionState.newHadoopConf())
 )
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/161ba7ea/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
--
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 29b0e6c..f5d41c9 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -993,7 +993,6 @@ class MetastoreDataSourcesSuite extends QueryTest with 
SQLTestUtils with TestHiv
 spark.sql("""drop database if exists testdb8156 CASCADE""")
   }
 
-
   test("skip hive metadata on table creation") {
 withTempDir { tempPath =>
   val schema = StructType((1 to 5).map(i => StructField(s"c_$i", 
StringType)))
@@ -1345,6 +1344,17 @@ class MetastoreDataSourcesSuite extends QueryTest with 
SQLTestUtils with TestHiv
 }
   }
 
+  Seq("orc", "parquet", "csv", "json", "text").foreach { format =>
+test(s"SPARK-22146: read files containing special characters using 
$format") {
+  val nameWithSpecialChars = s"sp%chars"
+  withTempDir { dir =>
+val tmpFile = s"$dir/$nameWithSpecialChars"
+spark.createDataset(Seq("a", "b")).write.format(format).save(tmpFile)
+spark.read.format(format).load(tmpFile)
+  }
+}
+  }
+
   private def withDebugMode(f: => Unit): Unit = {
 val previousValue = sparkSession.sparkContext.conf.get(DEBUG_MODE)
 try {

