spark git commit: Revert "[SPARK-24402][SQL] Optimize `In` expression when only one element in the collection or collection is empty"

2018-07-16 Thread gurwls223
Repository: spark
Updated Branches:
  refs/heads/master f876d3fa8 -> 0ca16f6e1


Revert "[SPARK-24402][SQL] Optimize `In` expression when only one element in 
the collection or collection is empty"

This reverts commit 0f0d1865f581a9158d73505471953656b173beba.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ca16f6e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ca16f6e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ca16f6e

Branch: refs/heads/master
Commit: 0ca16f6e143768f0c96b5310c1f81b3b51dcbbc8
Parents: f876d3f
Author: hyukjinkwon 
Authored: Tue Jul 17 11:30:53 2018 +0800
Committer: hyukjinkwon 
Committed: Tue Jul 17 11:30:53 2018 +0800

--
 .../sql/catalyst/optimizer/expressions.scala| 13 +++-
 .../catalyst/optimizer/OptimizeInSuite.scala| 32 
 2 files changed, 4 insertions(+), 41 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0ca16f6e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index f78a0ff..1d363b8 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
@@ -218,20 +218,15 @@ object ReorderAssociativeOperator extends 
Rule[LogicalPlan] {
 object OptimizeIn extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
 case q: LogicalPlan => q transformExpressionsDown {
-  case In(v, list) if list.isEmpty =>
-// When v is not nullable, the following expression will be optimized
-// to FalseLiteral which is tested in OptimizeInSuite.scala
-If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType))
+  case In(v, list) if list.isEmpty && !v.nullable => FalseLiteral
   case expr @ In(v, list) if expr.inSetConvertible =>
 val newList = ExpressionSet(list).toSeq
-if (newList.length == 1 && !newList.isInstanceOf[ListQuery]) {
-  EqualTo(v, newList.head)
-} else if (newList.length > 
SQLConf.get.optimizerInSetConversionThreshold) {
+if (newList.size > SQLConf.get.optimizerInSetConversionThreshold) {
   val hSet = newList.map(e => e.eval(EmptyRow))
   InSet(v, HashSet() ++ hSet)
-} else if (newList.length < list.length) {
+} else if (newList.size < list.size) {
   expr.copy(list = newList)
-} else { // newList.length == list.length && newList.length > 1
+} else { // newList.length == list.length
   expr
 }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/0ca16f6e/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
index 86522a6..478118e 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
@@ -176,21 +176,6 @@ class OptimizeInSuite extends PlanTest {
 }
   }
 
-  test("OptimizedIn test: one element in list gets transformed to EqualTo.") {
-val originalQuery =
-  testRelation
-.where(In(UnresolvedAttribute("a"), Seq(Literal(1
-.analyze
-
-val optimized = Optimize.execute(originalQuery)
-val correctAnswer =
-  testRelation
-.where(EqualTo(UnresolvedAttribute("a"), Literal(1)))
-.analyze
-
-comparePlans(optimized, correctAnswer)
-  }
-
   test("OptimizedIn test: In empty list gets transformed to FalseLiteral " +
 "when value is not nullable") {
 val originalQuery =
@@ -206,21 +191,4 @@ class OptimizeInSuite extends PlanTest {
 
 comparePlans(optimized, correctAnswer)
   }
-
-  test("OptimizedIn test: In empty list gets transformed to `If` expression " +
-"when value is nullable") {
-val originalQuery =
-  testRelation
-.where(In(UnresolvedAttribute("a"), Nil))
-.analyze
-
-val optimized = Optimize.execute(originalQuery)
-val correctAnswer =
-  testRelation
-.where(If(IsNotNull(UnresolvedAttribute("a")),
-  Literal(false), Literal.create(null, BooleanType)))
-.analyze
-
-comparePlans(optimized, correctAnswer)
-  }
 }



svn commit: r28164 - in /dev/spark/2.4.0-SNAPSHOT-2018_07_16_20_01-f876d3f-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-07-16 Thread pwendell
Author: pwendell
Date: Tue Jul 17 03:16:07 2018
New Revision: 28164

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_07_16_20_01-f876d3f docs


[This commit notification would consist of 1468 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]




spark git commit: [SPARK-20220][DOCS] Documentation Add thrift scheduling pool config to scheduling docs

2018-07-16 Thread gurwls223
Repository: spark
Updated Branches:
  refs/heads/master d57a267b7 -> f876d3fa8


[SPARK-20220][DOCS] Documentation Add thrift scheduling pool config to 
scheduling docs

## What changes were proposed in this pull request?

The Thrift scheduling pool configuration documentation was removed in a previous 
release. This adds it back to the job scheduling configuration docs.
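
For context, a minimal sketch of how a JDBC client could use this setting, assuming 
a Thrift server listening on localhost:10000, a fair scheduler pool named 
`accounting`, and the Hive JDBC driver on the classpath (all of these are 
assumptions, not part of this change):

```scala
import java.sql.DriverManager

// Open a session against the Spark Thrift server (URL and credentials are placeholders).
val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "user", "")
val stmt = conn.createStatement()

// Subsequent statements on this connection run in the chosen fair scheduler pool.
stmt.execute("SET spark.sql.thriftserver.scheduler.pool=accounting")
val rs = stmt.executeQuery("SELECT 1")
```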

This PR takes over #17536 and addresses some of the review comments there.

## How was this patch tested?

Manually.

Closes #17536

Author: hyukjinkwon 

Closes #21778 from HyukjinKwon/SPARK-20220.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f876d3fa
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f876d3fa
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f876d3fa

Branch: refs/heads/master
Commit: f876d3fa800ae04ec33f27295354669bb1db911e
Parents: d57a267
Author: Miklos C 
Authored: Tue Jul 17 09:22:16 2018 +0800
Committer: hyukjinkwon 
Committed: Tue Jul 17 09:22:16 2018 +0800

--
 docs/job-scheduling.md | 8 
 1 file changed, 8 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f876d3fa/docs/job-scheduling.md
--
diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md
index da90342..2316f17 100644
--- a/docs/job-scheduling.md
+++ b/docs/job-scheduling.md
@@ -264,3 +264,11 @@ within it for the various settings. For example:
 A full example is also available in `conf/fairscheduler.xml.template`. Note 
that any pools not
 configured in the XML file will simply get default values for all settings 
(scheduling mode FIFO,
 weight 1, and minShare 0).
+
+## Scheduling using JDBC Connections
+To set a [Fair Scheduler](job-scheduling.html#fair-scheduler-pools) pool for a 
JDBC client session,
+users can set the `spark.sql.thriftserver.scheduler.pool` variable:
+
+{% highlight SQL %}
+SET spark.sql.thriftserver.scheduler.pool=accounting;
+{% endhighlight %}





spark git commit: [SPARK-23259][SQL] Clean up legacy code around hive external catalog and HiveClientImpl

2018-07-16 Thread gurwls223
Repository: spark
Updated Branches:
  refs/heads/master 0f0d1865f -> d57a267b7


[SPARK-23259][SQL] Clean up legacy code around hive external catalog and 
HiveClientImpl

## What changes were proposed in this pull request?

Three legacy statements are removed by this patch:

- in HiveExternalCatalog: The withClient wrapper is not necessary for the 
private method getRawTable.

- in HiveClientImpl: there is redundant lookup code in both the tableExists and 
getTableOption methods; they can share a single raw-lookup helper (see the sketch below).
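
A minimal, self-contained sketch of the de-duplication pattern; the `Table`, 
`CatalogTable`, and `Client` stand-ins below are simplified illustrations, not the 
actual Hive classes touched by this patch:

```scala
case class Table(name: String, cols: Seq[String])
case class CatalogTable(name: String, schema: Seq[String])

object Client {
  private val tables = Map("default.t1" -> Table("t1", Seq("a", "b")))
  // Returns null when the table does not exist, mirroring the Hive client behaviour.
  def getTable(db: String, table: String): Table = tables.getOrElse(s"$db.$table", null)
}

// One Option-returning raw lookup shared by both callers.
def getRawTableOption(db: String, table: String): Option[Table] =
  Option(Client.getTable(db, table))

def tableExists(db: String, table: String): Boolean =
  getRawTableOption(db, table).nonEmpty

def getTableOption(db: String, table: String): Option[CatalogTable] =
  getRawTableOption(db, table).map(t => CatalogTable(t.name, t.cols))
```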

This PR takes over https://github.com/apache/spark/pull/20425

## How was this patch tested?

Existing tests

Closes #20425

Author: hyukjinkwon 

Closes #21780 from HyukjinKwon/SPARK-23259.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d57a267b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d57a267b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d57a267b

Branch: refs/heads/master
Commit: d57a267b79f4015508c3686c34a0f438bad41ea1
Parents: 0f0d186
Author: Feng Liu 
Authored: Tue Jul 17 09:13:35 2018 +0800
Committer: hyukjinkwon 
Committed: Tue Jul 17 09:13:35 2018 +0800

--
 .../org/apache/spark/sql/hive/HiveExternalCatalog.scala  | 2 +-
 .../org/apache/spark/sql/hive/client/HiveClientImpl.scala| 8 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d57a267b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
--
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 011a3ba..44480ce 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -114,7 +114,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
* should interpret these special data source properties and restore the 
original table metadata
* before returning it.
*/
-  private[hive] def getRawTable(db: String, table: String): CatalogTable = 
withClient {
+  private[hive] def getRawTable(db: String, table: String): CatalogTable = {
 client.getTable(db, table)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/d57a267b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
--
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 1df46d7..db8fd5a 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -353,15 +353,19 @@ private[hive] class HiveClientImpl(
 client.getDatabasesByPattern(pattern).asScala
   }
 
+  private def getRawTableOption(dbName: String, tableName: String): 
Option[HiveTable] = {
+Option(client.getTable(dbName, tableName, false /* do not throw exception 
*/))
+  }
+
   override def tableExists(dbName: String, tableName: String): Boolean = 
withHiveState {
-Option(client.getTable(dbName, tableName, false /* do not throw exception 
*/)).nonEmpty
+getRawTableOption(dbName, tableName).nonEmpty
   }
 
   override def getTableOption(
   dbName: String,
   tableName: String): Option[CatalogTable] = withHiveState {
 logDebug(s"Looking up $dbName.$tableName")
-Option(client.getTable(dbName, tableName, false)).map { h =>
+getRawTableOption(dbName, tableName).map { h =>
   // Note: Hive separates partition columns and the schema, but for us the
   // partition columns are part of the schema
   val cols = h.getCols.asScala.map(fromHiveColumn)





svn commit: r28159 - in /dev/spark/2.4.0-SNAPSHOT-2018_07_16_16_01-0f0d186-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-07-16 Thread pwendell
Author: pwendell
Date: Mon Jul 16 23:15:42 2018
New Revision: 28159

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_07_16_16_01-0f0d186 docs


[This commit notification would consist of 1467 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]




spark git commit: [SPARK-24402][SQL] Optimize `In` expression when only one element in the collection or collection is empty

2018-07-16 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master ba437fc5c -> 0f0d1865f


[SPARK-24402][SQL] Optimize `In` expression when only one element in the 
collection or collection is empty

## What changes were proposed in this pull request?

Two new rules in the logical plan optimizers are added.

1. When there is only one element in the **`Collection`**, the `In`
expression is rewritten to **`EqualTo`**, so predicate pushdown can be used.

```scala
profileDF.filter( $"profileID".isInCollection(Set(6))).explain(true)
"""
  |== Physical Plan ==
  |*(1) Project [profileID#0]
  |+- *(1) Filter (isnotnull(profileID#0) && (profileID#0 = 6))
  |   +- *(1) FileScan parquet [profileID#0] Batched: true, Format: Parquet,
  | PartitionFilters: [],
  | PushedFilters: [IsNotNull(profileID), EqualTo(profileID,6)],
  | ReadSchema: struct
""".stripMargin
```

2. When the **`Collection`** is empty, and the input is nullable, the
logical plan will be simplified to

```scala
profileDF.filter( $"profileID".isInCollection(Set())).explain(true)
"""
  |== Optimized Logical Plan ==
  |Filter if (isnull(profileID#0)) null else false
  |+- Relation[profileID#0] parquet
""".stripMargin
```

TODO:

1. For `In` lists whose size is below a certain threshold, we should still
allow predicate pushdown.
2. Optimize **`In`** using **`tableswitch`** or **`lookupswitch`** when the
number of categories is low and the values are **`Int`** or **`Long`**.
3. The default immutable hash-trie set is slow to query; we should benchmark
different set implementations for faster lookups.
4. **`filter(if (condition) null else false)`** can be optimized to false.

## How was this patch tested?

A couple of new tests are added.

Author: DB Tsai 

Closes #21442 from dbtsai/optimize-in.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f0d1865
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f0d1865
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f0d1865

Branch: refs/heads/master
Commit: 0f0d1865f581a9158d73505471953656b173beba
Parents: ba437fc
Author: DB Tsai 
Authored: Mon Jul 16 15:33:39 2018 -0700
Committer: Xiao Li 
Committed: Mon Jul 16 15:33:39 2018 -0700

--
 .../sql/catalyst/optimizer/expressions.scala| 13 +---
 .../catalyst/optimizer/OptimizeInSuite.scala| 32 
 2 files changed, 41 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0f0d1865/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index 1d363b8..f78a0ff 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
@@ -218,15 +218,20 @@ object ReorderAssociativeOperator extends 
Rule[LogicalPlan] {
 object OptimizeIn extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
 case q: LogicalPlan => q transformExpressionsDown {
-  case In(v, list) if list.isEmpty && !v.nullable => FalseLiteral
+  case In(v, list) if list.isEmpty =>
+// When v is not nullable, the following expression will be optimized
+// to FalseLiteral which is tested in OptimizeInSuite.scala
+If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType))
   case expr @ In(v, list) if expr.inSetConvertible =>
 val newList = ExpressionSet(list).toSeq
-if (newList.size > SQLConf.get.optimizerInSetConversionThreshold) {
+if (newList.length == 1 && !newList.isInstanceOf[ListQuery]) {
+  EqualTo(v, newList.head)
+} else if (newList.length > 
SQLConf.get.optimizerInSetConversionThreshold) {
   val hSet = newList.map(e => e.eval(EmptyRow))
   InSet(v, HashSet() ++ hSet)
-} else if (newList.size < list.size) {
+} else if (newList.length < list.length) {
   expr.copy(list = newList)
-} else { // newList.length == list.length
+} else { // newList.length == list.length && newList.length > 1
   expr
 }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/0f0d1865/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
 
b/sql/catalyst/src/t

spark git commit: [SPARK-24805][SQL] Do not ignore avro files without extensions by default

2018-07-16 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master b0c95a1d6 -> ba437fc5c


[SPARK-24805][SQL] Do not ignore avro files without extensions by default

## What changes were proposed in this pull request?

In the PR, I propose to change the default behaviour of the Avro data source, which 
currently ignores files without the `.avro` extension on read. This PR sets the 
default value of `avro.mapred.ignore.inputs.without.extension` to `false` when the 
parameter is not set by the user.
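
A usage sketch under stated assumptions (the path is a placeholder, `spark` is an 
active SparkSession, and the Avro data source with short name `avro` is on the 
classpath); it shows how a user could opt back into the old behaviour by setting the 
Hadoop option explicitly:

```scala
// Opt back into the previous default: ignore files that do not end in ".avro".
spark.sparkContext.hadoopConfiguration
  .set("avro.mapred.ignore.inputs.without.extension", "true")

// With the new default (option unset), files without the .avro extension
// under this path are read as well.
val df = spark.read.format("avro").load("/tmp/avro-data")
```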

## How was this patch tested?

Added a test file in Avro format without an extension, and a new test for reading 
the file with and without a specified schema.

Author: Maxim Gekk 
Author: Maxim Gekk 

Closes #21769 from MaxGekk/avro-without-extension.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba437fc5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba437fc5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba437fc5

Branch: refs/heads/master
Commit: ba437fc5c73b95ee4c59327abf3161c58f64cb12
Parents: b0c95a1
Author: Maxim Gekk 
Authored: Mon Jul 16 14:35:44 2018 -0700
Committer: Xiao Li 
Committed: Mon Jul 16 14:35:44 2018 -0700

--
 .../apache/spark/sql/avro/AvroFileFormat.scala  | 14 +++---
 .../org/apache/spark/sql/avro/AvroSuite.scala   | 45 +---
 2 files changed, 47 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ba437fc5/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala
--
diff --git 
a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala 
b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala
index fb93033..9eb2064 100755
--- 
a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala
+++ 
b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala
@@ -62,7 +62,7 @@ private[avro] class AvroFileFormat extends FileFormat with 
DataSourceRegister {
 // Schema evolution is not supported yet. Here we only pick a single 
random sample file to
 // figure out the schema of the whole dataset.
 val sampleFile =
-  if (conf.getBoolean(AvroFileFormat.IgnoreFilesWithoutExtensionProperty, 
true)) {
+  if (AvroFileFormat.ignoreFilesWithoutExtensions(conf)) {
 files.find(_.getPath.getName.endsWith(".avro")).getOrElse {
   throw new FileNotFoundException(
 "No Avro files found. Hadoop option 
\"avro.mapred.ignore.inputs.without.extension\" " +
@@ -170,10 +170,7 @@ private[avro] class AvroFileFormat extends FileFormat with 
DataSourceRegister {
   // Doing input file filtering is improper because we may generate empty 
tasks that process no
   // input files but stress the scheduler. We should probably add a more 
general input file
   // filtering mechanism for `FileFormat` data sources. See SPARK-16317.
-  if (
-conf.getBoolean(AvroFileFormat.IgnoreFilesWithoutExtensionProperty, 
true) &&
-!file.filePath.endsWith(".avro")
-  ) {
+  if (AvroFileFormat.ignoreFilesWithoutExtensions(conf) && 
!file.filePath.endsWith(".avro")) {
 Iterator.empty
   } else {
 val reader = {
@@ -278,4 +275,11 @@ private[avro] object AvroFileFormat {
   value.readFields(new DataInputStream(in))
 }
   }
+
+  def ignoreFilesWithoutExtensions(conf: Configuration): Boolean = {
+// Files without .avro extensions are not ignored by default
+val defaultValue = false
+
+conf.getBoolean(AvroFileFormat.IgnoreFilesWithoutExtensionProperty, 
defaultValue)
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/ba437fc5/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
--
diff --git 
a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala 
b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index 9c6526b..446b421 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -18,7 +18,8 @@
 package org.apache.spark.sql.avro
 
 import java.io._
-import java.nio.file.Files
+import java.net.URL
+import java.nio.file.{Files, Path, Paths}
 import java.sql.{Date, Timestamp}
 import java.util.{TimeZone, UUID}
 
@@ -622,7 +623,12 @@ class AvroSuite extends QueryTest with SharedSQLContext 
with SQLTestUtils {
 intercept[FileNotFoundException] {
   withTempPath { dir =>
 FileUtils.touch(new File(dir, "test"))
-spark.read.avro(dir.toString)
+val hadoopConf = spark.sqlContext.sparkContext.hadoopConfiguration
+try {
+  hadoopConf.set

spark git commit: [SPARK-23901][SQL] Removing masking functions

2018-07-16 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master b045315e5 -> b0c95a1d6


[SPARK-23901][SQL] Removing masking functions

The PR reverts #21246.

Author: Marek Novotny 

Closes #21786 from mn-mikke/SPARK-23901.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b0c95a1d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b0c95a1d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b0c95a1d

Branch: refs/heads/master
Commit: b0c95a1d69df752bd62e49838a98268f6847
Parents: b045315
Author: Marek Novotny 
Authored: Mon Jul 16 14:28:35 2018 -0700
Committer: Xiao Li 
Committed: Mon Jul 16 14:28:35 2018 -0700

--
 .../expressions/MaskExpressionsUtils.java   |  80 ---
 .../catalyst/analysis/FunctionRegistry.scala|   8 -
 .../catalyst/expressions/maskExpressions.scala  | 569 ---
 .../expressions/MaskExpressionsSuite.scala  | 236 
 .../scala/org/apache/spark/sql/functions.scala  | 119 
 .../spark/sql/DataFrameFunctionsSuite.scala | 107 
 6 files changed, 1119 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b0c95a1d/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java
--
diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java
deleted file mode 100644
index 0587990..000
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.expressions;
-
-/**
- * Contains all the Utils methods used in the masking expressions.
- */
-public class MaskExpressionsUtils {
-  static final int UNMASKED_VAL = -1;
-
-  /**
-   * Returns the masking character for {@param c} or {@param c} is it should 
not be masked.
-   * @param c the character to transform
-   * @param maskedUpperChar the character to use instead of a uppercase letter
-   * @param maskedLowerChar the character to use instead of a lowercase letter
-   * @param maskedDigitChar the character to use instead of a digit
-   * @param maskedOtherChar the character to use instead of a any other 
character
-   * @return masking character for {@param c}
-   */
-  public static int transformChar(
-  final int c,
-  int maskedUpperChar,
-  int maskedLowerChar,
-  int maskedDigitChar,
-  int maskedOtherChar) {
-switch(Character.getType(c)) {
-  case Character.UPPERCASE_LETTER:
-if(maskedUpperChar != UNMASKED_VAL) {
-  return maskedUpperChar;
-}
-break;
-
-  case Character.LOWERCASE_LETTER:
-if(maskedLowerChar != UNMASKED_VAL) {
-  return maskedLowerChar;
-}
-break;
-
-  case Character.DECIMAL_DIGIT_NUMBER:
-if(maskedDigitChar != UNMASKED_VAL) {
-  return maskedDigitChar;
-}
-break;
-
-  default:
-if(maskedOtherChar != UNMASKED_VAL) {
-  return maskedOtherChar;
-}
-break;
-}
-
-return c;
-  }
-
-  /**
-   * Returns the replacement char to use according to the {@param rep} 
specified by the user and
-   * the {@param def} default.
-   */
-  public static int getReplacementChar(String rep, int def) {
-if (rep != null && rep.length() > 0) {
-  return rep.codePointAt(0);
-}
-return def;
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0c95a1d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 1d9e470..d696ce9 100644
--- 
a/s

svn commit: r28155 - in /dev/spark/2.4.0-SNAPSHOT-2018_07_16_12_01-b045315-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-07-16 Thread pwendell
Author: pwendell
Date: Mon Jul 16 19:16:21 2018
New Revision: 28155

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_07_16_12_01-b045315 docs


[This commit notification would consist of 1467 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]




svn commit: r28148 - in /dev/spark/2.4.0-SNAPSHOT-2018_07_16_08_02-cf97045-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-07-16 Thread pwendell
Author: pwendell
Date: Mon Jul 16 15:16:19 2018
New Revision: 28148

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_07_16_08_02-cf97045 docs


[This commit notification would consist of 1467 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]




spark git commit: [SPARK-24734][SQL] Fix type coercions and nullabilities of nested data types of some functions.

2018-07-16 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master cf9704534 -> b045315e5


[SPARK-24734][SQL] Fix type coercions and nullabilities of nested data types of 
some functions.

## What changes were proposed in this pull request?

We have some functions that need to be aware of the nullabilities of all their 
children, such as `CreateArray`, `CreateMap`, `Concat`, and so on. Currently we add 
casts to fix the nullabilities, but the casts might be removed during the 
optimization phase.
After discussion, we decided not to add extra casts just to fix the nullabilities of 
the nested types, but to let the functions handle them themselves.
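
A hedged illustration of the intended behaviour (column contents are made up; 
`spark` is an active SparkSession): concatenating a non-nullable array with an array 
that may contain nulls should produce a result type whose `containsNull` is true, 
handled by the function itself rather than by inserted casts.

```scala
import org.apache.spark.sql.functions._

val df = spark.range(1).select(
  concat(array(lit(1), lit(2)), array(lit(null).cast("int"))).as("c"))

// Expected: c is array<int> with containsNull = true, because one of the
// input arrays may contain nulls.
df.printSchema()
```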

## How was this patch tested?

Modified and added some tests.

Author: Takuya UESHIN 

Closes #21704 from ueshin/issues/SPARK-24734/concat_containsnull.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b045315e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b045315e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b045315e

Branch: refs/heads/master
Commit: b045315e5d87b7ea3588436053aaa4d5a7bd103f
Parents: cf97045
Author: Takuya UESHIN 
Authored: Mon Jul 16 23:16:25 2018 +0800
Committer: Wenchen Fan 
Committed: Mon Jul 16 23:16:25 2018 +0800

--
 .../sql/catalyst/analysis/TypeCoercion.scala| 113 ++-
 .../sql/catalyst/expressions/Expression.scala   |  12 +-
 .../sql/catalyst/expressions/arithmetic.scala   |  14 +--
 .../expressions/collectionOperations.scala  |  22 ++--
 .../expressions/complexTypeCreator.scala|  15 ++-
 .../expressions/conditionalExpressions.scala|   4 +-
 .../sql/catalyst/expressions/literals.scala |   2 +-
 .../catalyst/expressions/nullExpressions.scala  |   6 +-
 .../spark/sql/catalyst/util/TypeUtils.scala |  16 +--
 .../catalyst/analysis/TypeCoercionSuite.scala   |  43 ---
 .../expressions/ArithmeticExpressionSuite.scala |  12 ++
 .../CollectionExpressionsSuite.scala|  60 +++---
 .../catalyst/expressions/ComplexTypeSuite.scala |  19 
 .../expressions/NullExpressionsSuite.scala  |   7 ++
 .../spark/sql/DataFrameFunctionsSuite.scala |   8 ++
 15 files changed, 211 insertions(+), 142 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b045315e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index e8331c9..316aebd 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -184,6 +184,17 @@ object TypeCoercion {
 }
   }
 
+  def findCommonTypeDifferentOnlyInNullFlags(types: Seq[DataType]): 
Option[DataType] = {
+if (types.isEmpty) {
+  None
+} else {
+  types.tail.foldLeft[Option[DataType]](Some(types.head)) {
+case (Some(t1), t2) => findCommonTypeDifferentOnlyInNullFlags(t1, t2)
+case _ => None
+  }
+}
+  }
+
   /**
* Case 2 type widening (see the classdoc comment above for TypeCoercion).
*
@@ -259,8 +270,25 @@ object TypeCoercion {
 }
   }
 
-  private def haveSameType(exprs: Seq[Expression]): Boolean =
-exprs.map(_.dataType).distinct.length == 1
+  /**
+   * Check whether the given types are equal ignoring nullable, containsNull 
and valueContainsNull.
+   */
+  def haveSameType(types: Seq[DataType]): Boolean = {
+if (types.size <= 1) {
+  true
+} else {
+  val head = types.head
+  types.tail.forall(_.sameType(head))
+}
+  }
+
+  private def castIfNotSameType(expr: Expression, dt: DataType): Expression = {
+if (!expr.dataType.sameType(dt)) {
+  Cast(expr, dt)
+} else {
+  expr
+}
+  }
 
   /**
* Widens numeric types and converts strings to numbers when appropriate.
@@ -525,23 +553,24 @@ object TypeCoercion {
* This ensure that the types for various functions are as expected.
*/
   object FunctionArgumentConversion extends TypeCoercionRule {
+
 override protected def coerceTypes(
 plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
   // Skip nodes who's children have not been resolved yet.
   case e if !e.childrenResolved => e
 
-  case a @ CreateArray(children) if !haveSameType(children) =>
+  case a @ CreateArray(children) if 
!haveSameType(children.map(_.dataType)) =>
 val types = children.map(_.dataType)
 findWiderCommonType(types) match {
-  case Some(finalDataType) => CreateArray(children.map(Cast(_, 
finalDataType)))
+  case Some(final

spark git commit: [SPARK-18230][MLLIB] Throw a better exception, if the user or product doesn't exist

2018-07-16 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 9549a2814 -> cf9704534


[SPARK-18230][MLLIB] Throw a better exception, if the user or product doesn't 
exist

When invoking MatrixFactorizationModel.recommendProducts(Int, Int) with a 
non-existing user, a java.util.NoSuchElementException is thrown:

> java.util.NoSuchElementException: next on empty iterator
at scala.collection.Iterator$$anon$2.next(Iterator.scala:39)
at scala.collection.Iterator$$anon$2.next(Iterator.scala:37)
at 
scala.collection.IndexedSeqLike$Elements.next(IndexedSeqLike.scala:63)
at scala.collection.IterableLike$class.head(IterableLike.scala:107)
at 
scala.collection.mutable.WrappedArray.scala$collection$IndexedSeqOptimized$$super$head(WrappedArray.scala:35)
at 
scala.collection.IndexedSeqOptimized$class.head(IndexedSeqOptimized.scala:126)
at scala.collection.mutable.WrappedArray.head(WrappedArray.scala:35)
at 
org.apache.spark.mllib.recommendation.MatrixFactorizationModel.recommendProducts(MatrixFactorizationModel.scala:169)

## What changes were proposed in this pull request?
Throw a better exception, such as "user-id/product-id not found in the model", 
for a non-existent user/product.
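
A hedged usage sketch (the `model` value and the ids are assumptions): after this 
change, a lookup for an unknown user fails fast with an `IllegalArgumentException` 
raised by `require`, instead of the `NoSuchElementException` above.

```scala
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel

def recommendOrExplain(model: MatrixFactorizationModel, user: Int, num: Int): Unit = {
  try {
    val recs = model.recommendProducts(user, num)
    println(s"Got ${recs.length} recommendations")
  } catch {
    case e: IllegalArgumentException =>
      // e.g. "requirement failed: userId: 12345 not found in the model"
      println(e.getMessage)
  }
}
```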

## How was this patch tested?
Added UT

Author: Shahid 

Closes #21740 from shahidki31/checkInvalidUserProduct.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cf970453
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cf970453
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cf970453

Branch: refs/heads/master
Commit: cf9704534903b5bbd9bd4834728c92953e45293e
Parents: 9549a28
Author: Shahid 
Authored: Mon Jul 16 09:50:43 2018 -0500
Committer: Sean Owen 
Committed: Mon Jul 16 09:50:43 2018 -0500

--
 .../MatrixFactorizationModel.scala  | 23 +++-
 .../MatrixFactorizationModelSuite.scala | 21 ++
 2 files changed, 38 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/cf970453/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
index ac709ad..7b49d4d 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
@@ -78,8 +78,13 @@ class MatrixFactorizationModel @Since("0.8.0") (
   /** Predict the rating of one user for one product. */
   @Since("0.8.0")
   def predict(user: Int, product: Int): Double = {
-val userVector = userFeatures.lookup(user).head
-val productVector = productFeatures.lookup(product).head
+val userFeatureSeq = userFeatures.lookup(user)
+require(userFeatureSeq.nonEmpty, s"userId: $user not found in the model")
+val productFeatureSeq = productFeatures.lookup(product)
+require(productFeatureSeq.nonEmpty, s"productId: $product not found in the 
model")
+
+val userVector = userFeatureSeq.head
+val productVector = productFeatureSeq.head
 blas.ddot(rank, userVector, 1, productVector, 1)
   }
 
@@ -164,9 +169,12 @@ class MatrixFactorizationModel @Since("0.8.0") (
*  recommended the product is.
*/
   @Since("1.1.0")
-  def recommendProducts(user: Int, num: Int): Array[Rating] =
-MatrixFactorizationModel.recommend(userFeatures.lookup(user).head, 
productFeatures, num)
+  def recommendProducts(user: Int, num: Int): Array[Rating] = {
+val userFeatureSeq = userFeatures.lookup(user)
+require(userFeatureSeq.nonEmpty, s"userId: $user not found in the model")
+MatrixFactorizationModel.recommend(userFeatureSeq.head, productFeatures, 
num)
   .map(t => Rating(user, t._1, t._2))
+  }
 
   /**
* Recommends users to a product. That is, this returns users who are most 
likely to be
@@ -181,9 +189,12 @@ class MatrixFactorizationModel @Since("0.8.0") (
*  recommended the user is.
*/
   @Since("1.1.0")
-  def recommendUsers(product: Int, num: Int): Array[Rating] =
-MatrixFactorizationModel.recommend(productFeatures.lookup(product).head, 
userFeatures, num)
+  def recommendUsers(product: Int, num: Int): Array[Rating] = {
+val productFeatureSeq = productFeatures.lookup(product)
+require(productFeatureSeq.nonEmpty, s"productId: $product not found in the 
model")
+MatrixFactorizationModel.recommend(productFeatureSeq.head, userFeatures, 
num)
   .map(t => Rating(t._1, product, t._2))
+  }
 
   protected override val formatVersion: String = "1.0"
 

http://git-wi

spark git commit: Update for spark 2.2.2 release

2018-07-16 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 4bc4ccd63 -> 17db57213


Update for spark 2.2.2 release

The release process for Spark 2.2.2 says to update this test.

Author: Thomas Graves 

Closes #21707 from tgravescs/spark222-release.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/17db5721
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/17db5721
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/17db5721

Branch: refs/heads/branch-2.2
Commit: 17db57213aabc13cb59d13f0f570c7539dae
Parents: 4bc4ccd
Author: Thomas Graves 
Authored: Mon Jul 16 09:29:20 2018 -0500
Committer: Thomas Graves 
Committed: Mon Jul 16 09:29:20 2018 -0500

--
 .../apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/17db5721/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
--
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
index 313059b..e6a6cac 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
@@ -170,7 +170,7 @@ class HiveExternalCatalogVersionsSuite extends 
SparkSubmitTestUtils {
 
 object PROCESS_TABLES extends QueryTest with SQLTestUtils {
   // Tests the latest version of every release line.
-  val testingVersions = Seq("2.0.2", "2.1.2", "2.2.1")
+  val testingVersions = Seq("2.0.2", "2.1.3", "2.2.2")
 
   protected var spark: SparkSession = _
 





svn commit: r28142 - in /dev/spark/2.4.0-SNAPSHOT-2018_07_16_06_07-9549a28-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-07-16 Thread pwendell
Author: pwendell
Date: Mon Jul 16 13:21:42 2018
New Revision: 28142

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_07_16_06_07-9549a28 docs


[This commit notification would consist of 1467 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]




spark git commit: [SPARK-24549][SQL] Support Decimal type push down to the parquet data sources

2018-07-16 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master 2603ae30b -> 9549a2814


[SPARK-24549][SQL] Support Decimal type push down to the parquet data sources

## What changes were proposed in this pull request?

Support Decimal type push down to the parquet data sources.
The Decimal comparator used is: 
[`BINARY_AS_SIGNED_INTEGER_COMPARATOR`](https://github.com/apache/parquet-mr/blob/c6764c4a0848abf1d581e22df8b33e28ee9f2ced/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java#L224-L292).

## How was this patch tested?

unit tests and manual tests.

**manual tests**:
```scala
spark.range(1000).selectExpr("id", "cast(id as decimal(9)) as d1", "cast(id 
as decimal(9, 2)) as d2", "cast(id as decimal(18)) as d3", "cast(id as 
decimal(18, 4)) as d4", "cast(id as decimal(38)) as d5", "cast(id as 
decimal(38, 18)) as d6").coalesce(1).write.option("parquet.block.size", 
1048576).parquet("/tmp/spark/parquet/decimal")
val df = spark.read.parquet("/tmp/spark/parquet/decimal/")
spark.sql("set spark.sql.parquet.filterPushdown.decimal=true")
// Only read about 1 MB data
df.filter("d2 = 1").show
// Only read about 1 MB data
df.filter("d4 = 1").show
spark.sql("set spark.sql.parquet.filterPushdown.decimal=false")
// Read 174.3 MB data
df.filter("d2 = 1").show
// Read 174.3 MB data
df.filter("d4 = 1").show
```

Author: Yuming Wang 

Closes #21556 from wangyum/SPARK-24549.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9549a281
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9549a281
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9549a281

Branch: refs/heads/master
Commit: 9549a2814951f9ba969955d78ac4bd2240f85989
Parents: 2603ae3
Author: Yuming Wang 
Authored: Mon Jul 16 15:44:51 2018 +0800
Committer: Wenchen Fan 
Committed: Mon Jul 16 15:44:51 2018 +0800

--
 .../org/apache/spark/sql/internal/SQLConf.scala |  10 +
 .../FilterPushdownBenchmark-results.txt |  96 
 .../datasources/parquet/ParquetFileFormat.scala |   3 +-
 .../datasources/parquet/ParquetFilters.scala| 225 ++-
 .../benchmark/FilterPushdownBenchmark.scala |   8 +-
 .../parquet/ParquetFilterSuite.scala|  90 +++-
 6 files changed, 324 insertions(+), 108 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9549a281/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 07d33fa..41fe0c3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -387,6 +387,14 @@ object SQLConf {
 .booleanConf
 .createWithDefault(true)
 
+  val PARQUET_FILTER_PUSHDOWN_DECIMAL_ENABLED =
+buildConf("spark.sql.parquet.filterPushdown.decimal")
+  .doc("If true, enables Parquet filter push-down optimization for 
Decimal. " +
+"This configuration only has an effect when 
'spark.sql.parquet.filterPushdown' is enabled.")
+  .internal()
+  .booleanConf
+  .createWithDefault(true)
+
   val PARQUET_FILTER_PUSHDOWN_STRING_STARTSWITH_ENABLED =
 buildConf("spark.sql.parquet.filterPushdown.string.startsWith")
 .doc("If true, enables Parquet filter push-down optimization for string 
startsWith function. " +
@@ -1505,6 +1513,8 @@ class SQLConf extends Serializable with Logging {
 
   def parquetFilterPushDownTimestamp: Boolean = 
getConf(PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED)
 
+  def parquetFilterPushDownDecimal: Boolean = 
getConf(PARQUET_FILTER_PUSHDOWN_DECIMAL_ENABLED)
+
   def parquetFilterPushDownStringStartWith: Boolean =
 getConf(PARQUET_FILTER_PUSHDOWN_STRING_STARTSWITH_ENABLED)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/9549a281/sql/core/benchmarks/FilterPushdownBenchmark-results.txt
--
diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt 
b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt
index 4f38cc4..2215ed9 100644
--- a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt
+++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt
@@ -292,120 +292,120 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
 
 Select 1 decimal(9, 2) row (value = 7864320): Best/Avg Time(ms)Rate(M/s)   
Per Row(ns)   Relative
 

-Parquet Vectorized3785 / 3867  4.2 
240.6   1.0X
-Parquet Vectorized (Pushdown)

svn commit: r28134 - in /dev/spark/2.4.0-SNAPSHOT-2018_07_16_00_02-2603ae3-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-07-16 Thread pwendell
Author: pwendell
Date: Mon Jul 16 07:17:15 2018
New Revision: 28134

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_07_16_00_02-2603ae3 docs


[This commit notification would consist of 1467 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]
